717 lines
27 KiB
Python
717 lines
27 KiB
Python
#!/usr/bin/env python3
|
|
"""Folksy Idiom Generator — Procedural fake-proverb generator using ConceptNet relationships."""
|
|
|
|
import argparse
|
|
import csv
|
|
import json
|
|
import os
|
|
import random
|
|
import sys
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
|
|
# Default location of the vocabulary/relations CSV files: a "data" directory
# sitting next to this module.
DATA_DIR = Path(__file__).parent.joinpath("data")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Graph data structures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class FolksyGraph:
    """In-memory graph of folksy vocabulary and their ConceptNet relationships.

    The graph is a set of plain indexes that are filled by :meth:`load` and
    extended by :meth:`merge_fictional`:

    * ``vocab``       -- word -> {"categories", "tangibility", "edge_count"}
    * ``by_category`` -- category -> [words]
    * ``edges``       -- (start, relation) -> [(end, weight, surface)]
    * ``reverse``     -- (end, relation) -> [(start, weight, surface)]
    * ``all_edges``   -- word -> [(other_word, relation, weight)]
    * ``all_words``   -- list of every vocab word (used for random picks)
    """

    def __init__(self):
        self.vocab = {}
        self.by_category = defaultdict(list)
        self.edges = defaultdict(list)
        self.reverse = defaultdict(list)
        self.all_edges = defaultdict(list)
        self.all_words = []

    def load(self, vocab_path=None, relations_path=None):
        """Populate the graph from the vocabulary and relations CSV files.

        Args:
            vocab_path: CSV with ``word`` and ``categories`` columns and the
                optional numeric columns ``tangibility_score`` and
                ``conceptnet_edge_count``. Defaults to
                ``DATA_DIR / "folksy_vocab.csv"``.
            relations_path: CSV with ``start_word``, ``end_word``,
                ``relation``, ``weight`` and optional ``surface_text``
                columns. Defaults to ``DATA_DIR / "folksy_relations.csv"``.

        Raises:
            FileNotFoundError: if either file does not exist.
            KeyError / ValueError: if a required column is missing or a
                non-empty numeric cell cannot be parsed.
        """
        vocab_path = vocab_path or (DATA_DIR / "folksy_vocab.csv")
        relations_path = relations_path or (DATA_DIR / "folksy_relations.csv")

        with open(vocab_path, newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                word = row["word"]
                cats = [c.strip() for c in (row["categories"] or "").split(",") if c.strip()]
                self.vocab[word] = {
                    "categories": cats,
                    # ``or 0`` covers both a missing column and an empty
                    # cell; float("")/int("") would raise ValueError.
                    "tangibility": float(row.get("tangibility_score") or 0),
                    "edge_count": int(row.get("conceptnet_edge_count") or 0),
                }
                for cat in cats:
                    self.by_category[cat].append(word)
        self.all_words = list(self.vocab)

        with open(relations_path, newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                sw = row["start_word"]
                ew = row["end_word"]
                rel = row["relation"]
                w = float(row["weight"])
                # DictReader yields None for cells missing from short rows.
                surf = row.get("surface_text") or ""
                self.edges[(sw, rel)].append((ew, w, surf))
                self.reverse[(ew, rel)].append((sw, w, surf))
                # all_edges is relation-agnostic and lists the edge under
                # both of its endpoints.
                self.all_edges[sw].append((ew, rel, w))
                self.all_edges[ew].append((sw, rel, w))

    def merge_fictional(self, entities_path):
        """Merge fictional entities from a JSON file into the graph.

        The file must contain ``{"entities": [...]}`` where each entity has a
        ``name`` and optionally ``categories``, ``properties``,
        ``derived_from`` (parent vocab words) and ``relations``
        (relation -> [targets]).

        For every parent that is a known vocab word the entity inherits the
        parent's categories and a copy of every edge that starts or ends at
        the parent (with the parent's original weight).  Explicit relations
        and properties are added with weight 2.0.

        Merging an entity whose name is already in the vocabulary replaces
        its vocab record without listing the word twice in ``all_words`` or
        ``by_category``.
        """
        with open(entities_path, encoding="utf-8") as f:
            data = json.load(f)

        for entity in data.get("entities", []):
            name = entity["name"].lower()
            # dict.fromkeys de-duplicates while keeping a stable order; a set
            # would make the category order depend on string hashing.
            cats = list(dict.fromkeys(entity.get("categories", [])))
            props = entity.get("properties", [])

            # Inherit categories and edges from known parents.
            inherited_relations = defaultdict(list)
            for parent in entity.get("derived_from", []):
                parent = parent.lower()
                if parent not in self.vocab:
                    continue
                cats = list(dict.fromkeys(cats + self.vocab[parent]["categories"]))
                for (sw, rel), targets in self.edges.items():
                    if sw == parent:
                        for (ew, w, _surf) in targets:
                            inherited_relations[rel].append((ew, w))
                # Edges pointing *at* the parent are inherited as outgoing
                # edges of the entity as well.
                for (ew, rel), sources in self.reverse.items():
                    if ew == parent:
                        for (sw, w, _surf) in sources:
                            inherited_relations[rel].append((sw, w))

            # Register the entity as a vocab word (once).
            already_known = name in self.vocab
            self.vocab[name] = {
                "categories": cats,
                "tangibility": 0.5,
                "edge_count": 0,
            }
            for cat in cats:
                if name not in self.by_category[cat]:
                    self.by_category[cat].append(name)
            if not already_known:
                self.all_words.append(name)

            # Inherited relations keep the parent's weight.
            for rel, targets in inherited_relations.items():
                for (target, w) in targets:
                    self.edges[(name, rel)].append((target, w, ""))
                    self.reverse[(target, rel)].append((name, w, ""))
                    self.all_edges[name].append((target, rel, w))

            # Explicit relations get a fixed weight of 2.0.
            for rel, targets in entity.get("relations", {}).items():
                for target in targets:
                    target_lower = target.lower()
                    self.edges[(name, rel)].append((target_lower, 2.0, ""))
                    self.reverse[(target_lower, rel)].append((name, 2.0, ""))
                    self.all_edges[name].append((target_lower, rel, 2.0))

            # Properties become HasProperty edges; they are indexed in
            # ``reverse`` too so that index covers every edge in ``edges``.
            for prop in props:
                prop_lower = prop.lower()
                self.edges[(name, "HasProperty")].append((prop_lower, 2.0, ""))
                self.reverse[(prop_lower, "HasProperty")].append((name, 2.0, ""))
                self.all_edges[name].append((prop_lower, "HasProperty", 2.0))

    def neighbors(self, word, relation=None, min_weight=0.0, vocab_only=False):
        """Get neighbors of a word, optionally filtered by relation type.

        Args:
            word: The word whose edges are looked up.
            relation: If given, only outgoing edges of this relation are
                returned, as ``(end, weight, surface)`` tuples.  If omitted,
                every entry of ``all_edges[word]`` is considered and the
                result tuples are ``(other, relation, weight)``.
            min_weight: Minimum edge weight (inclusive).
            vocab_only: If True, only return neighbors that are in the folksy
                vocab. If False (default), return all neighbors including
                action phrases, properties, etc.

        Returns:
            A list of tuples; the tuple layout depends on ``relation`` as
            described above.
        """
        if relation:
            return [
                (ew, w, s)
                for (ew, w, s) in self.edges.get((word, relation), [])
                if w >= min_weight and (not vocab_only or ew in self.vocab)
            ]
        return [
            (ew, rel, w)
            for (ew, rel, w) in self.all_edges.get(word, [])
            if w >= min_weight and (not vocab_only or ew in self.vocab)
        ]

    def vocab_neighbors(self, word, relation=None, min_weight=0.0):
        """Get neighbors restricted to folksy vocab words only."""
        return self.neighbors(word, relation, min_weight, vocab_only=True)

    def _two_hop(self, word, rel1, rel2, min_weight, vocab_only):
        """Shared walker for :meth:`two_hop` and :meth:`two_hop_any`.

        ``min_weight`` is applied to the second hop only; the first hop is
        not weight-filtered.
        """
        results = []
        for (bridge, w1, _) in self.edges.get((word, rel1), []):
            for (target, w2, _) in self.edges.get((bridge, rel2), []):
                if target == word or w2 < min_weight:
                    continue
                if vocab_only and target not in self.vocab:
                    continue
                results.append((bridge, target, w1, w2))
        return results

    def two_hop(self, word, rel1, rel2, min_weight=0.5):
        """Find 2-hop paths: word -[rel1]-> bridge -[rel2]-> target.

        Bridge can be any word; target must be in folksy vocab and differ
        from ``word``.  Returns ``(bridge, target, w1, w2)`` tuples.
        """
        return self._two_hop(word, rel1, rel2, min_weight, vocab_only=True)

    def two_hop_any(self, word, rel1, rel2, min_weight=0.5):
        """Find 2-hop paths where target can be any word (not just vocab)."""
        return self._two_hop(word, rel1, rel2, min_weight, vocab_only=False)

    def random_word(self, category=None):
        """Pick a random word, optionally from a specific category.

        An unknown (or falsy) category falls back to the whole vocabulary.
        Returns None when the chosen pool is empty.
        """
        if category and category in self.by_category:
            pool = self.by_category[category]
        else:
            pool = self.all_words
        return random.choice(pool) if pool else None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Meta-templates
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class MetaTemplate:
    """Common scaffolding for every meta-template family.

    Subclasses set ``id``, ``name`` and ``surface_templates`` and implement
    :meth:`generate`.
    """

    id = "base"
    name = "Base Template"
    surface_templates = []

    def __init__(self, graph):
        # The graph the family queries when filling its slots.
        self.graph = graph

    def generate(self, seed_word=None, seed_category=None):
        """Attempt to generate a saying. Returns (saying, debug_info) or (None, None)."""
        raise NotImplementedError

    def _pick_template(self):
        """Return one of this family's surface templates at random."""
        options = self.surface_templates
        return random.choice(options)

    def _seed(self, seed_word=None, seed_category=None):
        """Return the lower-cased explicit seed, else a random graph word."""
        if not seed_word:
            return self.graph.random_word(seed_category)
        return seed_word.lower()
|
|
|
|
|
|
class Deconstruction(MetaTemplate):
    """A without B is just humble D.

    Picks two distinct "ingredients" of the seed A (B and D), decorates D
    with a property C, and slots them into a surface template.
    """

    id = "deconstruction"
    name = "Deconstruction"
    surface_templates = [
        "You know what they say, a {A} with no {B} is just a {C} {D}.",
        "Take the {B} out of {A} and all you've got left is {C} {D}.",
        "{A} without {B}? That's just {D} with ideas above its station.",
        "An {A} ain't nothing but {D} that met some {B}.",
    ]

    @staticmethod
    def _distinct(items, exclude):
        """Drop repeated concepts (and ``exclude`` itself), keeping first-seen order."""
        seen = {exclude}
        unique = []
        for item in items:
            if item[0] not in seen:
                seen.add(item[0])
                unique.append(item)
        return unique

    def generate(self, seed_word=None, seed_category=None):
        """Build a saying for the seed.

        Returns:
            ``(saying, debug_info)``, or ``(None, None)`` when no seed is
            available or fewer than two distinct ingredients are found.
        """
        a = self._seed(seed_word, seed_category)
        if not a:
            return None, None

        # Find what A is made of / requires.
        ingredients = []
        for rel in ("MadeOf", "HasPrerequisite", "HasA"):
            ingredients.extend(_short_concepts(self.graph.neighbors(a, rel, min_weight=0.5)))
        # The same concept can be reached through several relations; without
        # de-duplication B and D could end up being the same word (or A).
        ingredients = self._distinct(ingredients, exclude=a)

        if len(ingredients) < 2:
            # Fall back to edges pointing at A.
            for rel in ("MadeOf", "HasPrerequisite"):
                for (start, w, s) in self.graph.reverse.get((a, rel), []):
                    if len(start.split("_")) <= 2:
                        ingredients.append((start, w, s))
            ingredients = self._distinct(ingredients, exclude=a)

        if len(ingredients) < 2:
            return None, None

        random.shuffle(ingredients)
        b_word = _readable(ingredients[0][0])
        d_word = _readable(ingredients[1][0])

        # Find a property for D, or use a stock adjective.
        props = self.graph.neighbors(ingredients[1][0], "HasProperty")
        if props:
            c_word = _readable(random.choice(props)[0])
        else:
            c_word = random.choice(["plain", "sorry", "old", "humble", "dry", "wet", "cold"])

        template = self._pick_template()
        saying = template.format(A=a, B=b_word, C=c_word, D=d_word)

        debug = {
            "template_family": self.id,
            "template": template,
            "chain": f"{a} MadeOf/Has [{b_word}, {d_word}]; {d_word} HasProperty {c_word}",
            "slots": {"A": a, "B": b_word, "C": c_word, "D": d_word},
        }
        return saying, debug
|
|
|
|
|
|
class DenialOfConsequences(MetaTemplate):
    """Don't create conditions for B and deny B."""

    id = "denial_of_consequences"
    name = "Denial of Consequences"
    surface_templates = [
        "Don't {C} the {A} and say you ain't got {B}.",
        "Don't {C} the {A} and act surprised when the {B} show up.",
        "Man who {C}s a {A} can't complain about {B}.",
        "You can't {C} a {A} and then wonder where all the {B} came from.",
    ]

    # Preferred "creation" verb for specific seed words.
    _CREATE_VERBS = {
        "pond": "dig", "birdhouse": "hang", "fence": "build", "trap": "set",
        "fire": "light", "garden": "plant", "nest": "build", "well": "dig",
        "bridge": "build", "barn": "raise", "path": "clear", "stable": "raise",
        "coop": "build", "den": "dig", "ditch": "dig", "furrow": "plow",
        "orchard": "plant", "hearth": "lay", "chimney": "build",
    }

    def generate(self, seed_word=None, seed_category=None):
        """Build a saying about creating A and then denying its consequence B.

        Returns ``(saying, debug_info)``, or ``(None, None)`` when there is
        no seed or nothing is found that A attracts or causes.
        """
        place = self._seed(seed_word, seed_category)
        if not place:
            return None, None

        graph = self.graph

        # Things located at the seed (edges "B AtLocation A")...
        candidates = [
            (thing, weight)
            for (thing, weight, _) in graph.reverse.get((place, "AtLocation"), [])
        ]
        # ...plus whatever the seed causes or makes one want.
        for relation in ("Causes", "CausesDesire"):
            candidates += [
                (thing, weight)
                for (thing, weight, _) in graph.edges.get((place, relation), [])
            ]

        # Last resort: seed UsedFor bridge, bridge AtLocation target.
        if not candidates:
            candidates = [
                (target, w1 + w2)
                for (_, target, w1, w2) in graph.two_hop(place, "UsedFor", "AtLocation")
            ]

        if not candidates:
            return None, None

        b_word = _readable(random.choice(candidates)[0])

        # A generic verb is drawn only when the seed has no dedicated one.
        c_word = self._CREATE_VERBS.get(place) or random.choice(
            ["build", "set up", "put out", "lay down", "make"]
        )

        template = self._pick_template()
        saying = template.format(A=place, B=b_word, C=c_word)

        debug = {
            "template_family": self.id,
            "template": template,
            "chain": f"{b_word} AtLocation {place}; {place} created by {c_word}",
            "slots": {"A": place, "B": b_word, "C": c_word},
        }
        return saying, debug
|
|
|
|
|
|
class IronicDeficiency(MetaTemplate):
    """Producer of X lacks X."""

    id = "ironic_deficiency"
    name = "Ironic Deficiency"
    surface_templates = [
        "The {A}'s {F} always goes without {X}.",
        "Nobody's got less {X} than the man who makes the {A}.",
        "Funny how the {A} never has enough {X} for itself.",
        "The {A} feeds everyone's {X} but its own.",
    ]

    def generate(self, seed_word=None, seed_category=None):
        """Build a saying in which the seed lacks the thing it provides.

        Returns ``(saying, debug_info)``, or ``(None, None)`` when there is
        no seed or the seed has no short UsedFor/CapableOf/Causes target.
        """
        producer = self._seed(seed_word, seed_category)
        if not producer:
            return None, None

        # Everything the seed is used for, can do, or causes (weight >= 0.5),
        # trimmed to short concepts.
        outputs = _short_concepts([
            hit
            for relation in ("UsedFor", "CapableOf", "Causes")
            for hit in self.graph.neighbors(producer, relation, min_weight=0.5)
        ])
        if not outputs:
            return None, None

        x_word = _readable(random.choice(outputs)[0])

        # The relative is drawn even though only one template uses {F}.
        f_word = random.choice(["wife", "children", "household", "family", "own kind"])

        template = self._pick_template()
        saying = template.format(A=producer, X=x_word, F=f_word)

        return saying, {
            "template_family": self.id,
            "template": template,
            "chain": f"{producer} UsedFor/Produces {x_word}; irony: {producer} lacks {x_word}",
            "slots": {"A": producer, "X": x_word, "F": f_word},
        }
|
|
|
|
|
|
class FutilePreparation(MetaTemplate):
    """Like doing A and hoping for unrelated Y."""

    id = "futile_preparation"
    name = "Futile Preparation"
    surface_templates = [
        "Like {A_gerund} and hoping for {Y}.",
        "That's just {A_gerund} and praying for {Y}.",
        "My grandmother used to say, '{A_gerund} won't bring you {Y}.'",
        "You can {A_verb} all you want, it still won't get you {Y}.",
    ]

    def generate(self, seed_word=None, seed_category=None):
        """Pair an action the seed is used for with a mismatched outcome.

        Returns ``(saying, debug_info)``, or ``(None, None)`` when there is
        no seed, the seed has no short UsedFor target, or no alternative
        outcome can be found.
        """
        seed = self._seed(seed_word, seed_category)
        if not seed:
            return None, None

        graph = self.graph

        # The action: something the seed is used for (at most two words).
        uses = _short_concepts(graph.neighbors(seed, "UsedFor", min_weight=0.5), max_words=2)
        if not uses:
            return None, None
        action_word = random.choice(uses)[0]

        # Candidate outcomes, first via 2-hop paths through the seed's location.
        outcomes = []
        for relation in ("Causes", "UsedFor", "HasSubevent"):
            outcomes += [
                (_readable(target), w1 + w2)
                for (_, target, w1, w2) in graph.two_hop_any(seed, "AtLocation", relation)
            ]

        # Then via what up to five random category siblings are used for.
        for category in graph.vocab.get(seed, {}).get("categories", []):
            members = graph.by_category.get(category, [])
            for sibling in random.sample(members, min(5, len(members))):
                if sibling == seed:
                    continue
                outcomes += [
                    (_readable(target), weight)
                    for (target, weight, _) in graph.edges.get((sibling, "UsedFor"), [])
                    if target != action_word
                ]

        if not outcomes:
            return None, None

        y_word = random.choice(outcomes)[0]

        gerund = _gerund(action_word)
        verb = _readable(action_word)

        template = self._pick_template()
        saying = template.format(
            A_gerund=f"{gerund} the {seed}",
            Y=y_word,
            A_verb=f"{verb} the {seed}",
        )

        return saying, {
            "template_family": self.id,
            "template": template,
            "chain": f"{seed} UsedFor {action_word}; different domain: {y_word}",
            "slots": {"seed": seed, "action": action_word, "Y": y_word},
        }
|
|
|
|
|
|
class HypocriticalComplaint(MetaTemplate):
    """Consumes X from system Z, complains about remaining Y.

    Z is the seed; X and Y are two distinct concepts linked to Z through
    HasA / PartOf / MadeOf edges.
    """

    id = "hypocritical_complaint"
    name = "Hypocritical Complaint"
    surface_templates = [
        "There's a fella who {verb}s the {X} and says the {Y}'s no good.",
        "That's like eating the {X} and complaining the {Y} tastes off.",
        "He picks all the {X} then wonders why the {Y} looks bare.",
        "Don't {verb} the {X} and then gripe about the {Y}.",
    ]

    @staticmethod
    def _distinct(items, exclude):
        """Drop repeated concepts (and ``exclude`` itself), keeping first-seen order."""
        seen = {exclude}
        unique = []
        for item in items:
            if item[0] not in seen:
                seen.add(item[0])
                unique.append(item)
        return unique

    def generate(self, seed_word=None, seed_category=None):
        """Build a saying for the seed Z.

        Returns:
            ``(saying, debug_info)``, or ``(None, None)`` when no seed is
            available or fewer than two distinct parts are found.
        """
        # Z is the whole, X and Y are parts.
        z = self._seed(seed_word, seed_category)
        if not z:
            return None, None

        # Collect parts of Z from outgoing edges...
        parts = []
        for rel in ("HasA", "PartOf", "MadeOf"):
            parts.extend(_short_concepts(self.graph.neighbors(z, rel, min_weight=0.5)))
        # ...and from edges pointing at Z (at most two-word concepts).
        for rel in ("PartOf", "HasA"):
            for (start, w, s) in self.graph.reverse.get((z, rel), []):
                if len(start.split("_")) <= 2:
                    parts.append((start, w, s))

        # The same concept is often reachable through several of the lookups
        # above; de-duplicate so X and Y can never be the same word (or Z).
        parts = self._distinct(parts, exclude=z)
        if len(parts) < 2:
            return None, None

        random.shuffle(parts)
        x_word = _readable(parts[0][0])
        y_word = _readable(parts[1][0])

        consume_verbs = ["eat", "drink", "take", "pick", "use up", "grab"]
        verb = random.choice(consume_verbs)

        template = self._pick_template()
        saying = template.format(X=x_word, Y=y_word, verb=verb)

        debug = {
            "template_family": self.id,
            "template": template,
            "chain": f"{x_word} PartOf/HasA {z}; {y_word} PartOf/HasA {z}",
            "slots": {"Z": z, "X": x_word, "Y": y_word, "verb": verb},
        }
        return saying, debug
|
|
|
|
|
|
class TautologicalWisdom(MetaTemplate):
    """States obvious causal/prerequisite as wisdom."""

    id = "tautological_wisdom"
    name = "Tautological Wisdom"
    surface_templates = [
        "You know what they say, it takes a {X} to get a {Y}.",
        "My daddy always said, can't have {Y} without {X}.",
        "A {Y} don't come without its {X}, now does it?",
        "You want {Y}? Well, first you're gonna need {X}.",
        "Ain't no {Y} ever came from nothing — you need {X}.",
    ]

    def generate(self, seed_word=None, seed_category=None):
        """Turn a prerequisite/cause edge of the seed into an "it takes X to get Y" saying.

        Returns ``(saying, debug_info)``, or ``(None, None)`` when there is
        no seed or the seed has no HasPrerequisite/Causes edge.
        """
        seed = self._seed(seed_word, seed_category)
        if not seed:
            return None, None

        forward = self.graph.edges
        backward = self.graph.reverse

        # Each entry is (X, Y, weight), with X the thing needed for Y.
        # seed HasPrerequisite target: X = target, Y = seed
        chains = [
            (_readable(target), seed, weight)
            for (target, weight, _) in forward.get((seed, "HasPrerequisite"), [])
        ]
        # seed Causes target: X = seed, Y = target
        chains += [
            (seed, _readable(target), weight)
            for (target, weight, _) in forward.get((seed, "Causes"), [])
        ]
        # source HasPrerequisite seed: X = seed, Y = source
        chains += [
            (seed, _readable(source), weight)
            for (source, weight, _) in backward.get((seed, "HasPrerequisite"), [])
        ]

        if not chains:
            return None, None

        x_word, y_word, _ = random.choice(chains)

        template = self._pick_template()
        saying = template.format(X=x_word, Y=y_word)

        return saying, {
            "template_family": self.id,
            "template": template,
            "chain": f"{x_word} -> {y_word} (prerequisite/cause)",
            "slots": {"X": x_word, "Y": y_word},
        }
|
|
|
|
|
|
class FalseEquivalence(MetaTemplate):
    """A is just B with/without property P."""

    id = "false_equivalence"
    name = "False Equivalence"
    surface_templates = [
        "A {A} is just a {B} that's got {P}.",
        "What's a {A} but a {B} with {P}?",
        "The only difference between a {A} and a {B} is {P}.",
        "Take the {P} from a {A} and you've got yourself a {B}.",
    ]

    def generate(self, seed_word=None, seed_category=None):
        """Equate the seed with a same-category sibling, differing by one trait.

        Returns ``(saying, debug_info)``, or ``(None, None)`` when there is
        no seed, the seed has no categories, or it has no category siblings.
        """
        a = self._seed(seed_word, seed_category)
        if not a:
            return None, None

        graph = self.graph
        a_cats = set(graph.vocab.get(a, {}).get("categories", []))
        if not a_cats:
            return None, None

        # Words sharing a category with A; a word sharing several categories
        # is listed once per shared category.
        siblings = [
            other
            for cat in a_cats
            for other in graph.by_category.get(cat, [])
            if other != a
        ]
        if not siblings:
            return None, None

        b_word = random.choice(siblings)

        # Prefer a property of A that B lacks, then any short property of A,
        # then something A is capable of.
        own_props = _short_concepts(graph.neighbors(a, "HasProperty"), max_words=2)
        sibling_props = {entry[0] for entry in graph.neighbors(b_word, "HasProperty")}
        distinguishing = [entry for entry in own_props if entry[0] not in sibling_props]

        pool = distinguishing or own_props or graph.neighbors(a, "CapableOf")
        if pool:
            p_word = _readable(random.choice(pool)[0])
        else:
            p_word = random.choice(["ambition", "an attitude", "a plan", "patience"])

        template = self._pick_template()
        saying = template.format(A=a, B=b_word, P=p_word)

        return saying, {
            "template_family": self.id,
            "template": template,
            "chain": f"{a} IsA same category as {b_word}; {a} HasProperty {p_word}",
            "slots": {"A": a, "B": b_word, "P": p_word},
        }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _readable(concept):
|
|
"""Convert ConceptNet concept to readable form: 'feed_chicken' -> 'feed chicken'."""
|
|
return concept.replace("_", " ")
|
|
|
|
|
|
def _short_concepts(items, max_words=3):
|
|
"""Filter concept tuples to only those with short readable names.
|
|
|
|
Items can be tuples where first element is the concept string.
|
|
Returns items where the concept has at most max_words words.
|
|
"""
|
|
return [item for item in items if len(item[0].split("_")) <= max_words]
|
|
|
|
|
|
def _gerund(word):
|
|
"""Rough gerund form of a verb/action word."""
|
|
word = word.split("_")[0] if "_" in word else word # take first word for compounds
|
|
if word.endswith("e") and not word.endswith("ee"):
|
|
return word[:-1] + "ing"
|
|
if word.endswith("ing"):
|
|
return word
|
|
if len(word) > 2 and word[-1] not in "aeiou" and word[-2] in "aeiou" and word[-3] not in "aeiou":
|
|
return word + word[-1] + "ing"
|
|
return word + "ing"
|
|
|
|
|
|
def _a(word):
|
|
"""Add 'a' or 'an' article."""
|
|
if word and word[0] in "aeiou":
|
|
return f"an {word}"
|
|
return f"a {word}"
|
|
|
|
|
|
# Template-family id (as accepted by --template) -> implementing class.
# Insertion order is the order shown by --list-templates.
TEMPLATE_REGISTRY = dict(
    deconstruction=Deconstruction,
    denial_of_consequences=DenialOfConsequences,
    ironic_deficiency=IronicDeficiency,
    futile_preparation=FutilePreparation,
    hypocritical_complaint=HypocriticalComplaint,
    tautological_wisdom=TautologicalWisdom,
    false_equivalence=FalseEquivalence,
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main generation logic
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def generate_one(graph, template_id=None, seed_word=None, seed_category=None,
                 debug=False, max_retries=20):
    """Generate a single folksy saying.

    Args:
        graph: The graph handed to the template family.
        template_id: Key of ``TEMPLATE_REGISTRY`` to use; when omitted a
            family is chosen at random on every attempt.
        seed_word: Optional explicit seed word.
        seed_category: Optional category to draw a random seed from.
        debug: When True, the family's debug dict is returned alongside the
            saying; otherwise the second element is None.
        max_retries: Maximum number of generation attempts.

    Returns:
        Always a 2-tuple ``(saying, debug_info)``.  It is ``(None, None)``
        when ``template_id`` is unknown (a message is printed to stderr) or
        when every attempt failed.
    """
    # Validate once up front.  Returning a pair here (rather than a bare
    # None) keeps ``saying, dbg = generate_one(...)`` safe for callers.
    if template_id and template_id not in TEMPLATE_REGISTRY:
        print(f"Unknown template: {template_id}", file=sys.stderr)
        return None, None

    family_ids = list(TEMPLATE_REGISTRY)
    for _ in range(max_retries):
        tid = template_id or random.choice(family_ids)
        tmpl = TEMPLATE_REGISTRY[tid](graph)
        saying, dbg = tmpl.generate(seed_word=seed_word, seed_category=seed_category)
        if saying:
            return saying, (dbg if debug else None)

    return None, None
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses the options, loads the graph (exiting with status 1 if a data
    file is missing), optionally merges fictional entities, then writes the
    requested number of sayings to the output file or stdout.
    """
    cli = argparse.ArgumentParser(
        description="Generate folksy fake-proverbs using ConceptNet relationships."
    )
    cli.add_argument("--template", "-t", choices=list(TEMPLATE_REGISTRY.keys()),
                     help="Specify a meta-template family")
    cli.add_argument("--seed", "-s", help="Seed with a specific word")
    cli.add_argument("--category", "-c", help="Seed with a category (e.g., animal, tool)")
    cli.add_argument("--entities", "-e", help="Path to fictional entities JSON file")
    cli.add_argument("--count", "-n", type=int, default=1, help="Number of sayings to generate")
    cli.add_argument("--output", "-o", help="Output file (default: stdout)")
    cli.add_argument("--debug", "-d", action="store_true", help="Show relationship chain debug info")
    cli.add_argument("--vocab", help="Path to folksy_vocab.csv")
    cli.add_argument("--relations", help="Path to folksy_relations.csv")
    cli.add_argument("--list-templates", action="store_true", help="List available templates")
    cli.add_argument("--list-categories", action="store_true", help="List available categories")

    opts = cli.parse_args()

    # Listing templates needs no data files, so it runs before loading.
    if opts.list_templates:
        for family_id, family in TEMPLATE_REGISTRY.items():
            print(f" {family_id:30s} {family.name}")
        return

    # Load graph
    graph = FolksyGraph()
    vocab_file = opts.vocab or (DATA_DIR / "folksy_vocab.csv")
    relations_file = opts.relations or (DATA_DIR / "folksy_relations.csv")
    try:
        graph.load(vocab_path=vocab_file, relations_path=relations_file)
    except FileNotFoundError as e:
        print(f"Error: {e}", file=sys.stderr)
        print("Run scripts/extract_from_conceptnet.py first to generate data files.", file=sys.stderr)
        sys.exit(1)

    if opts.list_categories:
        for cat in sorted(graph.by_category.keys()):
            print(f" {cat:20s} ({len(graph.by_category[cat])} words)")
        return

    # Merge fictional entities
    if opts.entities:
        graph.merge_fictional(opts.entities)

    # Generate
    sink = open(opts.output, "w", encoding="utf-8") if opts.output else sys.stdout
    try:
        for index in range(opts.count):
            saying, dbg = generate_one(
                graph,
                template_id=opts.template,
                seed_word=opts.seed,
                seed_category=opts.category,
                debug=opts.debug,
            )
            if not saying:
                sink.write(f"(failed to generate saying #{index + 1} after retries)\n")
                continue

            sink.write(saying + "\n")
            if opts.debug and dbg:
                sink.write(f" [DEBUG] family={dbg['template_family']}\n")
                sink.write(f" [DEBUG] chain: {dbg['chain']}\n")
                sink.write(f" [DEBUG] slots: {dbg['slots']}\n")
                sink.write("\n")
    finally:
        # Only close what was opened here; stdout is left alone.
        if opts.output:
            sink.close()
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|