#!/usr/bin/python import re, sys, StringIO """Enumerate sentences of a context-free grammar.
Some sample, and kind of creepy, output: Python writes this. Python writes Python. Python writes code. Python writes you. Python writes me. ... the Python is code. the Python is you. the Python is me. the code writes this. ... 6 lines are Python. 6 lines are code. 6 lines are you. 6 lines are me. the generators are this. the generators are Python. ... by contrast, this is code. by contrast, this is you. by contrast, this is me. the Python writes 6 generators. the Python writes 6 lines. the Python writes the generators. the Python writes the lines. ... by contrast, the lines are Python. by contrast, the lines are code. by contrast, the lines are you. by contrast, the lines are me. ... 6 generators let you think me. 6 generators let me write this. 6 generators let me write Python. ... the lines let Python think this. the lines let Python think Python. ... by contrast, the code writes the generators. by contrast, the code writes the lines. by contrast, the code is 6 generators. by contrast, the code is 6 lines. ... Python writes the Python code generators. Python writes the Python code lines. ... code writes 6 lines of code. code writes 6 lines of you. code writes 6 lines of me. code writes the generators of this. code writes the generators of Python. code writes the generators of code. code writes the generators of you. code writes the generators of me. """ default_cfg = ''' sentence "." sentence: singular-subject " " singular-predicate plural-subject " " plural-predicate sentence-modifier ", " sentence sentence-modifier: "by contrast" plural-subject: plural-noun singular-subject: mass-noun singular-subject-pronoun "the " mass-noun singular-subject-pronoun: "that" "this" mass-noun: "Python" "code" plural-noun: number " " simple-plural-noun plural-article " " simple-plural-noun plural-noun " " prepositional-phrase plural-article: "the" singular-article: "a" plural-article prepositional-phrase: preposition " " object preposition: "of" simple-plural-noun: mass-noun " " simple-plural-noun "generators" "lines" number: "6" plural-predicate: plural-transitive-verb-phrase " " object plural-transitive-verb-phrase: "let " object " " infinitive "are" infinitive: "write" "think" object: "this" plural-noun mass-noun "you" "me" singular-predicate: singular-transitive-verb-phrase " " object singular-transitive-verb-phrase: "writes" "is" ''' class Literal: "A literal string in the grammar." def __init__(self, contents): self.contents = contents.replace(r"\n", "\n") def alternatives(self, nwords, grammar): if nwords == 1: yield self.contents class Nonterminal: "A nonterminal in the grammar; expands into some phrase." def __init__(self, name): self.name = name def alternatives(self, nwords, grammar): for production in grammar[self.name]: for alternative in production.alternatives(nwords, grammar): yield alternative class Concatenation: "A concatenation of two literals, nonterminals, and concatenations." def __init__(self, left, right): self.left, self.right = left, right def alternatives(self, nwords, grammar): for ii in range(1, nwords): for left_alt in self.left.alternatives(ii, grammar): for right_alt in self.right.alternatives(nwords - ii, grammar): yield left_alt + right_alt def concatenation(items): "Produce a concatenation of an arbitrary nonzero number of terms." assert len(items) > 0 if len(items) == 1: return items[0] elif len(items) == 2: return Concatenation(items[0], items[1]) else: return Concatenation(items[0], concatenation(items[1:])) def parse_production(line): "Turn a production line into a Concatenation, Nonterminal, or Literal" tokens = re.findall(r'\s*([-\w]+|"(?:[^"\\]|\\.)+")', line) rv = [] for tok in tokens: if tok.startswith('"'): rv.append(Literal(tok[1:-1])) else: rv.append(Nonterminal(tok)) return concatenation(rv) class CouldntParse(Exception): pass class Grammar: "A container for the productions and the first rule." def __init__(self, productions, first_rule): self.first_rule = first_rule self.productions = productions def __getitem__(self, x): return self.productions[x] def alternatives(self, nwords): return self.first_rule.alternatives(nwords, self) def parse_grammar(infile): current_rule = None first_rule = None productions = {} for line in infile: if line.startswith(" "): productions[current_rule].append(parse_production(line)) elif line.endswith(":\n"): current_rule = line[:-2] productions[current_rule] = [] elif re.match(r"\s*$", line) or re.match(r'#', line): pass elif first_rule is None: first_rule = parse_production(line) else: raise CouldntParse(line) return Grammar(productions, first_rule) if __name__ == '__main__': if len(sys.argv) == 1: infile = StringIO.StringIO(default_cfg) else: infile = file(sys.argv[1]) grammar = parse_grammar(infile) ii = 0 while True: for sentence in grammar.alternatives(ii): print sentence ii += 1