#!/usr/bin/python
import re, sys, StringIO
"""Enumerate sentences of a context-free grammar.
Some sample, and kind of creepy, output:
Python writes this.
Python writes Python.
Python writes code.
Python writes you.
Python writes me.
...
the Python is code.
the Python is you.
the Python is me.
the code writes this.
...
6 lines are Python.
6 lines are code.
6 lines are you.
6 lines are me.
the generators are this.
the generators are Python.
...
by contrast, this is code.
by contrast, this is you.
by contrast, this is me.
the Python writes 6 generators.
the Python writes 6 lines.
the Python writes the generators.
the Python writes the lines.
...
by contrast, the lines are Python.
by contrast, the lines are code.
by contrast, the lines are you.
by contrast, the lines are me.
...
6 generators let you think me.
6 generators let me write this.
6 generators let me write Python.
...
the lines let Python think this.
the lines let Python think Python.
...
by contrast, the code writes the generators.
by contrast, the code writes the lines.
by contrast, the code is 6 generators.
by contrast, the code is 6 lines.
...
Python writes the Python code generators.
Python writes the Python code lines.
...
code writes 6 lines of code.
code writes 6 lines of you.
code writes 6 lines of me.
code writes the generators of this.
code writes the generators of Python.
code writes the generators of code.
code writes the generators of you.
code writes the generators of me.
"""
default_cfg = '''
sentence "."
sentence:
singular-subject " " singular-predicate
plural-subject " " plural-predicate
sentence-modifier ", " sentence
sentence-modifier:
"by contrast"
plural-subject:
plural-noun
singular-subject:
mass-noun
singular-subject-pronoun
"the " mass-noun
singular-subject-pronoun:
"that"
"this"
mass-noun:
"Python"
"code"
plural-noun:
number " " simple-plural-noun
plural-article " " simple-plural-noun
plural-noun " " prepositional-phrase
plural-article:
"the"
singular-article:
"a"
plural-article
prepositional-phrase:
preposition " " object
preposition:
"of"
simple-plural-noun:
mass-noun " " simple-plural-noun
"generators"
"lines"
number:
"6"
plural-predicate:
plural-transitive-verb-phrase " " object
plural-transitive-verb-phrase:
"let " object " " infinitive
"are"
infinitive:
"write"
"think"
object:
"this"
plural-noun
mass-noun
"you"
"me"
singular-predicate:
singular-transitive-verb-phrase " " object
singular-transitive-verb-phrase:
"writes"
"is"
'''
class Literal:
"A literal string in the grammar."
def __init__(self, contents):
self.contents = contents.replace(r"\n", "\n")
def alternatives(self, nwords, grammar):
if nwords == 1:
yield self.contents
class Nonterminal:
"A nonterminal in the grammar; expands into some phrase."
def __init__(self, name):
self.name = name
def alternatives(self, nwords, grammar):
for production in grammar[self.name]:
for alternative in production.alternatives(nwords, grammar):
yield alternative
class Concatenation:
"A concatenation of two literals, nonterminals, and concatenations."
def __init__(self, left, right):
self.left, self.right = left, right
def alternatives(self, nwords, grammar):
for ii in range(1, nwords):
for left_alt in self.left.alternatives(ii, grammar):
for right_alt in self.right.alternatives(nwords - ii,
grammar):
yield left_alt + right_alt
def concatenation(items):
"Produce a concatenation of an arbitrary nonzero number of terms."
assert len(items) > 0
if len(items) == 1: return items[0]
elif len(items) == 2: return Concatenation(items[0], items[1])
else: return Concatenation(items[0], concatenation(items[1:]))
def parse_production(line):
"Turn a production line into a Concatenation, Nonterminal, or Literal"
tokens = re.findall(r'\s*([-\w]+|"(?:[^"\\]|\\.)+")', line)
rv = []
for tok in tokens:
if tok.startswith('"'):
rv.append(Literal(tok[1:-1]))
else:
rv.append(Nonterminal(tok))
return concatenation(rv)
class CouldntParse(Exception): pass
class Grammar:
"A container for the productions and the first rule."
def __init__(self, productions, first_rule):
self.first_rule = first_rule
self.productions = productions
def __getitem__(self, x):
return self.productions[x]
def alternatives(self, nwords):
return self.first_rule.alternatives(nwords, self)
def parse_grammar(infile):
current_rule = None
first_rule = None
productions = {}
for line in infile:
if line.startswith(" "):
productions[current_rule].append(parse_production(line))
elif line.endswith(":\n"):
current_rule = line[:-2]
productions[current_rule] = []
elif re.match(r"\s*$", line) or re.match(r'#', line):
pass
elif first_rule is None:
first_rule = parse_production(line)
else:
raise CouldntParse(line)
return Grammar(productions, first_rule)
if __name__ == '__main__':
if len(sys.argv) == 1:
infile = StringIO.StringIO(default_cfg)
else:
infile = file(sys.argv[1])
grammar = parse_grammar(infile)
ii = 0
while True:
for sentence in grammar.alternatives(ii):
print sentence
ii += 1