#!/usr/bin/python
import re, sys, StringIO
"""Enumerate sentences of a context-free grammar.

Some sample, and kind of creepy, output:

Python writes this.
Python writes Python.
Python writes code.
Python writes you.
Python writes me.
...
the Python is code.
the Python is you.
the Python is me.
the code writes this.
...
6 lines are Python.
6 lines are code.
6 lines are you.
6 lines are me.
the generators are this.
the generators are Python.
...
by contrast, this is code.
by contrast, this is you.
by contrast, this is me.
the Python writes 6 generators.
the Python writes 6 lines.
the Python writes the generators.
the Python writes the lines.
...
by contrast, the lines are Python.
by contrast, the lines are code.
by contrast, the lines are you.
by contrast, the lines are me.
...
6 generators let you think me.
6 generators let me write this.
6 generators let me write Python.
...
the lines let Python think this.
the lines let Python think Python.
...
by contrast, the code writes the generators.
by contrast, the code writes the lines.
by contrast, the code is 6 generators.
by contrast, the code is 6 lines.
...
Python writes the Python code generators.
Python writes the Python code lines.
...
code writes 6 lines of code.
code writes 6 lines of you.
code writes 6 lines of me.
code writes the generators of this.
code writes the generators of Python.
code writes the generators of code.
code writes the generators of you.
code writes the generators of me.
"""

default_cfg = '''
sentence "."

sentence:
        singular-subject " " singular-predicate
        plural-subject " " plural-predicate
        sentence-modifier ", " sentence
sentence-modifier:
        "by contrast"
plural-subject:
        plural-noun
singular-subject:
        mass-noun
        singular-subject-pronoun
        "the " mass-noun
singular-subject-pronoun:
        "that"
        "this"
mass-noun:
        "Python"
        "code"
plural-noun:
        number " " simple-plural-noun
        plural-article " " simple-plural-noun
        plural-noun " " prepositional-phrase
plural-article:
        "the"
singular-article:
        "a"
        plural-article
prepositional-phrase:
        preposition " " object
preposition:
        "of"
simple-plural-noun:
        mass-noun " " simple-plural-noun
        "generators"
        "lines"
number:
        "6"
plural-predicate:
        plural-transitive-verb-phrase " " object
plural-transitive-verb-phrase:
        "let " object " " infinitive
        "are"
infinitive:
        "write"
        "think"
object:
        "this"
        plural-noun
        mass-noun
        "you"
        "me"
singular-predicate:
        singular-transitive-verb-phrase " " object
singular-transitive-verb-phrase:
        "writes"
        "is"
'''


class Literal:
    "A literal string in the grammar."
    def __init__(self, contents):
        self.contents = contents.replace(r"\n", "\n")
    def alternatives(self, nwords, grammar):
        if nwords == 1:
            yield self.contents

class Nonterminal:
    "A nonterminal in the grammar; expands into some phrase."
    def __init__(self, name):
        self.name = name
    def alternatives(self, nwords, grammar):
        for production in grammar[self.name]:
            for alternative in production.alternatives(nwords, grammar):
                yield alternative

class Concatenation:
    "A concatenation of two literals, nonterminals, and concatenations."
    def __init__(self, left, right):
        self.left, self.right = left, right
    def alternatives(self, nwords, grammar):
        for ii in range(1, nwords):
            for left_alt in self.left.alternatives(ii, grammar):
                for right_alt in self.right.alternatives(nwords - ii,
                                                         grammar):
                    yield left_alt + right_alt

def concatenation(items):
    "Produce a concatenation of an arbitrary nonzero number of terms."
    assert len(items) > 0
    if len(items) == 1: return items[0]
    elif len(items) == 2: return Concatenation(items[0], items[1])
    else: return Concatenation(items[0], concatenation(items[1:]))

def parse_production(line):
    "Turn a production line into a Concatenation, Nonterminal, or Literal"
    tokens = re.findall(r'\s*([-\w]+|"(?:[^"\\]|\\.)+")', line)
    rv = []
    for tok in tokens:
        if tok.startswith('"'):
            rv.append(Literal(tok[1:-1]))
        else:
            rv.append(Nonterminal(tok))
    return concatenation(rv)

class CouldntParse(Exception): pass

class Grammar:
    "A container for the productions and the first rule."
    def __init__(self, productions, first_rule):
        self.first_rule = first_rule
        self.productions = productions
    def __getitem__(self, x):
        return self.productions[x]
    def alternatives(self, nwords):
        return self.first_rule.alternatives(nwords, self)

def parse_grammar(infile):
    current_rule = None
    first_rule = None
    productions = {}
    for line in infile:
        if line.startswith(" "):
            productions[current_rule].append(parse_production(line))
        elif line.endswith(":\n"):
            current_rule = line[:-2]
            productions[current_rule] = []
        elif re.match(r"\s*$", line) or re.match(r'#', line):
            pass
        elif first_rule is None:
            first_rule = parse_production(line)
        else:
            raise CouldntParse(line)
    return Grammar(productions, first_rule)

if __name__ == '__main__':
    if len(sys.argv) == 1:
        infile = StringIO.StringIO(default_cfg)
    else:
        infile = file(sys.argv[1])
    grammar = parse_grammar(infile)
    ii = 0
    while True:
        for sentence in grammar.alternatives(ii):
            print sentence
        ii += 1


Reply via email to