I improved the tree-rewriting system a little bit more today, including a patch from Dave Long that lets you pass in filenames on the command line, and I transliterated Wouter van Oortmerssen's quicksort example from his thesis.
The notation is much improved; the old notation had the nice feature that you could, in some sense, generate rewrite rules on the fly; but it had the serious bug that both sides of a rewrite rule would be reduced into normal form before they could be used. This version lacks that bug. It's lots of fun to play with, but it's not yet ready for any serious work, for the following reasons: - it's still outrageously slow; quicksorting the list of 8 (base-one) integers at the end of the example takes around 350000 function calls, eight seconds on my laptop. This is due in part to linearly searching the rule database for every rewrite. - the language presently contains no I/O, data types (integers are distinguished from atoms but have the same behavior), local rules, or conditionals, and all of this makes it a bit clumsy. - because I wrote the tokenizer/parser in a side-effect-free heavily tail-call-oriented style, long input expressions will overflow its stack. - because the evaluator is recursive, deeply-nested internal data will overflow the interpreter's stack too. - there is no syntactic sugar for lists, which makes them extremely clumsy The quicksort example follows, followed by the main program itself. #!/home/kragen/devel/tree-rewriting/treetramp # quicksort example from Wouter van Oortmerssen's thesis, more or less append(nil $456) -> $456 append(cons($1 $23) $456) -> cons($1 append($23 $456)) qsort(nil) -> nil # thesis version: # qsort(cons($p $t)) -> append(qsort($a) cons($p qsort($z))) # when ($a $z) = filter(qsortcompare($p) $t) qsort(cons($p $t)) -> qsort2($p partition(qsortcompare($p) $t)) qsort2($p partitioned($a $z)) -> append(qsort($a) cons($p qsort($z))) apply(qsortcompare($x) $y) -> <($y $x) # definition of comparisons for a simple repr. of numbers <(0 0) -> false # zero is not less than zero <(succ($x) 0) -> false # nothing else is less than zero <(0 succ($x)) -> true # zero is less than anything else <(succ($a) succ($b)) -> <($a $b) # and some numbers for convenience 1 -> succ(0) 2 -> succ(1) 3 -> succ(2) 4 -> succ(3) 5 -> succ(4) 6 -> succ(5) partition($func $list) -> partition($func $list nil nil) partition($func nil $a $b) -> partitioned($a $b) partition($func cons($h $t) $a $b) -> partition2(apply($func $h) $func $h $t $a $b) partition2(true $func $h $t $a $b) -> partition($func $t cons($h $a) $b) partition2(false $func $h $t $a $b) -> partition($func $t $a cons($h $b)) # on my machine this takes 8.7 user seconds to sort this list of 8 numbers! qsort(cons(3 cons(1 cons(2 cons(4 cons(0 cons(0 cons(5 cons(6 nil))))))))) #!/usr/bin/python # A tree is a possibly empty sequence of arguments, the first # of which is an atom. # The arguments may be trees, atoms, integers, or variables. # Variables are "abstract"; atoms and integers are not. # Trees are "abstract" iff they contain any abstract arguments. # Things that are not abstract are "concrete". # Abstract things can be "instantiated" with some mapping of # variables to concrete things, producing a concrete thing. # If an abstract thing can be instantiated to produce a # certain concrete thing, it is said to unify with it. # TODO: # - eliminate distinctions between trees, atoms, and integers # - remove integer type, use integer trees instead # - add integer functions: + - * / % < = > <= >= & | # - figure out how to implement Boolean logic completely (variables incl.) # - add some facility for nice I/O # - do some symbolic math with nice I/O from __future__ import nested_scopes import string, traceback, sys, StringIO class thing: def __init__(self, value): self.value = value def abstract(self): return 0 def __str__(self): return str(self.value) def instantiate(self, mapping): return self def compatible_type(self, other): return isinstance(other, self.__class__) def unify(self, concrete, mapping): if not self.compatible_type(concrete): return None if self.value != concrete.value: return None return mapping def __cmp__(self, other): if not self.compatible_type(other): return cmp(id(self), id(other)) return cmp(self.value, other.value) def toplevel_result(self, ostream): ostream.write('-> ' + str(self) + '\n') class variable(thing): def abstract(self): return 1 def __str__(self): return "$" + self.value def instantiate(self, mapping): return mapping[self.value] def unify(self, concrete, mapping): if mapping.has_key(self.value) and mapping[self.value] != concrete: return None mapping[self.value] = concrete return mapping class atom(thing): pass class integer(thing): pass class tree(thing): def __init__(self, items): assert isinstance(items[0], atom) self.items = items self.traversing = 0 self.is_abstract = 0 for item in self.items: if item.abstract(): self.is_abstract = 1 def abstract(self): return self.is_abstract def __str__(self): if self.traversing: return "((error))" self.traversing = 1 try: head, args = self.items[0], self.items[1:] return str(head) + "(" + string.join(map(str, args), ' ') + ')' finally: self.traversing = 0 def instantiate(self, mapping): if not self.abstract(): return self return tree(map(lambda item: item.instantiate(mapping), self.items)) def unify(self, concrete, mapping): if not self.compatible_type(concrete): return None if len(self.items) != len(concrete.items): return None for ii in range(len(self.items)): mapping = self.items[ii].unify(concrete.items[ii], mapping) if mapping is None: return None return mapping def __cmp__(self, other): if not self.compatible_type(other): return cmp(id(self), id(other)) for ii in range(len(self.items)): if ii >= len(other.items): return 1 rv = cmp(self.items[ii], other.items[ii]) if rv != 0: return rv if len(self.items) == len(other.items): return 0 else: return -1 class toplevel_action(thing): def __init__(self, value, mapping = None): self.value, self.mapping = value, mapping def instantiate(self, mapping): return self.__class__(self.value, mapping) def toplevel_result(self, ostream): return apply(self.value, (ostream,), self.mapping) def unify(abstract, concrete, mapping = None): """Return a mapping that instantiates the abstract as the concrete, or None If a mapping exists, find it and return it; otherwise, return None. """ if mapping is None: mapping = {} mapping = abstract.unify(concrete, mapping) if mapping is not None: assert abstract.instantiate(mapping) == concrete return mapping ### parsing def cons_string(astring): rv = () for ii in range(len(astring)-1, -1, -1): rv = (astring[ii], rv) return rv def append_charlists(aa, bb): if aa == (): return bb else: hh, tt = aa return (hh, append_charlists(tt, bb)) def restring(charlist): rv = [] while charlist: rv.append(charlist[0]) charlist = charlist[1] return string.join(rv, '') class parse_error(Exception): pass class unmatched_leftparen(parse_error): pass def parse_charlist(charlist): if charlist == (): raise parse_error("premature eof") hh, tt = charlist if hh == ')': raise parse_error("unmatched )") elif hh == '(': raise parse_error("( not following atom") elif hh in string.whitespace: return parse_charlist(tt) elif hh == '$': return parse_variable(tt) elif hh in string.digits: return parse_number(tt, hh) else: return parse_atomstuff(tt, hh) def parse_atomstuff(charlist, name=''): if charlist == (): return atom(name), charlist hh, tt = charlist if hh in string.whitespace + '()': return parse_postatom(strip_whitespace(charlist), name) else: return parse_atomstuff(tt, name + hh) def parse_variable(charlist): name, charlist = parse_atomstuff(charlist) if name.value == '': raise parse_error("missing variable name") return variable(name.value), charlist def parse_number(charlist, digits): if charlist == (): return integer(int(digits)), () hh, tt = charlist if hh in string.digits: return parse_number(tt, digits + hh) else: return integer(int(digits)), strip_whitespace(charlist) def parse_postatom(charlist, name): if name == '': raise parse_error("missing atom name") if charlist == (): return atom(name), () hh, tt = charlist if hh == '(': return parse_args(tt, (atom(name),)) else: return atom(name), charlist def parse_args(charlist, args): if charlist == (): raise unmatched_leftparen("unmatched (", args) hh, tt = charlist if hh == ')': return tree(args), strip_whitespace(tt) elif hh in string.whitespace: return parse_args(tt, args) else: nextitem, tt = parse_charlist(charlist) return parse_args(tt, args + (nextitem,)) def strip_whitespace(charlist): if charlist == (): return charlist hh, tt = charlist if hh in string.whitespace: return strip_whitespace(tt) else: return charlist def parse(astring): return parse_charlist(cons_string(astring)) def parseall(astring): result, remainder = parse(astring) assert remainder == () return result def rewrite(atree, before, after): mapping = unify(before, atree) if mapping is None: return None return after.instantiate(mapping) def eagerly_rewrite(athing, rules): if isinstance(athing, tree): items = map(lambda thing: eagerly_rewrite(thing, rules), athing.items) athing = tree(items) for before, after in rules: result = rewrite(athing, before, after) if result is not None: return eagerly_rewrite(result, rules) return athing class dummy_writer: def write(self, astring): pass def flush(self): pass class parser: def __init__(self, infile, lineno=0, outfile=dummy_writer(), prompt=''): self.infile, self.lineno, self.outfile, self.prompt = ( infile, lineno, outfile, prompt) def readline(self): self.outfile.write(self.prompt) self.outfile.flush() self.lineno = self.lineno + 1 return self.infile.readline() def parseline(self): """Parse at least a line from infile, returning a list of trees. When an argument list continues across lines, this function prefers parsing multiple lines to reporting an error. """ inputline = self.readline() startlineno = self.lineno if inputline == '': raise EOFError charlist = strip_whitespace(cons_string(inputline)) results = [] while charlist and charlist[0] != '#': try: tree, charlist = parse_charlist(charlist) except unmatched_leftparen, v: nextline = cons_string(self.readline()) if nextline == (): # eof raise unmatched_leftparen(*(v.args + ('line no', startlineno))) charlist = append_charlists(charlist, nextline) except parse_error, v: v.args = v.args + ('line no', startlineno) raise v else: results.append(tree) return results def test_parser(): infile = (''' x a b x() a() b() x( ) a( ) b( ) x() a( ''' + # lines 6-10 follow ''') b() # a comment snarg fuzz # another comment a( ) # yet another ''' + # lines 11-14 follow '''3 ) x() a( ) b( ''') p = parser(StringIO.StringIO(infile)) assert p.parseline() == [] assert p.lineno == 1 assert p.parseline() == map(parseall, 'x a b'.split()) assert p.lineno == 2 assert p.parseline() == map(parseall, 'x() a() b()'.split()) assert p.lineno == 3 assert p.parseline() == map(parseall, 'x() a() b()'.split()) assert p.lineno == 4 assert p.parseline() == map(parseall, 'x() a() b()'.split()) assert p.lineno == 6 assert p.parseline() == [] assert p.lineno == 7 assert p.parseline() == map(parseall, 'snarg fuzz'.split()) assert p.lineno == 8 assert p.parseline() == [parseall('a()')] assert p.lineno == 10 assert p.parseline() == [parseall('3')] assert p.lineno == 11 try: p.parseline() except parse_error, pe: assert 12 in pe.args, `pe.args` else: assert 0 assert_raises(lambda: p.parseline(), parse_error) assert_raises(lambda: p.parseline(), EOFError) def repl(files): """Play with rewriting expressions. a -> b rewrites a to b. d(peanuts) deletes rules for expressions starting with 'peanuts'. See the various example *.tree files packaged herewith for examples. """ rules = [] def define_rewrite_rule(ostream, pattern, replacement): rules.append((pattern, replacement)) ostream.write("will rewrite %s to %s\n" % (pattern, replacement)) def delete_rules(ostream, atom): ostream.write('forgetting about %s\n' % atom) rules[:] = [(pat, rep) for (pat, rep) in rules if not hasattr(pat, 'items') or pat.items[0] != atom] def define_action(patternstr, routine): rules.append((parseall(patternstr), toplevel_action(routine))) #define_action('r($pattern $replacement)', define_rewrite_rule) define_action('d($atom)', delete_rules) def import_file(ostream, filename): i_file(ostream, filename.value) define_action('import($filename)', import_file) def i_file(ostream, filename): infile = open(filename) readfile(parser(infile), ostream) infile.close() ostream.write("imported %s\n" % filename) def readfile(cmdlineparser, ostream): while 1: try: try: trees = cmdlineparser.parseline() except EOFError: ostream.write("\n"); return if len(trees) == 1: eagerly_rewrite(trees[0], rules[:]).toplevel_result(ostream) elif len(trees) == 0: pass elif len(trees) == 3 and trees[1] == atom('->'): define_rewrite_rule(ostream, trees[0], trees[2]) else: ostream.write("Couldn't understand that: %s\n" % (map(str, trees))) except: traceback.print_exc() for f in files: i_file(sys.stdout, f) cmdlineparser = parser(sys.stdin, outfile=sys.stdout, prompt=' ') readfile(cmdlineparser, sys.stdout) class wrong_exception(AssertionError): pass def assert_raises(subr, exc): try: subr() except exc: return 1 except: exc_type, exc_value, tb = sys.exc_info() raise wrong_exception, wrong_exception(exc_value), tb else: raise wrong_exception("no exception") def test(): def raise_TypeError(): raise TypeError assert_raises(raise_TypeError, TypeError) def assert_wrong(): assert_raises(raise_TypeError, ValueError) assert_raises(assert_wrong, AssertionError) assert_raises(assert_wrong, wrong_exception) def assert_parse_error(astr): assert_raises(lambda: parse(astr), parse_error) assert_parse_error('') assert_parse_error(')') assert_parse_error('(') assert_parse_error(' ') assert_parse_error(' )') assert_parse_error(' (') def dparse(astr): return parse(astr)[0] def punp(astr): return str(dparse(astr)) def teq(astr): assert punp(astr) == astr, punp(astr) teq('asdf') teq('asdf()') teq('1324') teq('asdf(a b c)') teq('a(1 3 b() c)') teq('.agjiew0#*2808U%]*@#U*') teq('$x') teq('a(3 $x b($3) x)') assert punp(' as ') == 'as' assert punp(' x ( y 2 ) ') == 'x(y 2)' assert_parse_error('x(y(z)') assert_parse_error('x((y) z)') assert_parse_error('$') assert_parse_error('$()') ### instantiation atree = dparse('a(b($c d)$e)') assert str(atree) == 'a(b($c d) $e)', str(atree) atoma = atom('a') assert_raises(lambda: atree.instantiate({}), KeyError) insttree = atree.instantiate({'c': atoma, 'e': atoma}) assert str(insttree) == 'a(b(a d) a)', str(insttree) insttree = atree.instantiate({'c': atom('b'), 'e': atoma}) assert str(insttree) == 'a(b(b d) a)', str(insttree) ### comparison assert atom('a') == atom('a') assert atom('a') != atom('b') assert integer(1) == integer(1) assert integer(1) < integer(2) assert integer(2) > integer(1) assert variable('a') == variable('a') assert variable('a') != variable('b') assert atom('a') != variable('a') assert variable('a') != atom('a') assert atom('1') != variable('1') assert dparse('x(a b c(d e f()) g)') == dparse('x(a b c(d e f()) g)') assert dparse('x(a b c(d e f()) g)') != 'x(a b c(d e f()) g)' assert dparse('x(a b c(d e f()) g)') != dparse('x()') assert dparse('x(a)') != dparse('x()') assert dparse('x()') != dparse('x(a)') assert dparse('x(a)') == dparse('x(a)') assert dparse('x(a)') != dparse('x(b)') ### unification def sunify(stra, strb): return unify(dparse(stra), dparse(strb)) assert sunify('a', 'a') == {} assert sunify('a', 'b') == None assert sunify('$a', '3') == {'a': integer(3)} assert sunify('a(b($c d) $e)', 'a(b(a d) a)') == { 'c': atom('a'), 'e': atom('a')} assert sunify('a(b($c d) $e)', 'a(b(b d) a)') == { 'c': atom('b'), 'e': atom('a')} assert sunify('hi($x)', 'hi(kragen(sitaker))') == { 'x': dparse('kragen(sitaker)')} assert sunify('a($a $a)', 'a(b b)') == {'a': atom('b')} assert sunify('a($a $a)', 'a(b c)') == None ### rewriting def srew(stra, strb, strc): return str(rewrite(dparse(stra), dparse(strb), dparse(strc))) assert srew('a', 'b', 'c') == 'None' assert srew('a', '$b', 'c') == 'c' assert srew('a', '$b', '$b') == 'a' assert srew('a(b)', '$x', '$x') == 'a(b)' assert srew('a(b)', '$x', 'y($x)') == 'y(a(b))' assert srew('a(b)', 'a($x)', 'c($x)') == 'c(b)' ### repetitive rewriting def rules(alist): def parsepair(apair): a, b = apair return dparse(a), dparse(b) return map(parsepair, alist) def evalu(astr, rules): return str(eagerly_rewrite(dparse(astr), rules)) # "laws of form" boolean logic lofrules = rules([('not(not($x))', '$x'), ('or(1 $x)', '1'), ('or($x 1)', '1'), ('or($x $x)', '$x')]) assert evalu('1', lofrules) == '1' assert evalu('not(1)', lofrules) == 'not(1)' assert evalu('not(not(1))', lofrules) == '1' assert evalu('not(not(not(1)))', lofrules) == 'not(1)' assert evalu('not(not(not(not(1)))))', lofrules) == '1' assert evalu('or(1 not(1))', lofrules) == '1' assert evalu('or(not(1) 1)', lofrules) == '1' assert evalu('or(1 1)', lofrules) == '1' assert evalu('not(or(1 1))', lofrules) == 'not(1)' assert evalu('not(or(not(1) not(1)))', lofrules) == '1' a = cons_string('fibble') b = cons_string('wur') c = restring(append_charlists(a, b)) assert c == restring(a) + restring(b), c test_parser() if __name__ == '__main__': test() if len(sys.argv) > 1: repl(sys.argv[1:]) else: print repl.__doc__ repl([]) I wanted to be able to #! the quicksort program, but my kernel isn't smart enough to chain #! lines. So I wrote this tiny C shim to put on the #! lines instead: #include <unistd.h> #include <stdio.h> #include <stdlib.h> #define PYTHONPATH "/usr/bin/python" #define TREEPYPATH "/home/kragen/devel/tree-rewriting/tree.py" int usage(char *argv0) { fprintf(stderr, "Usage: %s something.tree [args...]\n", argv0); return -1; } int main(int argc, char **argv) { int ii; char **nargv; if (argc < 2) return usage(argv[0]); nargv = malloc(sizeof(*nargv) * (argc + 2)); if (!nargv) { perror("malloc"); return -1; } nargv[0] = PYTHONPATH; nargv[1] = TREEPYPATH; for (ii = 1; ii < argc; ii++) { nargv[ii + 1] = argv[ii]; } nargv[argc + 1] = NULL; execv(nargv[0], nargv); perror("execv"); return -1; } -- <[EMAIL PROTECTED]> Kragen Sitaker <http://www.pobox.com/~kragen/> Edsger Wybe Dijkstra died in August of 2002. The world has lost a great man. See http://advogato.org/person/raph/diary.html?start=252 and http://www.kode-fu.com/geek/2002_08_04_archive.shtml for details.