more on tree rewriting

Kragen Sitaker Sun, 10 Nov 2002 00:39:54 -0800

I improved the tree-rewriting system a little bit more today,
including a patch from Dave Long that lets you pass in filenames on
the command line, and I transliterated Wouter van Oortmerssen's
quicksort example from his thesis.


The notation is much improved; the old notation had the nice feature
that you could, in some sense, generate rewrite rules on the fly; but
it had the serious bug that both sides of a rewrite rule would be
reduced into normal form before they could be used.  This version
lacks that bug.

It's lots of fun to play with, but it's not yet ready for any serious
work, for the following reasons:
- it's still outrageously slow; quicksorting the list of 8 (base-one)
  integers at the end of the example takes around 350000 function calls,
  eight seconds on my laptop.  This is due in part to linearly searching
  the rule database for every rewrite.
- the language presently contains no I/O, data types (integers are
  distinguished from atoms but have the same behavior), local rules,
  or conditionals, and all of this makes it a bit clumsy.
- because I wrote the tokenizer/parser in a side-effect-free
  heavily tail-call-oriented style, long input expressions will overflow
  its stack.
- because the evaluator is recursive, deeply-nested internal data will
  overflow the interpreter's stack too.
- there is no syntactic sugar for lists, which makes them extremely clumsy

The quicksort example follows, followed by the main program itself.

#!/home/kragen/devel/tree-rewriting/treetramp
# quicksort example from Wouter van Oortmerssen's thesis, more or less
append(nil $456)          -> $456
append(cons($1 $23) $456) -> cons($1 append($23 $456))

qsort(nil) -> nil
# thesis version:
#  qsort(cons($p $t)) -> append(qsort($a) cons($p qsort($z)))
#    when ($a $z) = filter(qsortcompare($p) $t)
qsort(cons($p $t)) -> qsort2($p partition(qsortcompare($p) $t))
qsort2($p partitioned($a $z)) -> append(qsort($a) cons($p qsort($z)))

apply(qsortcompare($x) $y) -> <($y $x)

# definition of comparisons for a simple repr. of numbers
<(0 0) -> false             # zero is not less than zero
<(succ($x) 0) -> false      # nothing else is less than zero
<(0 succ($x)) -> true       # zero is less than anything else
<(succ($a) succ($b)) -> <($a $b)
# and some numbers for convenience
1 -> succ(0)
2 -> succ(1)
3 -> succ(2)
4 -> succ(3)
5 -> succ(4)
6 -> succ(5)

partition($func $list) -> partition($func $list nil nil)
partition($func nil $a $b) -> partitioned($a $b)
partition($func cons($h $t) $a $b) -> partition2(apply($func $h) 
                                                 $func $h $t $a $b)
partition2(true $func $h $t $a $b)  -> partition($func $t cons($h $a) $b)
partition2(false $func $h $t $a $b) -> partition($func $t $a cons($h $b))

# on my machine this takes 8.7 user seconds to sort this list of 8 numbers!
qsort(cons(3 cons(1 cons(2 cons(4 cons(0 cons(0 cons(5 cons(6 nil)))))))))






#!/usr/bin/python
# A tree is a possibly empty sequence of arguments, the first
# of which is an atom.
# The arguments may be trees, atoms, integers, or variables.
# Variables are "abstract"; atoms and integers are not.
# Trees are "abstract" iff they contain any abstract arguments.
# Things that are not abstract are "concrete".

# Abstract things can be "instantiated" with some mapping of
# variables to concrete things, producing a concrete thing.

# If an abstract thing can be instantiated to produce a
# certain concrete thing, it is said to unify with it.

# TODO:
# - eliminate distinctions between trees, atoms, and integers
# - remove integer type, use integer trees instead
# - add integer functions: + - * / % < = > <= >= & | # - figure out how to implement 
Boolean logic completely (variables incl.)
# - add some facility for nice I/O
# - do some symbolic math with nice I/O

from __future__ import nested_scopes

import string, traceback, sys, StringIO

class thing:
    def __init__(self, value): self.value = value
    def abstract(self): return 0
    def __str__(self): return str(self.value)
    def instantiate(self, mapping): return self
    def compatible_type(self, other):
        return isinstance(other, self.__class__)
    def unify(self, concrete, mapping):
        if not self.compatible_type(concrete): return None
        if self.value != concrete.value: return None
        return mapping
    def __cmp__(self, other):
        if not self.compatible_type(other): return cmp(id(self), id(other))
        return cmp(self.value, other.value)
    def toplevel_result(self, ostream):
        ostream.write('-> ' + str(self) + '\n')
class variable(thing):
    def abstract(self): return 1
    def __str__(self): return "$" + self.value
    def instantiate(self, mapping): return mapping[self.value]
    def unify(self, concrete, mapping):
        if mapping.has_key(self.value) and mapping[self.value] != concrete:
            return None
        mapping[self.value] = concrete
        return mapping
class atom(thing): pass
class integer(thing): pass
class tree(thing):
    def __init__(self, items):
        assert isinstance(items[0], atom)
        self.items = items
        self.traversing = 0
        self.is_abstract = 0
        for item in self.items:
            if item.abstract(): self.is_abstract = 1
    def abstract(self): return self.is_abstract
    def __str__(self):
        if self.traversing: return "((error))"
        self.traversing = 1
        try:
            head, args = self.items[0], self.items[1:]
            return str(head) + "(" + string.join(map(str, args), ' ') + ')'
        finally:
            self.traversing = 0
    def instantiate(self, mapping):
        if not self.abstract(): return self
        return tree(map(lambda item: item.instantiate(mapping), self.items))
    def unify(self, concrete, mapping):
        if not self.compatible_type(concrete): return None
        if len(self.items) != len(concrete.items): return None
        for ii in range(len(self.items)):
            mapping = self.items[ii].unify(concrete.items[ii], mapping)
            if mapping is None: return None
        return mapping
    def __cmp__(self, other):
        if not self.compatible_type(other): return cmp(id(self), id(other))
        for ii in range(len(self.items)):
            if ii >= len(other.items): return 1
            rv = cmp(self.items[ii], other.items[ii])
            if rv != 0: return rv
        if len(self.items) == len(other.items): return 0
        else: return -1

class toplevel_action(thing):
    def __init__(self, value, mapping = None):
        self.value, self.mapping = value, mapping
    def instantiate(self, mapping):
        return self.__class__(self.value, mapping)
    def toplevel_result(self, ostream):
        return apply(self.value, (ostream,), self.mapping)

def unify(abstract, concrete, mapping = None):
    """Return a mapping that instantiates the abstract as the concrete, or None

    If a mapping exists, find it and return it; otherwise, return None.

    """
    if mapping is None: mapping = {}
    mapping = abstract.unify(concrete, mapping)
    if mapping is not None:
        assert abstract.instantiate(mapping) == concrete
    return mapping

### parsing

def cons_string(astring):
    rv = ()
    for ii in range(len(astring)-1, -1, -1):
        rv = (astring[ii], rv)
    return rv

def append_charlists(aa, bb):
    if aa == (): return bb
    else:
        hh, tt = aa
        return (hh, append_charlists(tt, bb))

def restring(charlist):
    rv = []
    while charlist:
        rv.append(charlist[0])
        charlist = charlist[1]
    return string.join(rv, '')

class parse_error(Exception): pass
class unmatched_leftparen(parse_error): pass

def parse_charlist(charlist):
    if charlist == (): raise parse_error("premature eof")
    hh, tt = charlist
    if hh == ')': raise parse_error("unmatched )")
    elif hh == '(': raise parse_error("( not following atom")
    elif hh in string.whitespace: return parse_charlist(tt)
    elif hh == '$': return parse_variable(tt)
    elif hh in string.digits: return parse_number(tt, hh)
    else: return parse_atomstuff(tt, hh)

def parse_atomstuff(charlist, name=''):
    if charlist == (): return atom(name), charlist
    hh, tt = charlist
    if hh in string.whitespace + '()':
        return parse_postatom(strip_whitespace(charlist), name)
    else: return parse_atomstuff(tt, name + hh)

def parse_variable(charlist):
    name, charlist = parse_atomstuff(charlist)
    if name.value == '': raise parse_error("missing variable name")
    return variable(name.value), charlist

def parse_number(charlist, digits):
    if charlist == (): return integer(int(digits)), ()
    hh, tt = charlist
    if hh in string.digits: return parse_number(tt, digits + hh)
    else: return integer(int(digits)), strip_whitespace(charlist)

def parse_postatom(charlist, name):
    if name == '': raise parse_error("missing atom name")
    if charlist == (): return atom(name), ()
    hh, tt = charlist
    if hh == '(': return parse_args(tt, (atom(name),))
    else: return atom(name), charlist

def parse_args(charlist, args):
    if charlist == (): raise unmatched_leftparen("unmatched (", args)
    hh, tt = charlist
    if hh == ')': return tree(args), strip_whitespace(tt)
    elif hh in string.whitespace: return parse_args(tt, args)
    else:
        nextitem, tt = parse_charlist(charlist)
        return parse_args(tt, args + (nextitem,))

def strip_whitespace(charlist):
    if charlist == (): return charlist
    hh, tt = charlist
    if hh in string.whitespace: return strip_whitespace(tt)
    else: return charlist
    
def parse(astring):
    return parse_charlist(cons_string(astring))
def parseall(astring):
    result, remainder = parse(astring)
    assert remainder == ()
    return result

def rewrite(atree, before, after):
    mapping = unify(before, atree)
    if mapping is None: return None
    return after.instantiate(mapping)

def eagerly_rewrite(athing, rules):
    if isinstance(athing, tree):
        items = map(lambda thing: eagerly_rewrite(thing, rules), athing.items)
        athing = tree(items)
    for before, after in rules:
        result = rewrite(athing, before, after)
        if result is not None: return eagerly_rewrite(result, rules)
    return athing

class dummy_writer:
    def write(self, astring): pass
    def flush(self): pass

class parser:
    def __init__(self, infile, lineno=0, outfile=dummy_writer(), prompt=''):
        self.infile, self.lineno, self.outfile, self.prompt = (
            infile, lineno, outfile, prompt)
    def readline(self):
        self.outfile.write(self.prompt)
        self.outfile.flush()
        self.lineno = self.lineno + 1
        return self.infile.readline()
    def parseline(self):
        """Parse at least a line from infile, returning a list of trees.
                  
        When an argument list continues across lines, this function
        prefers parsing multiple lines to reporting an error. """
        inputline = self.readline()
        startlineno = self.lineno
        if inputline == '': raise EOFError
        charlist = strip_whitespace(cons_string(inputline))
        results = []
        while charlist and charlist[0] != '#':
            try: tree, charlist = parse_charlist(charlist)
            except unmatched_leftparen, v:
                nextline = cons_string(self.readline())
                if nextline == ():  # eof
                    raise unmatched_leftparen(*(v.args +
                                              ('line no', startlineno)))
                charlist = append_charlists(charlist, nextline)
            except parse_error, v:
                v.args = v.args + ('line no', startlineno)
                raise v
            else:
                results.append(tree)
        return results

def test_parser():
    infile = ('''
    x a b
    x() a() b()
    x( ) a( ) b( )
    x() a(
    ''' + # lines 6-10 follow
    ''') b()
    # a comment
    snarg fuzz  # another comment
    a(
    ) # yet another
    ''' + # lines 11-14 follow
    '''3
    )
    x() a(
    ) b(
    ''')
    p = parser(StringIO.StringIO(infile))
    assert p.parseline() == []
    assert p.lineno == 1
    assert p.parseline() == map(parseall, 'x a b'.split())
    assert p.lineno == 2
    assert p.parseline() == map(parseall, 'x() a() b()'.split())
    assert p.lineno == 3
    assert p.parseline() == map(parseall, 'x() a() b()'.split())
    assert p.lineno == 4
    assert p.parseline() == map(parseall, 'x() a() b()'.split())
    assert p.lineno == 6
    assert p.parseline() == []
    assert p.lineno == 7
    assert p.parseline() == map(parseall, 'snarg fuzz'.split())
    assert p.lineno == 8
    assert p.parseline() == [parseall('a()')]
    assert p.lineno == 10
    assert p.parseline() == [parseall('3')]
    assert p.lineno == 11
    try: p.parseline()
    except parse_error, pe:
        assert 12 in pe.args, `pe.args`
    else:
        assert 0
    assert_raises(lambda: p.parseline(), parse_error)
    assert_raises(lambda: p.parseline(), EOFError)

def repl(files):
    """Play with rewriting expressions.  a -> b rewrites a to b.

    d(peanuts) deletes rules for expressions starting with 'peanuts'.

    See the various example *.tree files packaged herewith for
    examples.

    """
    rules = []
    def define_rewrite_rule(ostream, pattern, replacement):
        rules.append((pattern, replacement))
        ostream.write("will rewrite %s to %s\n" % (pattern, replacement))
    def delete_rules(ostream, atom):
        ostream.write('forgetting about %s\n' % atom)
        rules[:] = [(pat, rep) for (pat, rep) in rules
                    if not hasattr(pat, 'items')
                    or pat.items[0] != atom]
    def define_action(patternstr, routine):
        rules.append((parseall(patternstr), toplevel_action(routine)))
    #define_action('r($pattern $replacement)', define_rewrite_rule)
    define_action('d($atom)', delete_rules)
    
    def import_file(ostream, filename):
        i_file(ostream, filename.value)
    define_action('import($filename)', import_file)

    def i_file(ostream, filename):
        infile = open(filename)
        readfile(parser(infile), ostream)
        infile.close()
        ostream.write("imported %s\n" % filename)

    def readfile(cmdlineparser, ostream):
        while 1:
            try:
                try: trees = cmdlineparser.parseline()
                except EOFError: ostream.write("\n"); return
                if len(trees) == 1:
                    eagerly_rewrite(trees[0],
                                    rules[:]).toplevel_result(ostream)
                elif len(trees) == 0:
                    pass
                elif len(trees) == 3 and trees[1] == atom('->'):
                    define_rewrite_rule(ostream, trees[0], trees[2])
                else:
                    ostream.write("Couldn't understand that: %s\n" %
                                  (map(str, trees)))
            except:
                traceback.print_exc()

    for f in files:
        i_file(sys.stdout, f)
    cmdlineparser = parser(sys.stdin, outfile=sys.stdout, prompt='      ')
    readfile(cmdlineparser, sys.stdout)

class wrong_exception(AssertionError): pass

def assert_raises(subr, exc):
    try:
        subr()
    except exc:
        return 1
    except:
        exc_type, exc_value, tb = sys.exc_info()
        raise wrong_exception, wrong_exception(exc_value), tb
    else:
        raise wrong_exception("no exception")

def test():
    def raise_TypeError():
        raise TypeError
    assert_raises(raise_TypeError, TypeError)
    def assert_wrong():
        assert_raises(raise_TypeError, ValueError)
    assert_raises(assert_wrong, AssertionError)
    assert_raises(assert_wrong, wrong_exception)

    def assert_parse_error(astr):
        assert_raises(lambda: parse(astr), parse_error)

    assert_parse_error('')
    assert_parse_error(')')
    assert_parse_error('(')
    assert_parse_error('  ')
    assert_parse_error(' )')
    assert_parse_error(' (')

    def dparse(astr): return parse(astr)[0]
    def punp(astr): return str(dparse(astr))
    def teq(astr):
        assert punp(astr) == astr, punp(astr)

    teq('asdf')
    teq('asdf()')
    teq('1324')
    teq('asdf(a b c)')
    teq('a(1 3 b() c)')
    teq('.agjiew0#*2808U%]*@#U*')
    teq('$x')
    teq('a(3 $x b($3) x)')
    assert punp('   as   ') == 'as'
    assert punp('  x  (  y  2  )  ') == 'x(y 2)'
    assert_parse_error('x(y(z)')
    assert_parse_error('x((y) z)')
    assert_parse_error('$')
    assert_parse_error('$()')

    ### instantiation
    atree = dparse('a(b($c d)$e)')
    assert str(atree) == 'a(b($c d) $e)', str(atree)
    atoma = atom('a')
    assert_raises(lambda: atree.instantiate({}), KeyError)
    insttree = atree.instantiate({'c': atoma, 'e': atoma})
    assert str(insttree) == 'a(b(a d) a)', str(insttree)
    insttree = atree.instantiate({'c': atom('b'), 'e': atoma})
    assert str(insttree) == 'a(b(b d) a)', str(insttree)

    ### comparison
    assert atom('a') == atom('a')
    assert atom('a') != atom('b')
    assert integer(1) == integer(1)
    assert integer(1) < integer(2)
    assert integer(2) > integer(1)
    assert variable('a') == variable('a')
    assert variable('a') != variable('b')
    assert atom('a') != variable('a')
    assert variable('a') != atom('a')
    assert atom('1') != variable('1')
    assert dparse('x(a  b c(d e f()) g)') == dparse('x(a b c(d e f()) g)')
    assert dparse('x(a  b c(d e f()) g)') != 'x(a b c(d e f()) g)'
    assert dparse('x(a  b c(d e f()) g)') != dparse('x()')
    assert dparse('x(a)') != dparse('x()')
    assert dparse('x()') != dparse('x(a)')
    assert dparse('x(a)') == dparse('x(a)')
    assert dparse('x(a)') != dparse('x(b)')

    ### unification
    def sunify(stra, strb):
        return unify(dparse(stra), dparse(strb))
    assert sunify('a', 'a') == {}
    assert sunify('a', 'b') == None
    assert sunify('$a', '3') == {'a': integer(3)}
    assert sunify('a(b($c d) $e)', 'a(b(a d) a)') == {
        'c': atom('a'),
        'e': atom('a')}
    assert sunify('a(b($c d) $e)', 'a(b(b d) a)') == {
        'c': atom('b'),
        'e': atom('a')}
    assert sunify('hi($x)', 'hi(kragen(sitaker))') == {
        'x': dparse('kragen(sitaker)')}
    assert sunify('a($a $a)', 'a(b b)') == {'a': atom('b')}
    assert sunify('a($a $a)', 'a(b c)') == None

    ### rewriting
    def srew(stra, strb, strc):
        return str(rewrite(dparse(stra), dparse(strb), dparse(strc)))
    assert srew('a', 'b', 'c') == 'None'
    assert srew('a', '$b', 'c') == 'c'
    assert srew('a', '$b', '$b') == 'a'
    assert srew('a(b)', '$x', '$x') == 'a(b)'
    assert srew('a(b)', '$x', 'y($x)') == 'y(a(b))'
    assert srew('a(b)', 'a($x)', 'c($x)') == 'c(b)'

    ### repetitive rewriting
    def rules(alist):
        def parsepair(apair):
            a, b = apair
            return dparse(a), dparse(b)
        return map(parsepair, alist)
    def evalu(astr, rules):
        return str(eagerly_rewrite(dparse(astr), rules))
    # "laws of form" boolean logic
    lofrules = rules([('not(not($x))', '$x'),
                      ('or(1 $x)', '1'),
                      ('or($x 1)', '1'),
                      ('or($x $x)', '$x')])
    assert evalu('1', lofrules) == '1'
    assert evalu('not(1)', lofrules) == 'not(1)'
    assert evalu('not(not(1))', lofrules) == '1'
    assert evalu('not(not(not(1)))', lofrules) == 'not(1)'
    assert evalu('not(not(not(not(1)))))', lofrules) == '1'
    assert evalu('or(1 not(1))', lofrules) == '1'
    assert evalu('or(not(1) 1)', lofrules) == '1'
    assert evalu('or(1 1)', lofrules) == '1'
    assert evalu('not(or(1 1))', lofrules) == 'not(1)'
    assert evalu('not(or(not(1) not(1)))', lofrules) == '1'

    a = cons_string('fibble')
    b = cons_string('wur')
    c = restring(append_charlists(a, b))
    assert c == restring(a) + restring(b), c
    test_parser()

if __name__ == '__main__':
    test()
    if len(sys.argv) > 1:
        repl(sys.argv[1:])
    else:
        print repl.__doc__
        repl([])




I wanted to be able to #! the quicksort program, but my kernel isn't
smart enough to chain #! lines.  So I wrote this tiny C shim to put on
the #! lines instead:

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

#define PYTHONPATH "/usr/bin/python"
#define TREEPYPATH "/home/kragen/devel/tree-rewriting/tree.py"

int usage(char *argv0) {
  fprintf(stderr, "Usage: %s something.tree [args...]\n", argv0);
  return -1;
}

int main(int argc, char **argv) {
  int ii;
  char **nargv;

  if (argc < 2) return usage(argv[0]);

  nargv = malloc(sizeof(*nargv) * (argc + 2));
  if (!nargv) {
    perror("malloc");
    return -1;
  }
  nargv[0] = PYTHONPATH;
  nargv[1] = TREEPYPATH;
  for (ii = 1; ii < argc; ii++) {
    nargv[ii + 1] = argv[ii];
  }
  nargv[argc + 1] = NULL;

  execv(nargv[0], nargv);
  perror("execv");
  return -1;
}

-- 
<[EMAIL PROTECTED]>       Kragen Sitaker     <http://www.pobox.com/~kragen/>
Edsger Wybe Dijkstra died in August of 2002.  The world has lost a great
man.  See http://advogato.org/person/raph/diary.html?start=252 and
http://www.kode-fu.com/geek/2002_08_04_archive.shtml for details.

more on tree rewriting

Reply via email to