Hi everybody,
We have talked on and off about making a front-end compiler for VIFF and
today I figured that I would try making such a guy...
So far it can only parse a simple language and spit out something which
looks like equivalent Python code in which both branches of
if-statements are run. So a program like this:
if a:
x = y
if b:
x = z
else:
x = w
fi
fi
is transformed into this
x = (a * y + (1 - a) * x)
x = (a * b * z + (1 - a * b) * x)
x = ((1 - a * b) * w + (1 - (1 - a * b)) * x)
which one could plug into a VIFF skeleton and run (not yet done).
The idea is that the conditions in if-statements are pushed down to all
assignments done in the then- and else-branches, nothing more.
This is just a quick test to make people start thinking about what we
want in such a language and to make people think about what kind of
transformation we can do and which we cannot do.
The program is attached below -- you will need to grab simplegeneric and
PLY (Python Lex-Yacc) to use it, but both modules are easy to install:
http://pypi.python.org/pypi/simplegeneric
http://www.dabeaz.com/ply/
You run the program by giving it the name of a file to parse and it will
tell you the results on stdout.
#!/usr/bin/python
from pprint import pprint
from simplegeneric import generic
from ply import lex, yacc
keywords = ("print", "if", "else", "fi")
tokens = ("NUMBER", "VARIABLE", "PLUS", "MINUS", "TIMES", "EQUAL", "COLON",
"NEWLINE") + tuple(map(str.upper, keywords))
t_NUMBER = r'\d+'
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_EQUAL = r'='
t_COLON = r':'
def t_VARIABLE(t):
r"""[a-z]+"""
if t.value in keywords:
t.type = t.value.upper()
return t
def t_NEWLINE(t):
r"""\n"""
t.lexer.lineno += 1
return t
def t_error(t):
print "Illegal character '%s'" % t.value[0]
t.lexer.skip(1)
t_ignore = ' \t'
class BinOp(object):
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def __repr__(self):
return "%s(%s, %s)" % (self.__class__.__name__, self.lhs, self.rhs)
class Add(BinOp): pass
class Sub(BinOp): pass
class Mul(BinOp): pass
class AssignStmt(object):
def __init__(self, lhs, rhs):
self.lhs = lhs
self.rhs = rhs
def __repr__(self):
return "AssignStmt(%r, %r)" % (self.lhs, self.rhs)
class PrintStmt(object):
def __init__(self, expr):
self.expr = expr
def __repr__(self):
return "PrintStmt(%r)" % self.expr
class IfStmt(object):
def __init__(self, bool_expr, then_stmts, else_stmts=None):
self.bool_expr = bool_expr
self.then_stmts = then_stmts
if else_stmts:
self.else_stmts = else_stmts
else:
self.else_stmts = []
def __repr__(self):
return "IfStmt(%r, %r, %r)" % (self.bool_expr, self.then_stmts,
self.else_stmts)
def p_program(p):
"""program : statement"""
p[0] = [p[1]]
def p_program_cont(p):
"""program : program statement"""
p[0] = p[1] + [p[2]]
def p_statement(p):
"""statement : assignment NEWLINE
| print NEWLINE
| if NEWLINE"""
p[0] = p[1]
def p_assignment(p):
"""assignment : VARIABLE EQUAL expr"""
p[0] = AssignStmt(p[1], p[3])
def p_print(p):
"""print : PRINT expr"""
p[0] = PrintStmt(p[2])
def p_if(p):
"""if : IF expr COLON NEWLINE program FI"""
p[0] = IfStmt(p[2], p[5])
def p_if_else(p):
"""if : IF expr COLON NEWLINE program ELSE COLON NEWLINE program FI"""
p[0] = IfStmt(p[2], p[5], p[9])
def p_expr(p):
"""expr : expr PLUS term
| expr MINUS term"""
if p[2] == '+':
p[0] = Add(p[1], p[3])
else:
p[0] = Sub(p[1], p[3])
def p_expr_term(p):
"""expr : term"""
p[0] = p[1]
def p_term(p):
"""term : term TIMES factor"""
p[0] = Mul(p[1], p[3])
def p_term_factor(p):
"""term : factor"""
p[0] = p[1]
def p_factor(p):
"""factor : NUMBER
| VARIABLE"""
p[0] = p[1]
def p_error(p):
print "Syntax error!"
print p
lex.lex()
yacc.yacc()
### Printing code ###
@generic
def code(node):
return str(node)
@code.when_type(list)
def code_result(result):
return "\n".join(map(code, result))
@code.when_type(Add)
def code_add(node):
return "(%s + %s)" % (code(node.lhs), code(node.rhs))
@code.when_type(Sub)
def code_sub(node):
return "(%s - %s)" % (code(node.lhs), code(node.rhs))
@code.when_type(Mul)
def code_mul(node):
return "%s * %s" % (code(node.lhs), code(node.rhs))
@code.when_type(AssignStmt)
def code_assignment(node):
return "%s = %s" % (code(node.lhs), code(node.rhs))
@code.when_type(PrintStmt)
def code_print(node):
return "print %s" % node.stmt
@code.when_type(IfStmt)
def code_if(node):
result = ["if %s:" % code(node.bool_expr)]
for stmt in node.then_stmts:
result.append(code(stmt))
if node.else_stmts:
result.append("else:")
for stmt in node.else_stmts:
result.append(code(stmt))
result.append("fi")
return "\n".join(result)
### Desugaring of if-statements ###
@generic
def desugar_if(node, expr=None):
return node
@desugar_if.when_type(list)
def desugar_if_list(node, expr=None):
node = map(lambda n: desugar_if(n, expr), node)
result = []
for n in node:
if isinstance(n, list):
result.extend(n)
else:
result.append(n)
return result
@desugar_if.when_type(IfStmt)
def desugar_if_if(node, expr=None):
if expr:
b = Mul(expr, node.bool_expr)
else:
b = node.bool_expr
nb = Sub(1, b)
node.then_stmts = map(lambda n: desugar_if(n, b), node.then_stmts)
node.else_stmts = map(lambda n: desugar_if(n, nb), node.else_stmts)
return node.then_stmts + node.else_stmts
@desugar_if.when_type(AssignStmt)
def desugar_if_assignment(node, expr=None):
if expr:
nexpr = Sub(1, expr)
node.rhs = Add(Mul(expr, node.rhs), Mul(nexpr, node.lhs))
return node
### Conversion of parse tree into nested tuples ###
@generic
def tree_to_tuples(node):
return node
@tree_to_tuples.when_type(list)
def tree_to_tuples_list(node):
return map(tree_to_tuples, node)
@tree_to_tuples.when_type(IfStmt)
def tree_to_tuples_if(node):
if node.else_stmts:
return ('IfStmt',
tree_to_tuples(node.bool_expr),
tree_to_tuples(node.then_stmts),
tree_to_tuples(node.else_stmts))
else:
return ('IfStmt',
tree_to_tuples(node.bool_expr),
tree_to_tuples(node.then_stmts))
@tree_to_tuples.when_type(AssignStmt)
def tree_to_tuples_assign(node):
return ('AssignStmt', tree_to_tuples(node.lhs), tree_to_tuples(node.rhs))
if __name__ == "__main__":
import sys
fp = open(sys.argv[1], 'r')
data = fp.read()
print "Original program:"
print data
tree = yacc.parse(data) #, debug=2)
print "Parse tree:"
pprint(tree_to_tuples(tree))
print
print "Raw Python code:"
print code(tree)
tree = desugar_if(tree)
print "Desugared Python code:"
print code(tree)
--
Martin Geisler
_______________________________________________
viff-devel mailing list (http://viff.dk/)
[email protected]
http://lists.viff.dk/listinfo.cgi/viff-devel-viff.dk