Author: Maciej Fijalkowski <fij...@gmail.com> Branch: numpy-multidim Changeset: r48712:6d64103f1147 Date: 2011-11-03 19:28 +0100 http://bitbucket.org/pypy/pypy/changeset/6d64103f1147/
Log: make compile use a real tokenizer - breaks test_zjit for now diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py --- a/pypy/module/micronumpy/compile.py +++ b/pypy/module/micronumpy/compile.py @@ -23,6 +23,12 @@ class WrongFunctionName(Exception): pass +class TokenizerError(Exception): + pass + +class BadToken(Exception): + pass + SINGLE_ARG_FUNCTIONS = ["sum", "prod", "max", "min", "all", "any", "unegative"] class FakeSpace(object): @@ -192,7 +198,7 @@ interp.variables[self.name] = self.expr.execute(interp) def __repr__(self): - return "%% = %r" % (self.name, self.expr) + return "%r = %r" % (self.name, self.expr) class ArrayAssignment(Node): def __init__(self, name, index, expr): @@ -214,7 +220,7 @@ class Variable(Node): def __init__(self, name): - self.name = name + self.name = name.strip() def execute(self, interp): return interp.variables[self.name] @@ -332,7 +338,7 @@ class FunctionCall(Node): def __init__(self, name, args): - self.name = name + self.name = name.strip() self.args = args def __repr__(self): @@ -375,118 +381,174 @@ else: raise WrongFunctionName +import re + +_REGEXES = [ + ('-?[\d]+', 'number'), + ('\[', 'array_left'), + (':', 'colon'), + ('\w+', 'identifier'), + ('\]', 'array_right'), + ('(->)|[\+\-\*\/]', 'operator'), + ('=', 'assign'), + (',', 'coma'), + ('\|', 'pipe'), + ('\(', 'paren_left'), + ('\)', 'paren_right'), +] +REGEXES = [] + +for r, name in _REGEXES: + REGEXES.append((re.compile(' *(' + r + ')'), name)) +del _REGEXES + +class Token(object): + def __init__(self, name, v): + self.name = name + self.v = v + + def __repr__(self): + return '(%s, %s)' % (self.name, self.v) + +empty = Token('', '') + +class TokenStack(object): + def __init__(self, tokens): + self.tokens = tokens + self.c = 0 + + def pop(self): + token = self.tokens[self.c] + self.c += 1 + return token + + def get(self, i): + if self.c + i >= len(self.tokens): + return empty + return self.tokens[self.c + i] + + def remaining(self): + return len(self.tokens) - self.c + + def push(self): + self.c -= 1 + + def __repr__(self): + return repr(self.tokens[self.c:]) + class Parser(object): - def parse_identifier(self, id): - id = id.strip(" ") - #assert id.isalpha() - return Variable(id) + def tokenize(self, line): + tokens = [] + while True: + for r, name in REGEXES: + m = r.match(line) + if m is not None: + g = m.group(0) + tokens.append(Token(name, g)) + line = line[len(g):] + if not line: + return TokenStack(tokens) + break + else: + raise TokenizerError(line) - def parse_expression(self, expr): - tokens = [i for i in expr.split(" ") if i] - if len(tokens) == 1: - return self.parse_constant_or_identifier(tokens[0]) + def parse_number_or_slice(self, tokens): + start_tok = tokens.pop() + if start_tok.name == 'colon': + start = 0 + else: + start = int(start_tok.v) + if tokens.get(0).name != 'colon': + return FloatConstant(start) + tokens.pop() + if not tokens.get(0).name in ['colon', 'number']: + stop = -1 + step = 1 + else: + next = tokens.pop() + if next.name == 'colon': + stop = -1 + step = int(tokens.pop().v) + else: + stop = int(next.v) + if tokens.get(0).name == 'colon': + tokens.pop() + step = int(tokens.pop().v) + else: + step = 1 + return SliceConstant(start, stop, step) + + + def parse_expression(self, tokens): stack = [] - tokens.reverse() - while tokens: + while tokens.remaining(): token = tokens.pop() - if token == ')': - raise NotImplementedError - elif self.is_identifier_or_const(token): - if stack: - name = stack.pop().name - lhs = stack.pop() - rhs = self.parse_constant_or_identifier(token) - stack.append(Operator(lhs, name, rhs)) + if token.name == 'identifier': + if tokens.remaining() and tokens.get(0).name == 'paren_left': + stack.append(self.parse_function_call(token.v, tokens)) else: - stack.append(self.parse_constant_or_identifier(token)) + stack.append(Variable(token.v)) + elif token.name == 'array_left': + stack.append(ArrayConstant(self.parse_array_const(tokens))) + elif token.name == 'operator': + stack.append(Variable(token.v)) + elif token.name == 'number' or token.name == 'colon': + tokens.push() + stack.append(self.parse_number_or_slice(tokens)) + elif token.name == 'pipe': + stack.append(RangeConstant(tokens.pop().v)) + end = tokens.pop() + assert end.name == 'pipe' else: - stack.append(Variable(token)) - assert len(stack) == 1 - return stack[-1] + tokens.push() + break + stack.reverse() + lhs = stack.pop() + while stack: + op = stack.pop() + assert isinstance(op, Variable) + rhs = stack.pop() + lhs = Operator(lhs, op.name, rhs) + return lhs - def parse_constant(self, v): - lgt = len(v)-1 - assert lgt >= 0 - if ':' in v: - # a slice - if v == ':': - return SliceConstant(0, 0, 0) - else: - l = v.split(':') - if len(l) == 2: - one = l[0] - two = l[1] - if not one: - one = 0 - else: - one = int(one) - return SliceConstant(int(l[0]), int(l[1]), 1) - else: - three = int(l[2]) - # all can be empty - if l[0]: - one = int(l[0]) - else: - one = 0 - if l[1]: - two = int(l[1]) - else: - two = -1 - return SliceConstant(one, two, three) - - if v[0] == '[': - return ArrayConstant([self.parse_constant(elem) - for elem in v[1:lgt].split(",")]) - if v[0] == '|': - return RangeConstant(v[1:lgt]) - return FloatConstant(v) - - def is_identifier_or_const(self, v): - c = v[0] - if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or - (c >= '0' and c <= '9') or c in '-.[|:'): - if v == '-' or v == "->": - return False - return True - return False - - def parse_function_call(self, v): - l = v.split('(') - assert len(l) == 2 - name = l[0] - cut = len(l[1]) - 1 - assert cut >= 0 - args = [self.parse_constant_or_identifier(id) - for id in l[1][:cut].split(",")] + def parse_function_call(self, name, tokens): + args = [] + tokens.pop() # lparen + while tokens.get(0).name != 'paren_right': + args.append(self.parse_expression(tokens)) return FunctionCall(name, args) - def parse_constant_or_identifier(self, v): - c = v[0] - if (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z'): - if '(' in v: - return self.parse_function_call(v) - return self.parse_identifier(v) - return self.parse_constant(v) - - def parse_array_subscript(self, v): - v = v.strip(" ") - l = v.split("[") - lgt = len(l[1]) - 1 - assert lgt >= 0 - rhs = self.parse_constant_or_identifier(l[1][:lgt]) - return l[0], rhs + def parse_array_const(self, tokens): + elems = [] + while True: + token = tokens.pop() + if token.name == 'number': + elems.append(FloatConstant(token.v)) + elif token.name == 'array_left': + elems.append(ArrayConstant(self.parse_array_const(tokens))) + else: + raise BadToken() + token = tokens.pop() + if token.name == 'array_right': + return elems + assert token.name == 'coma' - def parse_statement(self, line): - if '=' in line: - lhs, rhs = line.split("=") - lhs = lhs.strip(" ") - if '[' in lhs: - name, index = self.parse_array_subscript(lhs) - return ArrayAssignment(name, index, self.parse_expression(rhs)) - else: - return Assignment(lhs, self.parse_expression(rhs)) - else: - return Execute(self.parse_expression(line)) + def parse_statement(self, tokens): + if (tokens.get(0).name == 'identifier' and + tokens.get(1).name == 'assign'): + lhs = tokens.pop().v + tokens.pop() + rhs = self.parse_expression(tokens) + return Assignment(lhs, rhs) + elif (tokens.get(0).name == 'identifier' and + tokens.get(1).name == 'array_left'): + name = tokens.pop().v + tokens.pop() + index = self.parse_expression(tokens) + tokens.pop() + tokens.pop() + return ArrayAssignment(name, index, self.parse_expression(tokens)) + return Execute(self.parse_expression(tokens)) def parse(self, code): statements = [] @@ -495,7 +557,8 @@ line = line.split('#', 1)[0] line = line.strip(" ") if line: - statements.append(self.parse_statement(line)) + tokens = self.tokenize(line) + statements.append(self.parse_statement(tokens)) return Code(statements) def numpy_compile(code): diff --git a/pypy/module/micronumpy/test/test_compile.py b/pypy/module/micronumpy/test/test_compile.py --- a/pypy/module/micronumpy/test/test_compile.py +++ b/pypy/module/micronumpy/test/test_compile.py @@ -177,3 +177,9 @@ """) assert interp.results[0].value.val == 6 + def test_multidim_getitem(self): + interp = self.run(""" + a = [[1,2]] + a -> 0 -> 1 + """) + assert interp.results[0].value.val == 2 _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit