Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de> Branch: pyparser-improvements Changeset: r93977:6440ef7f3d66 Date: 2018-03-12 17:01 +0100 http://bitbucket.org/pypy/pypy/changeset/6440ef7f3d66/
Log: lazify the creation of the Nonterminal this makes it necessary to turn the DFA into an object that knows its id diff --git a/pypy/interpreter/pyparser/metaparser.py b/pypy/interpreter/pyparser/metaparser.py --- a/pypy/interpreter/pyparser/metaparser.py +++ b/pypy/interpreter/pyparser/metaparser.py @@ -147,8 +147,10 @@ for label, next in state.arcs.iteritems(): arcs.append((self.make_label(gram, label), dfa.index(next))) states.append((arcs, state.is_final)) - gram.dfas.append((states, self.make_first(gram, name))) - assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256 + symbol_id = gram.symbol_ids[name] + dfa = parser.DFA(symbol_id, states, self.make_first(gram, name)) + gram.dfas.append(dfa) + assert len(gram.dfas) - 1 == symbol_id - 256 gram.start = gram.symbol_ids[self.start_symbol] return gram diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py --- a/pypy/interpreter/pyparser/parser.py +++ b/pypy/interpreter/pyparser/parser.py @@ -41,6 +41,12 @@ pass return True +class DFA(object): + def __init__(self, symbol_id, states, first): + self.symbol_id = symbol_id + self.states = states + self.first = first + class Node(object): @@ -188,18 +194,28 @@ class StackEntry(object): - def __init__(self, next, dfa, state, node): + def __init__(self, next, dfa, state): self.next = next self.dfa = dfa self.state = state - self.node = node + self.node = None - def push(self, dfa, state, node): - return StackEntry(self, dfa, state, node) + def push(self, dfa, state): + return StackEntry(self, dfa, state) def pop(self): return self.next + def node_append_child(self, child): + node = self.node + if node is None: + self.node = Nonterminal1(self.dfa.symbol_id, child) + elif isinstance(node, Nonterminal1): + self.node = Nonterminal(self.dfa.symbol_id) + self.node._children = [node._child, child] + else: + self.node.append_child(child) + class Parser(object): @@ -215,8 +231,7 @@ if start == -1: start = self.grammar.start self.root = None - current_node = Nonterminal(start) - self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0, current_node) + self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0) def add_token(self, token_type, value, lineno, column, line): label_index = self.classify(token_type, value, lineno, column, line) @@ -224,7 +239,7 @@ while True: dfa = self.stack.dfa state_index = self.stack.state - states, first = dfa + states = dfa.states arcs, is_accepting = states[state_index] for i, next_state in arcs: sym_id = self.grammar.labels[i] @@ -242,12 +257,12 @@ return True dfa = self.stack.dfa state_index = self.stack.state - state = dfa[0][state_index] + state = dfa.states[state_index] return False elif sym_id >= 256: sub_node_dfa = self.grammar.dfas[sym_id - 256] # Check if this token can start a child node. - if label_index in sub_node_dfa[1]: + if label_index in sub_node_dfa.first: self.push(sub_node_dfa, next_state, sym_id, lineno, column) break @@ -285,26 +300,21 @@ def shift(self, next_state, token_type, value, lineno, column): """Shift a non-terminal and prepare for the next state.""" new_node = Terminal(token_type, value, lineno, column) - self.stack.node.append_child(new_node) + self.stack.node_append_child(new_node) self.stack.state = next_state def push(self, next_dfa, next_state, node_type, lineno, column): """Push a terminal and adjust the current state.""" - new_node = Nonterminal(node_type) - self.stack.state = next_state - self.stack = self.stack.push(next_dfa, 0, new_node) + self.stack = self.stack.push(next_dfa, 0) def pop(self): """Pop an entry off the stack and make its node a child of the last.""" top = self.stack self.stack = top.pop() node = top.node + assert node is not None if self.stack: - # we are now done with node, so we can store it more efficiently if - # it has just one child - if node.num_children() == 1: - node = Nonterminal1(node.type, node.get_child(0)) - self.stack.node.append_child(node) + self.stack.node_append_child(node) else: self.root = node diff --git a/pypy/interpreter/pyparser/test/test_metaparser.py b/pypy/interpreter/pyparser/test/test_metaparser.py --- a/pypy/interpreter/pyparser/test/test_metaparser.py +++ b/pypy/interpreter/pyparser/test/test_metaparser.py @@ -34,8 +34,8 @@ assert len(g.dfas) == 1 eval_sym = g.symbol_ids["eval"] assert g.start == eval_sym - states, first = g.dfas[eval_sym - 256] - assert states == [([(1, 1)], False), ([], True)] + dfa = g.dfas[eval_sym - 256] + assert dfa.states == [([(1, 1)], False), ([], True)] assert g.labels[0] == 0 def test_load_python_grammars(self): @@ -51,7 +51,7 @@ def test_items(self): g = self.gram_for("foo: NAME STRING OP '+'") assert len(g.dfas) == 1 - states = g.dfas[g.symbol_ids["foo"] - 256][0] + states = g.dfas[g.symbol_ids["foo"] - 256].states last = states[0][0][0][1] for state in states[1:-1]: assert last < state[0][0][1] diff --git a/pypy/module/parser/pyparser.py b/pypy/module/parser/pyparser.py --- a/pypy/module/parser/pyparser.py +++ b/pypy/module/parser/pyparser.py @@ -152,7 +152,7 @@ # Raise an exception now and be done with it. raise parser_error(space, w_tuple, "Illegal syntax-tree; cannot start with terminal symbol.") - node = pyparse.parser.Nonterminal(type, []) + node = pyparse.parser.Nonterminal(type) build_node_children(space, w_tuple, node, node_state) return node @@ -171,7 +171,7 @@ strn = space.text_w(w_obj) child = pyparse.parser.Terminal(type, strn, node_state.lineno, 0) else: - child = pyparse.parser.Nonterminal(type, []) + child = pyparse.parser.Nonterminal(type) node.append_child(child) if type >= 256: # Nonterminal node build_node_children(space, w_elem, child, node_state) @@ -187,8 +187,7 @@ raise parse_error(space, "Unrecognized node type %d." % type) dfa = parser.grammar.dfas[type] # Run the DFA for this nonterminal - states, first = dfa - arcs, is_accepting = states[0] + arcs, is_accepting = dfa.states[0] for pos in range(tree.num_children()): ch = tree.get_child(pos) for i, next_state in arcs: @@ -198,7 +197,7 @@ if ch.type >= 256: validate_node(space, ch) # Update the state, and move on to the next child. - arcs, is_accepting = states[next_state] + arcs, is_accepting = dfa.states[next_state] break else: raise parse_error(space, "Illegal node") _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit