[pypy-commit] pypy pyparser-improvements: lazify the creation of the Nonterminal

cfbolz Mon, 12 Mar 2018 09:54:03 -0700

Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch: pyparser-improvements
Changeset: r93977:6440ef7f3d66
Date: 2018-03-12 17:01 +0100
http://bitbucket.org/pypy/pypy/changeset/6440ef7f3d66/


Log:    lazify the creation of the Nonterminal

        this makes it necessary to turn the DFA into an object that knows
        its id

diff --git a/pypy/interpreter/pyparser/metaparser.py 
b/pypy/interpreter/pyparser/metaparser.py
--- a/pypy/interpreter/pyparser/metaparser.py
+++ b/pypy/interpreter/pyparser/metaparser.py
@@ -147,8 +147,10 @@
                 for label, next in state.arcs.iteritems():
                     arcs.append((self.make_label(gram, label), 
dfa.index(next)))
                 states.append((arcs, state.is_final))
-            gram.dfas.append((states, self.make_first(gram, name)))
-            assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256
+            symbol_id = gram.symbol_ids[name]
+            dfa = parser.DFA(symbol_id, states, self.make_first(gram, name))
+            gram.dfas.append(dfa)
+            assert len(gram.dfas) - 1 == symbol_id - 256
         gram.start = gram.symbol_ids[self.start_symbol]
         return gram
 
diff --git a/pypy/interpreter/pyparser/parser.py 
b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -41,6 +41,12 @@
             pass
         return True
 
+class DFA(object):
+    def __init__(self, symbol_id, states, first):
+        self.symbol_id = symbol_id
+        self.states = states
+        self.first = first
+
 
 class Node(object):
 
@@ -188,18 +194,28 @@
 
 
 class StackEntry(object):
-    def __init__(self, next, dfa, state, node):
+    def __init__(self, next, dfa, state):
         self.next = next
         self.dfa = dfa
         self.state = state
-        self.node = node
+        self.node = None
 
-    def push(self, dfa, state, node):
-        return StackEntry(self, dfa, state, node)
+    def push(self, dfa, state):
+        return StackEntry(self, dfa, state)
 
     def pop(self):
         return self.next
 
+    def node_append_child(self, child):
+        node = self.node
+        if node is None:
+            self.node = Nonterminal1(self.dfa.symbol_id, child)
+        elif isinstance(node, Nonterminal1):
+            self.node = Nonterminal(self.dfa.symbol_id)
+            self.node._children = [node._child, child]
+        else:
+            self.node.append_child(child)
+
 
 class Parser(object):
 
@@ -215,8 +231,7 @@
         if start == -1:
             start = self.grammar.start
         self.root = None
-        current_node = Nonterminal(start)
-        self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0, 
current_node)
+        self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0)
 
     def add_token(self, token_type, value, lineno, column, line):
         label_index = self.classify(token_type, value, lineno, column, line)
@@ -224,7 +239,7 @@
         while True:
             dfa = self.stack.dfa
             state_index = self.stack.state
-            states, first = dfa
+            states = dfa.states
             arcs, is_accepting = states[state_index]
             for i, next_state in arcs:
                 sym_id = self.grammar.labels[i]
@@ -242,12 +257,12 @@
                             return True
                         dfa = self.stack.dfa
                         state_index = self.stack.state
-                        state = dfa[0][state_index]
+                        state = dfa.states[state_index]
                     return False
                 elif sym_id >= 256:
                     sub_node_dfa = self.grammar.dfas[sym_id - 256]
                     # Check if this token can start a child node.
-                    if label_index in sub_node_dfa[1]:
+                    if label_index in sub_node_dfa.first:
                         self.push(sub_node_dfa, next_state, sym_id, lineno,
                                   column)
                         break
@@ -285,26 +300,21 @@
     def shift(self, next_state, token_type, value, lineno, column):
         """Shift a non-terminal and prepare for the next state."""
         new_node = Terminal(token_type, value, lineno, column)
-        self.stack.node.append_child(new_node)
+        self.stack.node_append_child(new_node)
         self.stack.state = next_state
 
     def push(self, next_dfa, next_state, node_type, lineno, column):
         """Push a terminal and adjust the current state."""
-        new_node = Nonterminal(node_type)
-
         self.stack.state = next_state
-        self.stack = self.stack.push(next_dfa, 0, new_node)
+        self.stack = self.stack.push(next_dfa, 0)
 
     def pop(self):
         """Pop an entry off the stack and make its node a child of the last."""
         top = self.stack
         self.stack = top.pop()
         node = top.node
+        assert node is not None
         if self.stack:
-            # we are now done with node, so we can store it more efficiently if
-            # it has just one child
-            if node.num_children() == 1:
-                node = Nonterminal1(node.type, node.get_child(0))
-            self.stack.node.append_child(node)
+            self.stack.node_append_child(node)
         else:
             self.root = node
diff --git a/pypy/interpreter/pyparser/test/test_metaparser.py 
b/pypy/interpreter/pyparser/test/test_metaparser.py
--- a/pypy/interpreter/pyparser/test/test_metaparser.py
+++ b/pypy/interpreter/pyparser/test/test_metaparser.py
@@ -34,8 +34,8 @@
         assert len(g.dfas) == 1
         eval_sym = g.symbol_ids["eval"]
         assert g.start == eval_sym
-        states, first = g.dfas[eval_sym - 256]
-        assert states == [([(1, 1)], False), ([], True)]
+        dfa = g.dfas[eval_sym - 256]
+        assert dfa.states == [([(1, 1)], False), ([], True)]
         assert g.labels[0] == 0
 
     def test_load_python_grammars(self):
@@ -51,7 +51,7 @@
     def test_items(self):
         g = self.gram_for("foo: NAME STRING OP '+'")
         assert len(g.dfas) == 1
-        states = g.dfas[g.symbol_ids["foo"] - 256][0]
+        states = g.dfas[g.symbol_ids["foo"] - 256].states
         last = states[0][0][0][1]
         for state in states[1:-1]:
             assert last < state[0][0][1]
diff --git a/pypy/module/parser/pyparser.py b/pypy/module/parser/pyparser.py
--- a/pypy/module/parser/pyparser.py
+++ b/pypy/module/parser/pyparser.py
@@ -152,7 +152,7 @@
         # Raise an exception now and be done with it.
         raise parser_error(space, w_tuple,
                            "Illegal syntax-tree; cannot start with terminal 
symbol.")
-    node = pyparse.parser.Nonterminal(type, [])
+    node = pyparse.parser.Nonterminal(type)
     build_node_children(space, w_tuple, node, node_state)
     return node
 
@@ -171,7 +171,7 @@
             strn = space.text_w(w_obj)
             child = pyparse.parser.Terminal(type, strn, node_state.lineno, 0)
         else:
-            child = pyparse.parser.Nonterminal(type, [])
+            child = pyparse.parser.Nonterminal(type)
         node.append_child(child)
         if type >= 256:  # Nonterminal node
             build_node_children(space, w_elem, child, node_state)
@@ -187,8 +187,7 @@
         raise parse_error(space, "Unrecognized node type %d." % type)
     dfa = parser.grammar.dfas[type]
     # Run the DFA for this nonterminal
-    states, first = dfa
-    arcs, is_accepting = states[0]
+    arcs, is_accepting = dfa.states[0]
     for pos in range(tree.num_children()):
         ch = tree.get_child(pos)
         for i, next_state in arcs:
@@ -198,7 +197,7 @@
                 if ch.type >= 256:
                     validate_node(space, ch)
                 # Update the state, and move on to the next child.
-                arcs, is_accepting = states[next_state]
+                arcs, is_accepting = dfa.states[next_state]
                 break
         else:
             raise parse_error(space, "Illegal node")
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy pyparser-improvements: lazify the creation of the Nonterminal

Reply via email to