Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: pyparser-improvements
Changeset: r93977:6440ef7f3d66
Date: 2018-03-12 17:01 +0100
http://bitbucket.org/pypy/pypy/changeset/6440ef7f3d66/
Log: lazify the creation of the Nonterminal
this makes it necessary to turn the DFA into an object that knows
its id
diff --git a/pypy/interpreter/pyparser/metaparser.py
b/pypy/interpreter/pyparser/metaparser.py
--- a/pypy/interpreter/pyparser/metaparser.py
+++ b/pypy/interpreter/pyparser/metaparser.py
@@ -147,8 +147,10 @@
for label, next in state.arcs.iteritems():
arcs.append((self.make_label(gram, label),
dfa.index(next)))
states.append((arcs, state.is_final))
- gram.dfas.append((states, self.make_first(gram, name)))
- assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256
+ symbol_id = gram.symbol_ids[name]
+ dfa = parser.DFA(symbol_id, states, self.make_first(gram, name))
+ gram.dfas.append(dfa)
+ assert len(gram.dfas) - 1 == symbol_id - 256
gram.start = gram.symbol_ids[self.start_symbol]
return gram
diff --git a/pypy/interpreter/pyparser/parser.py
b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -41,6 +41,12 @@
pass
return True
+class DFA(object):
+ def __init__(self, symbol_id, states, first):
+ self.symbol_id = symbol_id
+ self.states = states
+ self.first = first
+
class Node(object):
@@ -188,18 +194,28 @@
class StackEntry(object):
- def __init__(self, next, dfa, state, node):
+ def __init__(self, next, dfa, state):
self.next = next
self.dfa = dfa
self.state = state
- self.node = node
+ self.node = None
- def push(self, dfa, state, node):
- return StackEntry(self, dfa, state, node)
+ def push(self, dfa, state):
+ return StackEntry(self, dfa, state)
def pop(self):
return self.next
+ def node_append_child(self, child):
+ node = self.node
+ if node is None:
+ self.node = Nonterminal1(self.dfa.symbol_id, child)
+ elif isinstance(node, Nonterminal1):
+ self.node = Nonterminal(self.dfa.symbol_id)
+ self.node._children = [node._child, child]
+ else:
+ self.node.append_child(child)
+
class Parser(object):
@@ -215,8 +231,7 @@
if start == -1:
start = self.grammar.start
self.root = None
- current_node = Nonterminal(start)
- self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0,
current_node)
+ self.stack = StackEntry(None, self.grammar.dfas[start - 256], 0)
def add_token(self, token_type, value, lineno, column, line):
label_index = self.classify(token_type, value, lineno, column, line)
@@ -224,7 +239,7 @@
while True:
dfa = self.stack.dfa
state_index = self.stack.state
- states, first = dfa
+ states = dfa.states
arcs, is_accepting = states[state_index]
for i, next_state in arcs:
sym_id = self.grammar.labels[i]
@@ -242,12 +257,12 @@
return True
dfa = self.stack.dfa
state_index = self.stack.state
- state = dfa[0][state_index]
+ state = dfa.states[state_index]
return False
elif sym_id >= 256:
sub_node_dfa = self.grammar.dfas[sym_id - 256]
# Check if this token can start a child node.
- if label_index in sub_node_dfa[1]:
+ if label_index in sub_node_dfa.first:
self.push(sub_node_dfa, next_state, sym_id, lineno,
column)
break
@@ -285,26 +300,21 @@
def shift(self, next_state, token_type, value, lineno, column):
"""Shift a non-terminal and prepare for the next state."""
new_node = Terminal(token_type, value, lineno, column)
- self.stack.node.append_child(new_node)
+ self.stack.node_append_child(new_node)
self.stack.state = next_state
def push(self, next_dfa, next_state, node_type, lineno, column):
"""Push a terminal and adjust the current state."""
- new_node = Nonterminal(node_type)
-
self.stack.state = next_state
- self.stack = self.stack.push(next_dfa, 0, new_node)
+ self.stack = self.stack.push(next_dfa, 0)
def pop(self):
"""Pop an entry off the stack and make its node a child of the last."""
top = self.stack
self.stack = top.pop()
node = top.node
+ assert node is not None
if self.stack:
- # we are now done with node, so we can store it more efficiently if
- # it has just one child
- if node.num_children() == 1:
- node = Nonterminal1(node.type, node.get_child(0))
- self.stack.node.append_child(node)
+ self.stack.node_append_child(node)
else:
self.root = node
diff --git a/pypy/interpreter/pyparser/test/test_metaparser.py
b/pypy/interpreter/pyparser/test/test_metaparser.py
--- a/pypy/interpreter/pyparser/test/test_metaparser.py
+++ b/pypy/interpreter/pyparser/test/test_metaparser.py
@@ -34,8 +34,8 @@
assert len(g.dfas) == 1
eval_sym = g.symbol_ids["eval"]
assert g.start == eval_sym
- states, first = g.dfas[eval_sym - 256]
- assert states == [([(1, 1)], False), ([], True)]
+ dfa = g.dfas[eval_sym - 256]
+ assert dfa.states == [([(1, 1)], False), ([], True)]
assert g.labels[0] == 0
def test_load_python_grammars(self):
@@ -51,7 +51,7 @@
def test_items(self):
g = self.gram_for("foo: NAME STRING OP '+'")
assert len(g.dfas) == 1
- states = g.dfas[g.symbol_ids["foo"] - 256][0]
+ states = g.dfas[g.symbol_ids["foo"] - 256].states
last = states[0][0][0][1]
for state in states[1:-1]:
assert last < state[0][0][1]
diff --git a/pypy/module/parser/pyparser.py b/pypy/module/parser/pyparser.py
--- a/pypy/module/parser/pyparser.py
+++ b/pypy/module/parser/pyparser.py
@@ -152,7 +152,7 @@
# Raise an exception now and be done with it.
raise parser_error(space, w_tuple,
"Illegal syntax-tree; cannot start with terminal
symbol.")
- node = pyparse.parser.Nonterminal(type, [])
+ node = pyparse.parser.Nonterminal(type)
build_node_children(space, w_tuple, node, node_state)
return node
@@ -171,7 +171,7 @@
strn = space.text_w(w_obj)
child = pyparse.parser.Terminal(type, strn, node_state.lineno, 0)
else:
- child = pyparse.parser.Nonterminal(type, [])
+ child = pyparse.parser.Nonterminal(type)
node.append_child(child)
if type >= 256: # Nonterminal node
build_node_children(space, w_elem, child, node_state)
@@ -187,8 +187,7 @@
raise parse_error(space, "Unrecognized node type %d." % type)
dfa = parser.grammar.dfas[type]
# Run the DFA for this nonterminal
- states, first = dfa
- arcs, is_accepting = states[0]
+ arcs, is_accepting = dfa.states[0]
for pos in range(tree.num_children()):
ch = tree.get_child(pos)
for i, next_state in arcs:
@@ -198,7 +197,7 @@
if ch.type >= 256:
validate_node(space, ch)
# Update the state, and move on to the next child.
- arcs, is_accepting = states[next_state]
+ arcs, is_accepting = dfa.states[next_state]
break
else:
raise parse_error(space, "Illegal node")
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit