Author: Carl Friedrich Bolz-Tereick <cfb...@gmx.de>
Branch: pyparser-improvements
Changeset: r93979:2221844101cc
Date: 2018-03-12 17:40 +0100
http://bitbucket.org/pypy/pypy/changeset/2221844101cc/

Log:    store the first set as 32-char string, as opposed to a dict

diff --git a/pypy/interpreter/pyparser/parser.py 
b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -1,6 +1,7 @@
 """
 A CPython inspired RPython parser.
 """
+from rpython.rlib.objectmodel import not_rpython
 
 
 class Grammar(object):
@@ -45,8 +46,23 @@
     def __init__(self, symbol_id, states, first):
         self.symbol_id = symbol_id
         self.states = states
-        self.first = first
+        self.first = self._first_to_string(first)
 
+    def could_match_token(self, label_index):
+        pos = label_index >> 3
+        bit = 1 << (label_index & 0b111)
+        return bool(ord(self.first[label_index >> 3]) & bit)
+
+    @staticmethod
+    @not_rpython
+    def _first_to_string(first):
+        l = sorted(first.keys())
+        b = bytearray(32)
+        for label_index in l:
+            pos = label_index >> 3
+            bit = 1 << (label_index & 0b111)
+            b[pos] |= bit
+        return str(b)
 
 class Node(object):
 
@@ -259,7 +275,7 @@
                 elif sym_id >= 256:
                     sub_node_dfa = self.grammar.dfas[sym_id - 256]
                     # Check if this token can start a child node.
-                    if label_index in sub_node_dfa.first:
+                    if sub_node_dfa.could_match_token(label_index):
                         self.push(sub_node_dfa, next_state, sym_id, lineno,
                                   column)
                         break
diff --git a/pypy/interpreter/pyparser/test/test_parser.py 
b/pypy/interpreter/pyparser/test/test_parser.py
--- a/pypy/interpreter/pyparser/test/test_parser.py
+++ b/pypy/interpreter/pyparser/test/test_parser.py
@@ -7,6 +7,12 @@
 from pypy.interpreter.pyparser.test.test_metaparser import MyGrammar
 
 
+def test_char_set():
+    first = {5: None, 9: None, 100: None, 255:None}
+    p = parser.DFA(None, None, first)
+    for i in range(256):
+        assert p.could_match_token(i) == (i in first)
+
 class SimpleParser(parser.Parser):
 
     def parse(self, input):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to