Reviewers: marja,

Message:
Committed patchset #1 manually as r19295 (presubmit successful).

Description:
Experimental parser: store literals as ints

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=19295

Please review this at https://codereview.chromium.org/157813004/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+23, -26 lines):
  M tools/lexer_generator/automata_test.py
  M tools/lexer_generator/nfa_builder.py
  M tools/lexer_generator/regex_parser.py
  M tools/lexer_generator/transition_key_test.py
  M tools/lexer_generator/transition_keys.py


Index: tools/lexer_generator/automata_test.py
diff --git a/tools/lexer_generator/automata_test.py b/tools/lexer_generator/automata_test.py
index 158a602543206cc3665370340c63e34a52644b44..63142f21fbf9ec19503b9d4856481d3e3f60bdf0 100644
--- a/tools/lexer_generator/automata_test.py
+++ b/tools/lexer_generator/automata_test.py
@@ -91,9 +91,9 @@ class AutomataTestCase(unittest.TestCase):
         'terminal' : False,
         'action' : Action.empty_action() }
     mapping = { k : empty_node() for k in ['S_0', 'S_1', 'S_2', 'S_3'] }
-    key_a = TransitionKey.single_char(encoding, 'a')
-    key_b = TransitionKey.single_char(encoding, 'b')
-    key_c = TransitionKey.single_char(encoding, 'c')
+    key_a = TransitionKey.single_char(encoding, ord('a'))
+    key_b = TransitionKey.single_char(encoding, ord('b'))
+    key_c = TransitionKey.single_char(encoding, ord('c'))

     mapping['S_0']['transitions'][key_a] = 'S_1'
     mapping['S_0']['transitions'][key_b] = 'S_2'
Index: tools/lexer_generator/nfa_builder.py
diff --git a/tools/lexer_generator/nfa_builder.py b/tools/lexer_generator/nfa_builder.py
index d2a2d566c7b82921920a80ba32bee198a23de10d..ea30c2ce0e605a6bd0c92073691e24df8cd6c2e5 100644
--- a/tools/lexer_generator/nfa_builder.py
+++ b/tools/lexer_generator/nfa_builder.py
@@ -128,7 +128,7 @@ class NfaBuilder(object):
     state.add_unclosed_transition(key)
     return (state, [state])

-  def __literal(self, chars):
+  def __literal(self, *chars):
     terms = map(lambda c : Term('SINGLE_CHAR', c), chars)
     return self.__process(self.cat_terms(terms))

@@ -320,24 +320,21 @@ class NfaBuilder(object):

   @staticmethod
   def __flatten_literals(terms):
-    literal = None
+    acc = ()
     for term in terms:
       assert isinstance(term, Term)
       if not term:
         continue
       if term.name() == 'LITERAL':
-        if literal:
-          literal += term.args()[0]
-        else:
-          literal = term.args()[0]
+        acc += term.args()
       else:
-        if literal:
-          yield Term('LITERAL', literal)
-          literal = None
+        if acc:
+          yield Term('LITERAL', *acc)
+          acc = ()
         if term:
           yield term
-    if literal:
-      yield Term('LITERAL', literal)
+    if acc:
+      yield Term('LITERAL', *acc)

   @staticmethod
   def or_terms(terms):
Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
index d74a3e8d9b76b941ce8e9376f9d1b720d29e76d0..9c86a0b927f7742982d0f213d13f1ce37ebc9975 100644
--- a/tools/lexer_generator/regex_parser.py
+++ b/tools/lexer_generator/regex_parser.py
@@ -238,7 +238,7 @@ class RegexParser:

   def p_literal(self, p):
     '''literal : LITERAL'''
-    p[0] = Term('LITERAL', p[1])
+    p[0] = Term('LITERAL', ord(p[1]))

   def p_any(self, p):
     '''any : ANY'''
@@ -262,10 +262,10 @@ class RegexParser:
                      | CHARACTER_CLASS maybe_class_content
     '''
     if len(p) == 5:
-      left = Term("RANGE", p[1], p[3])
+      left = Term("RANGE", ord(p[1]), ord(p[3]))
     else:
       if len(p[1]) == 1:
-        left = Term('LITERAL', p[1])
+        left = Term('LITERAL', ord(p[1]))
       else:
         left = Term('CHARACTER_CLASS', p[1][1:-1])
     p[0] = self.__cat(left, p[len(p)-1])
Index: tools/lexer_generator/transition_key_test.py
diff --git a/tools/lexer_generator/transition_key_test.py b/tools/lexer_generator/transition_key_test.py
index 0b627c9306b3ccbb65618db1c75ee8cdddba8f47..d4e348ad26642ffe97544b435bc15a24e4556b36 100644
--- a/tools/lexer_generator/transition_key_test.py
+++ b/tools/lexer_generator/transition_key_test.py
@@ -36,8 +36,8 @@ class TransitionKeyTestCase(unittest.TestCase):
   __equal_pairs = [
     (TransitionKey.epsilon(), TransitionKey.epsilon()),
     (TransitionKey.any(__encoding), TransitionKey.any(__encoding)),
-    (TransitionKey.single_char(__encoding, 'a'),
-     TransitionKey.single_char(__encoding, 'a')),
+    (TransitionKey.single_char(__encoding, ord('a')),
+     TransitionKey.single_char(__encoding, ord('a'))),
   ]

   def test_eq(self):
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index a7ef16ee54f8a3aa56627de99fd53a56ad30f8db..14dfe770bf60c4ea83b3647b3ac07084be9c2f02 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -25,6 +25,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+from types import IntType
 from itertools import chain
 from action import Term
 from string import printable
@@ -144,8 +145,7 @@ class TransitionKey(object):
   @staticmethod
   def omega():
     '''Always matches.'''
-    return TransitionKey.__cached_key(None, 'omega',
-      lambda : Term("OMEGA_KEY"))
+    return TransitionKey.__cached_key(None, 'omega', lambda : Term("OMEGA_KEY"))

   @staticmethod
   def any(encoding):
@@ -154,13 +154,14 @@ class TransitionKey(object):
         lambda : encoding.all_components_iter())

   @staticmethod
-  def single_char(encoding, char):  # TODO(dcarney): char should be int
+  def single_char(encoding, char):
     '''Returns a TransitionKey for a single-character transition.'''
-    return TransitionKey(encoding, Term("NUMERIC_RANGE_KEY", ord(char), ord(char)))
+    return TransitionKey.range(encoding, char, char)

   @staticmethod
   def range(encoding, a, b):
     '''Returns a TransitionKey for a single-character transition.'''
+    assert type(a) == IntType and type(b) == IntType
     return TransitionKey(encoding, Term("NUMERIC_RANGE_KEY", a, b))

   @staticmethod
@@ -176,10 +177,9 @@ class TransitionKey(object):
     key = term.name()
     args = term.args()
     if key == 'RANGE':
-      components.append(Term('NUMERIC_RANGE_KEY', ord(args[0]), ord(args[1])))
+      components.append(Term('NUMERIC_RANGE_KEY', args[0], args[1]))
     elif key == 'LITERAL':
-      for char in args[0]:  # TODO(dcarney): don't use strings for literals
-        components.append(Term('NUMERIC_RANGE_KEY', ord(char), ord(char)))
+      components += map(lambda x : Term('NUMERIC_RANGE_KEY', x, x), args)
     elif key == 'CAT':
       for x in args:
         TransitionKey.__process_term(encoding, x, components, key_map)


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to