Reviewers: dcarney,

Message:
Committed patchset #1 manually as r17870.

Description:
Experimental lexer generator: make tests pass again + style fixes.

- We no longer want to 0-terminate strings; added eos rule.
- TransitionKey.__class_bounds.values() is not sorted, but TransitionKey.any()
  was assuming it is; any() now sorts the bounds explicitly.
- Style: use 'strings' instead of "strings".

BUG=
[email protected]

Committed: https://code.google.com/p/v8/source/detail?r=17870

Please review this at https://codereview.chromium.org/76263003/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+33, -24 lines):
  M tools/lexer_generator/code_generator_test.py
  M tools/lexer_generator/lexer_test.py
  M tools/lexer_generator/transition_keys.py


Index: tools/lexer_generator/code_generator_test.py
diff --git a/tools/lexer_generator/code_generator_test.py b/tools/lexer_generator/code_generator_test.py
index 4725052d02ce8f7e511b689be0eae9bbcba3a5e6..2e6eb1b3d932836b23b1666bc8eef613a24761cc 100644
--- a/tools/lexer_generator/code_generator_test.py
+++ b/tools/lexer_generator/code_generator_test.py
@@ -33,11 +33,12 @@ class CodeGeneratorTestCase(unittest.TestCase):

   def test_simple(self):
     rules = '''
+    eos = [:eos:];
     <<default>>
     "("           <|{LBRACE}|>
     ")"           <|{RBRACE}|>

     "foo"         <|{FOO}|>
-    eof           <|terminate|>
+    eos           <|terminate|>
     default_action <{DEFAULT}>'''
     CodeGenerator(RuleProcessor.parse(rules))
Index: tools/lexer_generator/lexer_test.py
diff --git a/tools/lexer_generator/lexer_test.py b/tools/lexer_generator/lexer_test.py
index cd65207ce5d26ac867a61656a04224514b219039..861afc0d98363d5b1a99fb0496aca7290ec247a8 100644
--- a/tools/lexer_generator/lexer_test.py
+++ b/tools/lexer_generator/lexer_test.py
@@ -42,36 +42,39 @@ class LexerTestCase(unittest.TestCase):

   def test_simple(self):
     rules = '''
+    eos = [:eos:];
     <<default>>
     "("           <|LBRACE|>
     ")"           <|RBRACE|>

     "foo"         <|FOO|>
-    eof           <|terminate|>'''
+    eos           <|terminate|>'''

-    string = 'foo()\0'
+    string = 'foo()'
     self.__verify_action_stream(rules, string,
         [('FOO', 'foo'), ('LBRACE', '('), ('RBRACE', ')')])

   def test_maximal_matching(self):
     rules = '''
+    eos = [:eos:];
     <<default>>
     "<"           <|LT|>
     "<<"          <|SHL|>
     " "           <|SPACE|>
-    eof           <|terminate|>'''
+    eos           <|terminate|>'''

-    string = '<< <\0'
+    string = '<< <'
     self.__verify_action_stream(rules, string,
         [('SHL', '<<'), ('SPACE', ' '), ('LT', '<')])

   def test_consecutive_epsilon_transitions(self):
     rules = '''
+    eos = [:eos:];
     digit = [0-9];
     number = (digit+ ("." digit+)?);
     <<default>>
     number        <|NUMBER|>
-    eof           <|terminate|>'''
+    eos           <|terminate|>'''

-    string = '555\0'
+    string = '555'
     self.__verify_action_stream(rules, string, [('NUMBER', '555')])
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index 92d50b96334301c3cce92a65ebc66fc5fb3efc88..98332ec846573ddd05dd6dc7ce19963da01895e1 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -30,12 +30,13 @@ from string import printable
 class TransitionKey:

   __class_bounds = {
-    "latin_1" : (1, 255),
-    # These are not "real" ranges; they just need to be separate.
-    "whitespace" : (256, 256),
-    "literal" : (257, 257),
-    "eos" : (258, 258),
-    "zero" : (259, 259),
+    'latin_1' : (1, 255),
+    # These are not real ranges; they just need to be separate from any real
+    # ranges.
+    'whitespace' : (256, 256),
+    'literal' : (257, 257),
+    'eos' : (258, 258),
+    'zero' : (259, 259),
   }
   __lower_bound = 1
   __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.items(), 0)
@@ -46,7 +47,7 @@ class TransitionKey:

   @staticmethod
   def __in_latin_1(char):
-    bound = TransitionKey.__class_bounds["latin_1"]
+    bound = TransitionKey.__class_bounds['latin_1']
     return (bound[0] <= char and char <= bound[1])

   @staticmethod
@@ -68,6 +69,7 @@ class TransitionKey:
       assert r[1] <= TransitionKey.__upper_bound
       assert r[0] <= r[1]
       r_is_class = TransitionKey.__is_class_range(r)
+      # Assert that the ranges are in order.
       if last != None and check_merged:
         assert last[1] + 1 < r[0] or r_is_class
       if not TransitionKey.__in_latin_1(r[0]):
@@ -101,12 +103,15 @@ class TransitionKey:

   @staticmethod
   def epsilon():
-    return TransitionKey.__cached_key("epsilon", lambda name : [])
+    return TransitionKey.__cached_key('epsilon', lambda name : [])

   @staticmethod
   def any():
-    return TransitionKey.__cached_key("any",
-      lambda name : TransitionKey.__class_bounds.values())
+    def bounds_getter(name):
+      bounds = TransitionKey.__class_bounds.values()
+      bounds.sort()
+      return bounds
+    return TransitionKey.__cached_key('any', bounds_getter)

   @staticmethod
   def single_char(char):
@@ -120,7 +125,7 @@ class TransitionKey:
       bound = TransitionKey.__unique_key_counter
       TransitionKey.__unique_key_counter -= 1
       return [(bound, bound)]
-    name = "__" + name
+    name = '__' + name
     return TransitionKey.__cached_key(name, get_bounds)

   @staticmethod
@@ -146,9 +151,9 @@ class TransitionKey:
       elif class_name in key_map:
         ranges += key_map[class_name].__ranges
       else:
-        raise Exception("unknown character class [%s]" % graph[1])
+        raise Exception('unknown character class [%s]' % graph[1])
     else:
-      raise Exception("bad key [%s]" % key)
+      raise Exception('bad key [%s]' % key)

   @staticmethod
   def character_class(graph, key_map):
@@ -238,14 +243,14 @@ class TransitionKey:
         TransitionKey.__printable_cache[x] = res
       return TransitionKey.__printable_cache[x]
     if r[0] == r[1]:
-      return "%s" % to_str(r[0])
+      return '%s' % to_str(r[0])
     else:
-      return "[%s-%s]" % (to_str(r[0]), to_str(r[1]))
+      return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))

   def __str__(self):
     if self.__name:
       return self.__name
-    return ", ".join(TransitionKey.__range_str(x) for x in self.__ranges)
+    return ', '.join(TransitionKey.__range_str(x) for x in self.__ranges)

   @staticmethod
   def __disjoint_keys(range_map):
@@ -340,7 +345,7 @@ class TransitionKey:
     inverted = []
     last = None
     classes = set(TransitionKey.__class_bounds.values())
-    latin_1 = TransitionKey.__class_bounds["latin_1"]
+    latin_1 = TransitionKey.__class_bounds['latin_1']
     classes.remove(latin_1)
     for r in ranges:
       assert not TransitionKey.__is_unique_range(r)


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to