Reviewers: dcarney,
Message:
Committed patchset #1 manually as r17870.
Description:
Experimental lexer generator: make tests pass again + style fixes.
- We no longer want to 0-terminate strings; added eos rule.
- TransitionKey.__class_bounds.values() is not sorted, but
  TransitionKey.any() was assuming it is.
- Style: use 'strings' instead of "strings".
BUG=
[email protected]
Committed: https://code.google.com/p/v8/source/detail?r=17870
Please review this at https://codereview.chromium.org/76263003/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+33, -24 lines):
M tools/lexer_generator/code_generator_test.py
M tools/lexer_generator/lexer_test.py
M tools/lexer_generator/transition_keys.py
Index: tools/lexer_generator/code_generator_test.py
diff --git a/tools/lexer_generator/code_generator_test.py b/tools/lexer_generator/code_generator_test.py
index 4725052d02ce8f7e511b689be0eae9bbcba3a5e6..2e6eb1b3d932836b23b1666bc8eef613a24761cc 100644
--- a/tools/lexer_generator/code_generator_test.py
+++ b/tools/lexer_generator/code_generator_test.py
@@ -33,11 +33,12 @@ class CodeGeneratorTestCase(unittest.TestCase):
def test_simple(self):
rules = '''
+ eos = [:eos:];
<<default>>
"(" <|{LBRACE}|>
")" <|{RBRACE}|>
"foo" <|{FOO}|>
- eof <|terminate|>
+ eos <|terminate|>
default_action <{DEFAULT}>'''
CodeGenerator(RuleProcessor.parse(rules))
Index: tools/lexer_generator/lexer_test.py
diff --git a/tools/lexer_generator/lexer_test.py b/tools/lexer_generator/lexer_test.py
index cd65207ce5d26ac867a61656a04224514b219039..861afc0d98363d5b1a99fb0496aca7290ec247a8 100644
--- a/tools/lexer_generator/lexer_test.py
+++ b/tools/lexer_generator/lexer_test.py
@@ -42,36 +42,39 @@ class LexerTestCase(unittest.TestCase):
def test_simple(self):
rules = '''
+ eos = [:eos:];
<<default>>
"(" <|LBRACE|>
")" <|RBRACE|>
"foo" <|FOO|>
- eof <|terminate|>'''
+ eos <|terminate|>'''
- string = 'foo()\0'
+ string = 'foo()'
self.__verify_action_stream(rules, string,
[('FOO', 'foo'), ('LBRACE', '('), ('RBRACE', ')')])
def test_maximal_matching(self):
rules = '''
+ eos = [:eos:];
<<default>>
"<" <|LT|>
"<<" <|SHL|>
" " <|SPACE|>
- eof <|terminate|>'''
+ eos <|terminate|>'''
- string = '<< <\0'
+ string = '<< <'
self.__verify_action_stream(rules, string,
[('SHL', '<<'), ('SPACE', ' '), ('LT', '<')])
def test_consecutive_epsilon_transitions(self):
rules = '''
+ eos = [:eos:];
digit = [0-9];
number = (digit+ ("." digit+)?);
<<default>>
number <|NUMBER|>
- eof <|terminate|>'''
+ eos <|terminate|>'''
- string = '555\0'
+ string = '555'
self.__verify_action_stream(rules, string, [('NUMBER', '555')])
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index 92d50b96334301c3cce92a65ebc66fc5fb3efc88..98332ec846573ddd05dd6dc7ce19963da01895e1 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -30,12 +30,13 @@ from string import printable
class TransitionKey:
__class_bounds = {
- "latin_1" : (1, 255),
- # These are not "real" ranges; they just need to be separate.
- "whitespace" : (256, 256),
- "literal" : (257, 257),
- "eos" : (258, 258),
- "zero" : (259, 259),
+ 'latin_1' : (1, 255),
+ # These are not real ranges; they just need to be separate from any real
+ # ranges.
+ 'whitespace' : (256, 256),
+ 'literal' : (257, 257),
+ 'eos' : (258, 258),
+ 'zero' : (259, 259),
}
__lower_bound = 1
__upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]),
__class_bounds.items(), 0)
@@ -46,7 +47,7 @@ class TransitionKey:
@staticmethod
def __in_latin_1(char):
- bound = TransitionKey.__class_bounds["latin_1"]
+ bound = TransitionKey.__class_bounds['latin_1']
return (bound[0] <= char and char <= bound[1])
@staticmethod
@@ -68,6 +69,7 @@ class TransitionKey:
assert r[1] <= TransitionKey.__upper_bound
assert r[0] <= r[1]
r_is_class = TransitionKey.__is_class_range(r)
+ # Assert that the ranges are in order.
if last != None and check_merged:
assert last[1] + 1 < r[0] or r_is_class
if not TransitionKey.__in_latin_1(r[0]):
@@ -101,12 +103,15 @@ class TransitionKey:
@staticmethod
def epsilon():
- return TransitionKey.__cached_key("epsilon", lambda name : [])
+ return TransitionKey.__cached_key('epsilon', lambda name : [])
@staticmethod
def any():
- return TransitionKey.__cached_key("any",
- lambda name : TransitionKey.__class_bounds.values())
+ def bounds_getter(name):
+ bounds = TransitionKey.__class_bounds.values()
+ bounds.sort()
+ return bounds
+ return TransitionKey.__cached_key('any', bounds_getter)
@staticmethod
def single_char(char):
@@ -120,7 +125,7 @@ class TransitionKey:
bound = TransitionKey.__unique_key_counter
TransitionKey.__unique_key_counter -= 1
return [(bound, bound)]
- name = "__" + name
+ name = '__' + name
return TransitionKey.__cached_key(name, get_bounds)
@staticmethod
@@ -146,9 +151,9 @@ class TransitionKey:
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
- raise Exception("unknown character class [%s]" % graph[1])
+ raise Exception('unknown character class [%s]' % graph[1])
else:
- raise Exception("bad key [%s]" % key)
+ raise Exception('bad key [%s]' % key)
@staticmethod
def character_class(graph, key_map):
@@ -238,14 +243,14 @@ class TransitionKey:
TransitionKey.__printable_cache[x] = res
return TransitionKey.__printable_cache[x]
if r[0] == r[1]:
- return "%s" % to_str(r[0])
+ return '%s' % to_str(r[0])
else:
- return "[%s-%s]" % (to_str(r[0]), to_str(r[1]))
+ return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
def __str__(self):
if self.__name:
return self.__name
- return ", ".join(TransitionKey.__range_str(x) for x in self.__ranges)
+ return ', '.join(TransitionKey.__range_str(x) for x in self.__ranges)
@staticmethod
def __disjoint_keys(range_map):
@@ -340,7 +345,7 @@ class TransitionKey:
inverted = []
last = None
classes = set(TransitionKey.__class_bounds.values())
- latin_1 = TransitionKey.__class_bounds["latin_1"]
+ latin_1 = TransitionKey.__class_bounds['latin_1']
classes.remove(latin_1)
for r in ranges:
assert not TransitionKey.__is_unique_range(r)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.