Reviewers: dcarney
Message:
Committed patchset #1 manually as r17846 (presubmit successful).
Description:
Experimental lexer generator: Correctly lex files that contain zero (NUL) characters, instead of treating every zero character as end of input.
BUG=
[email protected]
Committed: https://code.google.com/p/v8/source/detail?r=17846
Please review this at https://codereview.chromium.org/75353003/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+11, -2 lines):
M src/lexer/lexer_py.re
A + test/cornercases/file-with-zero.js
M tools/lexer_generator/code_generator.jinja
M tools/lexer_generator/rule_parser.py
M tools/lexer_generator/transition_keys.py
Index: src/lexer/lexer_py.re
diff --git a/src/lexer/lexer_py.re b/src/lexer/lexer_py.re
index 650f5455b071da47caac47db651a43e2d3e118f6..f5a0713bfcedcf42accf852492a4211e71474ccb 100644
--- a/src/lexer/lexer_py.re
+++ b/src/lexer/lexer_py.re
@@ -40,6 +40,7 @@ number =
/[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
# TODO this is incomplete/incorrect
line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
+eof = [:eof:];
# grammar is
# regex <action_on_state_entry|action_on_match|transition>
Index: test/cornercases/file-with-zero.js
diff --git a/test/mjsunit/regress/regress-2690.js b/test/cornercases/file-with-zero.js
similarity index 95%
copy from test/mjsunit/regress/regress-2690.js
copy to test/cornercases/file-with-zero.js
index 5fe7dc42dc693cf7f0a3e9a2b13c0f7b306189e2..dc921a75d150a11903afa5a5185d4047f638f01f 100644
Binary files a/test/mjsunit/regress/regress-2690.js and b/test/cornercases/file-with-zero.js differ
Index: tools/lexer_generator/code_generator.jinja
diff --git a/tools/lexer_generator/code_generator.jinja b/tools/lexer_generator/code_generator.jinja
index 40b27d9c900c465d1e03905ba9275f6264c904a2..231e58d17cbc3701eb7b4c7885b55f97a76c9671 100644
--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -15,6 +15,12 @@
{%- else -%}
({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
{%- endif -%}
+ {%- elif r[0] == 'CLASS' -%}
+ {%- if r[1] == 'eof' -%}
+ (yych == 0 && cursor_ == buffer_end_)
+ {%- else -%}
+ false
+ {%- endif -%}
{%- else -%}
false
{%- endif -%}
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index f065436e3428ab2413726a735f3971e805cbd82b..d15ee23773bb218c84b59afe6ab7bab238e43882 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -36,9 +36,7 @@ from transition_keys import TransitionKey
class RuleParserState:
def __init__(self):
- self.aliases = {
- 'eof' : RegexParser.parse("[\\0]"),
- }
+ self.aliases = {}
self.character_classes = {}
self.current_state = None
self.rules = {}
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index f639d43843620dd4417c6639edbde68380eaf0a2..4e650969f1ae8d866c6b99c32d433d46b6d14b92 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -31,8 +31,10 @@ class TransitionKey:
__class_bounds = {
"latin_1" : (0, 255),
+ # These are not "real" ranges; they just need to be separate.
"whitespace" : (256, 256),
"literal" : (257, 257),
+ "eof" : (258, 258),
}
__lower_bound = 0
__upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]),
__class_bounds.items(), 0)
@@ -136,6 +138,8 @@ class TransitionKey:
ranges.append(TransitionKey.__class_bounds["whitespace"])
elif class_name == 'lit':
ranges.append(TransitionKey.__class_bounds["literal"])
+ elif class_name == 'eof':
+ ranges.append(TransitionKey.__class_bounds["eof"])
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.