Revision: 17846
Author: [email protected]
Date: Mon Nov 18 16:22:04 2013 UTC
Log: Experimental lexer generator: Lex files with zeros properly.
BUG=
[email protected]
Review URL: https://codereview.chromium.org/75353003
http://code.google.com/p/v8/source/detail?r=17846
Added:
/branches/experimental/parser/test/cornercases
/branches/experimental/parser/test/cornercases/file-with-zero.js
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/test/cornercases/file-with-zero.js Mon Nov 18 16:22:04 2013 UTC
@@ -0,0 +1,28 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+var description = "this file has a zero character inside a string";
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 18 15:23:23 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 18 16:22:04 2013 UTC
@@ -40,6 +40,7 @@
/[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
# TODO this is incomplete/incorrect
line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
+eof = [:eof:];
# grammar is
# regex <action_on_state_entry|action_on_match|transition>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Mon Nov 18 15:23:23 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Mon Nov 18 16:22:04 2013 UTC
@@ -14,6 +14,12 @@
yych >= {{r[1][0]}}
{%- else -%}
({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
+ {%- endif -%}
+ {%- elif r[0] == 'CLASS' -%}
+ {%- if r[1] == 'eof' -%}
+ (yych == 0 && cursor_ == buffer_end_)
+ {%- else -%}
+ false
{%- endif -%}
{%- else -%}
false
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 14 21:09:14 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon Nov 18 16:22:04 2013 UTC
@@ -36,9 +36,7 @@
class RuleParserState:
def __init__(self):
- self.aliases = {
- 'eof' : RegexParser.parse("[\\0]"),
- }
+ self.aliases = {}
self.character_classes = {}
self.current_state = None
self.rules = {}
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 18 12:39:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 18 16:22:04 2013 UTC
@@ -31,8 +31,10 @@
__class_bounds = {
"latin_1" : (0, 255),
+ # These are not "real" ranges; they just need to be separate.
"whitespace" : (256, 256),
"literal" : (257, 257),
+ "eof" : (258, 258),
}
__lower_bound = 0
__upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]),
__class_bounds.items(), 0)
@@ -136,6 +138,8 @@
ranges.append(TransitionKey.__class_bounds["whitespace"])
elif class_name == 'lit':
ranges.append(TransitionKey.__class_bounds["literal"])
+ elif class_name == 'eof':
+ ranges.append(TransitionKey.__class_bounds["eof"])
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.