Revision: 17846
Author:   [email protected]
Date:     Mon Nov 18 16:22:04 2013 UTC
Log:      Experimental lexer generator: Lex files with zeros properly.

BUG=
[email protected]

Review URL: https://codereview.chromium.org/75353003
http://code.google.com/p/v8/source/detail?r=17846

Added:
 /branches/experimental/parser/test/cornercases
 /branches/experimental/parser/test/cornercases/file-with-zero.js
Modified:
 /branches/experimental/parser/src/lexer/lexer_py.re
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/test/cornercases/file-with-zero.js Mon Nov 18 16:22:04 2013 UTC
@@ -0,0 +1,28 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+var description = "this file has a zero character  inside a string";
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 18 15:23:23 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 18 16:22:04 2013 UTC
@@ -40,6 +40,7 @@
   /[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
 # TODO this is incomplete/incorrect
 line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
+eof = [:eof:];

 # grammar is
 #   regex <action_on_state_entry|action_on_match|transition>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Mon Nov 18 15:23:23 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Mon Nov 18 16:22:04 2013 UTC
@@ -14,6 +14,12 @@
        yych >= {{r[1][0]}}
       {%- else -%}
        ({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
+      {%- endif -%}
+    {%- elif r[0] == 'CLASS' -%}
+      {%- if r[1] == 'eof' -%}
+       (yych == 0 && cursor_ == buffer_end_)
+      {%- else -%}
+       false
       {%- endif -%}
     {%- else -%}
       false
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 14 21:09:14 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon Nov 18 16:22:04 2013 UTC
@@ -36,9 +36,7 @@
 class RuleParserState:

   def __init__(self):
-    self.aliases = {
-      'eof' : RegexParser.parse("[\\0]"),
-    }
+    self.aliases = {}
     self.character_classes = {}
     self.current_state = None
     self.rules = {}
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 18 12:39:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 18 16:22:04 2013 UTC
@@ -31,8 +31,10 @@

   __class_bounds = {
     "latin_1" : (0, 255),
+    # These are not "real" ranges; they just need to be separate.
     "whitespace" : (256, 256),
     "literal" : (257, 257),
+    "eof" : (258, 258),
   }
   __lower_bound = 0
   __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.items(), 0)
@@ -136,6 +138,8 @@
         ranges.append(TransitionKey.__class_bounds["whitespace"])
       elif class_name == 'lit':
         ranges.append(TransitionKey.__class_bounds["literal"])
+      elif class_name == 'eof':
+        ranges.append(TransitionKey.__class_bounds["eof"])
       elif class_name in key_map:
         ranges += key_map[class_name].__ranges
       else:

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to