Revision: 17857
Author:   [email protected]
Date:     Tue Nov 19 10:52:01 2013 UTC
Log:      Experimental lexer generator: Misc fixes.

- eof inside a multiline comment
- other eof fixes (buffer check)
- rename eof -> eos

[email protected]
BUG=

Review URL: https://codereview.chromium.org/64023004
http://code.google.com/p/v8/source/detail?r=17857

Modified:
 /branches/experimental/parser/src/lexer/experimental-scanner.cc
 /branches/experimental/parser/src/lexer/lexer_py.re
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.cc Mon Nov 18 12:07:11 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.cc Tue Nov 19 10:52:01 2013 UTC
@@ -51,7 +51,7 @@

   *size = file_size * repeat;

-  byte* chars = new byte[*size + 1];
+  byte* chars = new byte[*size];
   for (int i = 0; i < file_size;) {
     int read = static_cast<int>(fread(&chars[i], 1, file_size - i, file));
     i += read;
@@ -61,7 +61,6 @@
   for (int i = file_size; i < *size; i++) {
     chars[i] = chars[i - file_size];
   }
-  chars[*size] = 0;

   return chars;
 }
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 18 16:22:04 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Tue Nov 19 10:52:01 2013 UTC
@@ -40,7 +40,7 @@
   /[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
 # TODO this is incomplete/incorrect
 line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
-eof = [:eof:];
+eos = [:eos:];

 # grammar is
 #   regex <action_on_state_entry|action_on_match|transition>
@@ -191,7 +191,7 @@
   }
 }|push_token(IDENTIFIER)|Identifier>

-eof             <|terminate|>
+eos             <|terminate|>
 default_action  <push_token_and_go_forward(ILLEGAL)>

 <<DoubleQuoteString>>
@@ -202,7 +202,7 @@
 "\\"                          <|push_token(ILLEGAL)|>
 /\n|\r/                       <|push_token(ILLEGAL)|>
 "\""                          <|push_token(STRING)|>
-eof                           <|terminate_illegal|>
+eos                           <|terminate_illegal|>
 catch_all                     <||continue>

 <<SingleQuoteString>>
@@ -214,7 +214,7 @@
 "\\"                          <|push_token(ILLEGAL)|>
 /\n|\r/                       <|push_token(ILLEGAL)|>
 "'"                           <|push_token(STRING)|>
-eof                           <|terminate_illegal|>
+eos                           <|terminate_illegal|>
 catch_all                     <||continue>

 <<Identifier>>
@@ -227,7 +227,7 @@

 <<SingleLineComment>>
 line_terminator  <|push_line_terminator|>
-eof <|skip_and_terminate|>
+eos <|skip_and_terminate|>
 catch_all <||continue>

 <<MultiLineComment>>
@@ -235,5 +235,5 @@
 # TODO find a way to generate the below rule
 /\*+[^\/*]/       <||continue>
 line_terminator  <push_line_terminator||continue>
-eof <|{start_ = marker_; BACKWARD(); PUSH_TOKEN(Token::ILLEGAL);}|>
+eos <|{start_ = marker_; PUSH_TOKEN(Token::ILLEGAL);}|>
 catch_all        <||continue>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Tue Nov 19 08:17:07 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Tue Nov 19 10:52:01 2013 UTC
@@ -1,6 +1,5 @@
 #include "lexer/even-more-experimental-scanner.h"

-
 {# TODO implement CLASS checks #}
 {%- macro do_key(key) -%}
   {%- for r in key -%}
@@ -16,8 +15,10 @@
        ({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
       {%- endif -%}
     {%- elif r[0] == 'CLASS' -%}
-      {%- if r[1] == 'eof' -%}
-       (yych == 0 && cursor_ == buffer_end_)
+      {%- if r[1] == 'eos' -%}
+       (yych == 0 && cursor_ >= buffer_end_)
+      {%- elif r[1] == 'zero' -%}
+       (yych == 0 && cursor_ < buffer_end_)
       {%- else -%}
        false
       {%- endif -%}
@@ -142,7 +143,8 @@
 }

 #define PUSH_EOS() {                  \
-  --cursor_;                          \
+  --start_;                           \
+  cursor_ -= 2;                       \
   PUSH_TOKEN(Token::EOS);             \
 }

@@ -152,11 +154,13 @@
 }

 #define FORWARD() {                   \
-  yych = *(++cursor_);                \
+  if (++cursor_ >= buffer_end_) yych = 0; \
+  else yych = *(cursor_);                 \
 }

 #define BACKWARD() {                  \
-  yych = *(--cursor_);                \
+  if (--cursor_ >= buffer_end_) yych = 0; \
+  else yych = *(cursor_);                 \
 }

 #define SKIP() {                      \
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 18 16:22:04 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 19 10:52:01 2013 UTC
@@ -30,13 +30,14 @@
 class TransitionKey:

   __class_bounds = {
-    "latin_1" : (0, 255),
+    "latin_1" : (1, 255),
     # These are not "real" ranges; they just need to be separate.
     "whitespace" : (256, 256),
     "literal" : (257, 257),
-    "eof" : (258, 258),
+    "eos" : (258, 258),
+    "zero" : (259, 259),
   }
-  __lower_bound = 0
+  __lower_bound = 1
   __upper_bound = reduce(lambda acc, (k, v): max(acc, v[1]), __class_bounds.items(), 0)

   __cached_keys = {}
@@ -135,11 +136,13 @@
     elif key == 'CHARACTER_CLASS':
       class_name = graph[1]
       if class_name == 'ws':
-        ranges.append(TransitionKey.__class_bounds["whitespace"])
+        ranges.append(TransitionKey.__class_bounds['whitespace'])
       elif class_name == 'lit':
-        ranges.append(TransitionKey.__class_bounds["literal"])
-      elif class_name == 'eof':
-        ranges.append(TransitionKey.__class_bounds["eof"])
+        ranges.append(TransitionKey.__class_bounds['literal'])
+      elif class_name == 'eos':
+        ranges.append(TransitionKey.__class_bounds['eos'])
+      elif class_name == 'zero':
+        ranges.append(TransitionKey.__class_bounds['zero'])
       elif class_name in key_map:
         ranges += key_map[class_name].__ranges
       else:

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to