Reviewers: dcarney, ulan

Message:
Committed patchset #2 manually as r18112 (presubmit successful).

Description:
Experimental scanner: End the octal position madness.

Now we only try to get the beginning of the octal position right, and are quite liberal about the end. This should be fine for all practical purposes. The error messages will change a bit.

[email protected], [email protected]
BUG=

Committed: https://code.google.com/p/v8/source/detail?r=18112

Please review this at https://codereview.chromium.org/91963002/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+10, -15 lines):
  src/lexer/lexer-shell.cc
  src/lexer/lexer_py.re


Index: src/lexer/lexer-shell.cc
diff --git a/src/lexer/lexer-shell.cc b/src/lexer/lexer-shell.cc
index 35c34145d3ac1c3adc9bad52c6f223baa2af14d5..af50c9c9e65564a3778178d154e6cd24adc422dd 100644
--- a/src/lexer/lexer-shell.cc
+++ b/src/lexer/lexer-shell.cc
@@ -177,17 +177,13 @@ struct TokenWithLocation {
   TokenWithLocation() :
       value(Token::ILLEGAL), beg(0), end(0), is_ascii(false) { }
   TokenWithLocation(Token::Value value, size_t beg, size_t end,
-                    int octal_beg, int octal_end) :
- value(value), beg(beg), end(end), is_ascii(false), octal_beg(octal_beg),
-      octal_end(octal_end) { }
+                    int octal_beg) :
+ value(value), beg(beg), end(end), is_ascii(false), octal_beg(octal_beg) {
+  }
   bool operator==(const TokenWithLocation& other) {
-    // The octal_end of the baseline scanner is inconsistent between octal
- // numbers (end = one beyond the last digit) and octal escapes (end = the
-    // last digit). Ignore that.
     return value == other.value && beg == other.beg && end == other.end &&
            literal == other.literal && is_ascii == other.is_ascii &&
-        octal_beg == other.octal_beg &&
- octal_end >= other.octal_end - 1 && octal_end <= other.octal_end + 1;
+        octal_beg == other.octal_beg;
   }
   bool operator!=(const TokenWithLocation& other) {
     return !(*this == other);
@@ -201,7 +197,7 @@ struct TokenWithLocation {
         printf(is_ascii ? " %02x" : " %04x", literal[i]);
       }
     }
-    printf(" (last octal: %d %d)\n", octal_beg, octal_end);
+    printf(" (last octal start: %d)\n", octal_beg);
   }
 };

@@ -227,8 +223,7 @@ template<typename Scanner>
TokenWithLocation GetTokenWithLocation(Scanner *scanner, Token::Value token) {
   int beg = scanner->location().beg_pos;
   int end = scanner->location().end_pos;
- TokenWithLocation result(token, beg, end, scanner->octal_position().beg_pos,
-                           scanner->octal_position().end_pos);
+ TokenWithLocation result(token, beg, end, scanner->octal_position().beg_pos);
   if (HasLiteral(token)) {
     result.is_ascii = scanner->is_literal_ascii();
     if (scanner->is_literal_ascii()) {
Index: src/lexer/lexer_py.re
diff --git a/src/lexer/lexer_py.re b/src/lexer/lexer_py.re
index df03fe7c758da990205bcd7ca61e11882da946a8..7b0cababf8e029f6b1530fb3957efe6bf815593a 100644
--- a/src/lexer/lexer_py.re
+++ b/src/lexer/lexer_py.re
@@ -198,11 +198,11 @@ eos             <|terminate|>
 default_action  <do_token_and_go_forward(ILLEGAL)>

 <<DoubleQuoteString>>
-"\\" line_terminator_sequence <||continue>
+"\\" line_terminator_sequence <set_has_escapes||continue>
 /\\[x][:hex_digit:]{2}/       <set_has_escapes||continue>
 /\\[u][:hex_digit:]{4}/       <set_has_escapes||continue>
 /\\[1-7]/                     <octal_inside_string||continue>
-/\\[0-7]{2,3}/                <octal_inside_string||continue>
+/\\[0-7][0-7]+/               <octal_inside_string||continue>
 /\\[^xu1-7:line_terminator:]/ <set_has_escapes||continue>
 "\\"                          <|token(ILLEGAL)|>
 line_terminator               <|token(ILLEGAL)|>
@@ -212,11 +212,11 @@ catch_all                     <||continue>

 <<SingleQuoteString>>
 # TODO subgraph for '\'
-"\\" line_terminator_sequence <||continue>
+"\\" line_terminator_sequence <set_has_escapes||continue>
 /\\[x][:hex_digit:]{2}/       <set_has_escapes||continue>
 /\\[u][:hex_digit:]{4}/       <set_has_escapes||continue>
 /\\[1-7]/                     <octal_inside_string||continue>
-/\\[0-7]{2,3}/                <octal_inside_string||continue>
+/\\[0-7][0-7]+/               <octal_inside_string||continue>
 /\\[^xu1-7:line_terminator:]/ <set_has_escapes||continue>
 "\\"                          <|token(ILLEGAL)|>
 line_terminator               <|token(ILLEGAL)|>


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to