[v8-dev] [v8] r18110 committed - Experimental scanner: keeping track of octal numbers and octal escapes...

codesite-noreply Wed, 27 Nov 2013 07:31:16 -0800

Revision: 18110
Author:   [email protected]
Date:     Wed Nov 27 15:28:46 2013 UTC

Log: Experimental scanner: keeping track of octal numbers and octal escapes.

The baseline uses a more liberal definition than ECMA, but it's ok, since this
is only used for whining about octal escapes in the strict mode. (So, even
though "\1" is technically not an octal escape inside a string (since it should
be exactly 2 digits), it's still a good idea to whine.)

[email protected]
[email protected], [email protected]
BUG=

Review URL: https://codereview.chromium.org/91833002
http://code.google.com/p/v8/source/detail?r=18110

Added:
 /branches/experimental/parser/test/lexer/cornercases/octals.js
Modified:
 /branches/experimental/parser/src/lexer/experimental-scanner.h
 /branches/experimental/parser/src/lexer/lexer-shell.cc
 /branches/experimental/parser/src/lexer/lexer_py.re

/branches/experimental/parser/test/lexer/cornercases/strings-and-identifiers-with-escapes.js

 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja

=======================================
--- /dev/null

+++ /branches/experimental/parser/test/lexer/cornercases/octals.js Wed Nov 27 15:28:46 2013 UTC

@@ -0,0 +1,58 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Octal numbers and octal escapes in strings are not allowed in the strict
+// mode.
+
+var octal_number = 031;
+var not_octal_number = 0;
+var again_not = 019;
+
+"octal inside \01 string"
+"this is not octal \0"
+"this is an octal escape followed by 9: \019"
+"doesn't need to start with 0: \11"
+
+'octal inside \01 string'
+'this is not octal \0'
+'this is an octal escape followed by 9: \01'
+'doesn\'t need to start with 0: \11'
+
+// Even more complicated cases: two octals in one string:
+"foo\00\00"
+'foo\00\00'
+
+// Different lengths of octals:
+"bar\0" // not an octal
+"bar\00"
+"bar\000" // Not an octal according to Ecma
+"bar\0000" // First 3 recognized as octal
+
+'bar\0' // not an octal
+'bar\00'
+'bar\000' // Not an octal according to Ecma
+'bar\0000' // First 3 recognized as octal
=======================================

--- /branches/experimental/parser/src/lexer/experimental-scanner.h Wed Nov 27 13:51:50 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.h Wed Nov 27 15:28:46 2013 UTC

@@ -67,7 +67,6 @@
       has_line_terminator_before_next_(true),
       current_literal_(&literals_[0]),
       next_literal_(&literals_[1]),
-      octal_pos_(Location::invalid()),
       harmony_numeric_literals_(false),
       harmony_modules_(false),
       harmony_scoping_(false) {
@@ -216,10 +215,6 @@
     return literal.length() == keyword.length() &&
         (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
   }
-
-  // Returns the location of the last seen octal literal.
-  Location octal_position() const { return octal_pos_; }
-  void clear_octal_position() { octal_pos_ = Location::invalid(); }

// Seek forward to the given position. This operation works for simplecases// such as seeking forward until simple delimiter tokens, which is whatit is

@@ -237,6 +232,10 @@
   // be empty).
   virtual bool ScanRegExpFlags() = 0;

+  // Returns the location of the last seen octal literal.
+  virtual Location octal_position() const = 0;
+  virtual void clear_octal_position() = 0;
+
  protected:
   struct TokenDesc {
     Token::Value token;
@@ -273,8 +272,6 @@
   LiteralDesc* next_literal_;
   LiteralDesc literals_[2];

-  Location octal_pos_;
-
   bool harmony_numeric_literals_;
   bool harmony_modules_;
   bool harmony_scoping_;
@@ -296,7 +293,8 @@
         buffer_end_(NULL),
         start_(NULL),
         cursor_(NULL),
-        marker_(NULL) {
+        marker_(NULL),
+        last_octal_end_(NULL) {
     ASSERT(source->IsFlat());
     SetBufferBasedOnHandle();
     Scan();
@@ -304,12 +302,17 @@

   virtual ~ExperimentalScanner() { }

- protected:
-  virtual void Scan();
   virtual void SeekForward(int pos);
   virtual void SetEnd(int pos);
   virtual bool ScanRegExpPattern(bool seen_equal);
   virtual bool ScanRegExpFlags();
+  virtual Location octal_position() const;
+  virtual void clear_octal_position() {
+    last_octal_end_ = NULL;
+  }
+
+ protected:
+  virtual void Scan();

   virtual void SetBufferBasedOnHandle() {

// We get a raw pointer from the Handle, but we also update it everytime

@@ -363,6 +366,10 @@
   const Char* start_;
   const Char* cursor_;
   const Char* marker_;
+
+  // Where we have seen the last octal number or an octal escape inside a
+  // string. Used by octal_position().
+  const Char* last_octal_end_;
 };


@@ -488,14 +495,6 @@
     if (nx >= 256) break;
     x = nx;
   }

- // Anything except '\0' is an octal escape sequence, illegal in strictmode.

-  // Remember the position of octal escape sequences so that an error
-  // can be reported later (in strict mode).
-  // We don't report the error immediately, because the octal escape can
-  // occur before the "use strict" directive.
-  if (*result != '0' || cursor > start) {
-    octal_pos_ = Location(start - 1 - buffer_, cursor - 1 - buffer_);
-  }
   *result = x;
   return cursor;
 }
@@ -593,6 +592,18 @@
   return cursor;
 }

+template<typename Char>
+ScannerBase::Location ExperimentalScanner<Char>::octal_position() const {
+  if (!last_octal_end_)
+    return Location::invalid();

+  // The last octal might be an octal escape or an octal number. Whichever it
+  // is, we'll find the start by just scanning back until we hit a non-octal
+  // character.
+  const Char* temp_cursor = last_octal_end_ - 1;

+ while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor<= '7')

+    --temp_cursor;
+  return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);
+}

 } }

=======================================

--- /branches/experimental/parser/src/lexer/lexer-shell.cc Wed Nov 27 14:18:23 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Wed Nov 27 15:28:46 2013 UTC

@@ -171,13 +171,23 @@
   size_t end;
   std::vector<int> literal;
   bool is_ascii;
+  // The location of the latest octal position when the token was seen.
+  int octal_beg;
+  int octal_end;
   TokenWithLocation() :
       value(Token::ILLEGAL), beg(0), end(0), is_ascii(false) { }
-  TokenWithLocation(Token::Value value, size_t beg, size_t end) :
-      value(value), beg(beg), end(end), is_ascii(false) { }
+  TokenWithLocation(Token::Value value, size_t beg, size_t end,
+                    int octal_beg, int octal_end) :

+ value(value), beg(beg), end(end), is_ascii(false),octal_beg(octal_beg),

+      octal_end(octal_end) { }
   bool operator==(const TokenWithLocation& other) {
+    // The octal_end of the baseline scanner is inconsistent between octal
+    // numbers (end = one beyond the last digit) and octal escapes (end = the
+    // last digit). Ignore that.
     return value == other.value && beg == other.beg && end == other.end &&
-           literal == other.literal && is_ascii == other.is_ascii;
+           literal == other.literal && is_ascii == other.is_ascii &&
+        octal_beg == other.octal_beg &&

+ octal_end >= other.octal_end - 1 && octal_end <= other.octal_end +1;

   }
   bool operator!=(const TokenWithLocation& other) {
     return !(*this == other);
@@ -191,7 +201,7 @@
         printf(is_ascii ? " %02x" : " %04x", literal[i]);
       }
     }
-    printf("\n");
+    printf(" (last octal: %d %d)\n", octal_beg, octal_end);
   }
 };

@@ -217,7 +227,8 @@

TokenWithLocation GetTokenWithLocation(Scanner *scanner, Token::Valuetoken) {

   int beg = scanner->location().beg_pos;
   int end = scanner->location().end_pos;
-  TokenWithLocation result(token, beg, end);

+ TokenWithLocation result(token, beg, end,scanner->octal_position().beg_pos,

+                           scanner->octal_position().end_pos);
   if (HasLiteral(token)) {
     result.is_ascii = scanner->is_literal_ascii();
     if (scanner->is_literal_ascii()) {
=======================================

--- /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 27 14:18:23 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 27 15:28:46 2013 UTC

@@ -32,6 +32,7 @@
 hex_digit = [0-9a-fA-F];
 single_escape_char = ['"\\bfnrtv];
 maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
+octal_number = /0[0-7]+/;
 number =
   /0[xX][:hex_digit:]+/ | (
   /\.[:digit:]+/ maybe_exponent |
@@ -96,6 +97,7 @@
 "<"           <|token(LT)|>
 ">"           <|token(GT)|>

+octal_number            <|octal_number|>
 number                  <|token(NUMBER)|>
 number identifier_char  <|token(ILLEGAL)|>
 number "\\"             <|token(ILLEGAL)|>
@@ -199,7 +201,9 @@
 "\\" line_terminator_sequence <||continue>
 /\\[x][:hex_digit:]{2}/       <set_has_escapes||continue>
 /\\[u][:hex_digit:]{4}/       <set_has_escapes||continue>
-/\\[^xu:line_terminator:]/    <set_has_escapes||continue>
+/\\[1-7]/                     <octal_inside_string||continue>
+/\\[0-7]{2,3}/                <octal_inside_string||continue>
+/\\[^xu1-7:line_terminator:]/ <set_has_escapes||continue>
 "\\"                          <|token(ILLEGAL)|>
 line_terminator               <|token(ILLEGAL)|>
 "\""                          <|token(STRING)|>
@@ -211,7 +215,9 @@
 "\\" line_terminator_sequence <||continue>
 /\\[x][:hex_digit:]{2}/       <set_has_escapes||continue>
 /\\[u][:hex_digit:]{4}/       <set_has_escapes||continue>
-/\\[^xu:line_terminator:]/    <set_has_escapes||continue>
+/\\[1-7]/                     <octal_inside_string||continue>
+/\\[0-7]{2,3}/                <octal_inside_string||continue>
+/\\[^xu1-7:line_terminator:]/ <set_has_escapes||continue>
 "\\"                          <|token(ILLEGAL)|>
 line_terminator               <|token(ILLEGAL)|>
 "'"                           <|token(STRING)|>
=======================================

--- /branches/experimental/parser/test/lexer/cornercases/strings-and-identifiers-with-escapes.js Wed Nov 27 10:41:24 2013 UTC
+++ /branches/experimental/parser/test/lexer/cornercases/strings-and-identifiers-with-escapes.js Wed Nov 27 15:28:46 2013 UTC

@@ -28,6 +28,11 @@
 "this is a normal string"
 "this is a string with an \xaa escape"
 "this \u00ab too"
-"and \n this \t\000"
+"and \n this \t\00"
+
+'this is a normal string'
+'this is a string with an \xaa escape'
+'this \u00ab too'
+'and \n this \t\00'

 identifier\u1118oo
=======================================

--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Wed Nov 27 09:41:41 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Wed Nov 27 15:28:46 2013 UTC

@@ -114,7 +114,13 @@
   {% elif type == 'set_marker' %}
     marker_ = cursor_ - {{value}};
   {% elif type == 'set_has_escapes' %}
-    next_.has_escapes = true;
+     next_.has_escapes = true;
+  {% elif type == 'octal_number' %}
+     last_octal_end_ = cursor_;
+     DO_TOKEN(Token::NUMBER);
+  {% elif type == 'octal_inside_string' %}
+     last_octal_end_ = cursor_;
+     next_.has_escapes = true;
   {% else %}
     uncompilable code for {{type}}
   {% endif -%}

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

---
You received this message because you are subscribed to the Google Groups "v8-dev" group.

To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

[v8-dev] [v8] r18110 committed - Experimental scanner: keeping track of octal numbers and octal escapes...

Reply via email to