[v8-dev] Scanner: remove PushBack calls when we're going to return ILLEGAL. (issue 684873002 by [email protected])

marja Fri, 31 Oct 2014 02:03:38 -0700

Reviewers: rossberg,

Message:
rossberg, ptal


Description:
Scanner: remove PushBack calls when we're going to return ILLEGAL.

This simplifies escape handling and makes it easier to extend escapes for ES6.

PushBack just before detecting ILLEGAL is unnecessary, since we will abort the
scanning / parsing anyway at that point, and it doesn't matter where the cursor
exactly is. The error messages w/ PushBack are not any better or more correct
than without.

In addition: remove a comment about handling invalid escapes gracefully when we
no longer do. (*)

This CL includes a behavioral change: For input "var r = /foobar/g\urrrr;" we
used to report "unexpected_token: ILLEGAL" for "\u", but now we report
malformed_regexp_flags which is a more correct error message. (Note that the
code for reporting invalid_regexp_flags was dead, and invalid_regexp_flags is
not the right error message.)

Note that V8 is more relaxed about unicode escapes in regexp flags than ES6
(see
http://people.mozilla.org/~jorendorff/es6-draft.html#sec-regular-expressions)
and this CL doesn't change it. (V8 accepts any \uxxxx, ES6 spec says only a
certain value range is acceptable.)

(*) Code archaeology:

Originally, doing PushBack in ScanHexEscape made sense (see e.g., here
https://codereview.chromium.org/5063003/diff/6001/src/prescanner.h ), since we
wouldn't return ILLEGAL but treat an invalid escape sequence "\uxxxx" as
"uxxxx".

(The repo at that point contains another instance of the same function, from the
initial commit. The logic is the same.)

This behavior was changed in a "renaming" commit
https://codereview.chromium.org/7739020.

BUG=

Please review this at https://codereview.chromium.org/684873002/

Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Affected files (+23, -34 lines):
  M src/messages.js
  M src/preparser.h
  M src/scanner.cc
  M test/cctest/test-parsing.cc


Index: src/messages.js
diff --git a/src/messages.js b/src/messages.js

index 6578e8dff41044f88133083db03ea4c6ea8c5b91..6c31cdfbc965bcc8539808592335536976f86fba 100644

--- a/src/messages.js
+++ b/src/messages.js
@@ -20,6 +20,7 @@ var kMessages = {
   unexpected_strict_reserved:    ["Unexpected strict mode reserved word"],
   unexpected_eos:                ["Unexpected end of input"],

   malformed_regexp:              ["Invalid regular expression: /", "%0", "/: ", "%1"],

+  malformed_regexp_flags:        ["Invalid regular expression flags"],
   unterminated_regexp:           ["Invalid regular expression: missing /"],

   regexp_flags:                  ["Cannot supply flags when constructing one RegExp from another"],
   incompatible_method_receiver:  ["Method ", "%0", " called on incompatible receiver ", "%1"],

Index: src/preparser.h
diff --git a/src/preparser.h b/src/preparser.h

index a53abfe4ff3af732cd2cd46ac79db8c35264f698..4b4417d82975742339011640a799ea25caa76272 100644

--- a/src/preparser.h
+++ b/src/preparser.h

@@ -1706,7 +1706,7 @@ typename ParserBase<Traits>::ExpressionT ParserBase<Traits>::ParseRegExpLiteral(

   IdentifierT js_pattern = this->GetNextSymbol(scanner());
   if (!scanner()->ScanRegExpFlags()) {
     Next();
-    ReportMessage("invalid_regexp_flags");
+    ReportMessage("malformed_regexp_flags");
     *ok = false;
     return Traits::EmptyExpression();
   }
Index: src/scanner.cc
diff --git a/src/scanner.cc b/src/scanner.cc

index 0709939dbac8b142a3ad81a2ea948fa681179b34..bb06f833e80779baed9c6b5c322c9d7adc178bac 100644

--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -57,20 +57,10 @@ void Scanner::Initialize(Utf16CharacterStream* source) {
 uc32 Scanner::ScanHexNumber(int expected_length) {
   DCHECK(expected_length <= 4);  // prevent overflow

-  uc32 digits[4] = { 0, 0, 0, 0 };
   uc32 x = 0;
   for (int i = 0; i < expected_length; i++) {
-    digits[i] = c0_;
     int d = HexValue(c0_);
     if (d < 0) {
-      // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
-      // should be illegal, but other JS VMs just return the
-      // non-escaped version of the original character.
-
-      // Push back digits that we have advanced past.
-      for (int j = i-1; j >= 0; j--) {
-        PushBack(digits[j]);
-      }
       return -1;
     }
     x = x * 16 + d;
@@ -894,9 +884,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
   Advance();
   if (c0_ != 'u') return -1;
   Advance();
-  uc32 result = ScanHexNumber(4);
-  if (result < 0) PushBack('u');
-  return result;
+  return ScanHexNumber(4);
 }


@@ -1145,31 +1133,17 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {

 bool Scanner::ScanLiteralUnicodeEscape() {
   DCHECK(c0_ == '\\');
-  uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
   Advance();
-  int i = 1;
+  int hex_digits_read = 0;
   if (c0_ == 'u') {
-    i++;
-    while (i < 6) {
+    while (hex_digits_read < 4) {
       Advance();
       if (!IsHexDigit(c0_)) break;
-      chars_read[i] = c0_;
-      i++;
-    }
-  }
-  if (i < 6) {
-    // Incomplete escape. Undo all advances and return false.
-    while (i > 0) {
-      i--;
-      PushBack(chars_read[i]);
+      AddLiteralChar(c0_);
+      ++hex_digits_read;
     }
-    return false;
-  }
-  // Complete escape. Add all chars to current literal buffer.
-  for (int i = 0; i < 6; i++) {
-    AddLiteralChar(chars_read[i]);
   }
-  return true;
+  return hex_digits_read == 4;
 }


@@ -1181,7 +1155,7 @@ bool Scanner::ScanRegExpFlags() {
       AddLiteralCharAdvance();
     } else {
       if (!ScanLiteralUnicodeEscape()) {
-        break;
+        return false;
       }
       Advance();
     }
Index: test/cctest/test-parsing.cc
diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc

index 40281678d9442c7096f3ff0ee27feaf1f3b09986..7955de524434c9ba7ecf21499440ce3b390cf718 100644

--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@@ -4258,3 +4258,17 @@ TEST(ConstParsingInForInError) {
   RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags,
                     arraysize(always_flags));
 }
+
+
+TEST(InvalidUnicodeEscapes) {
+  const char* context_data[][2] = {{"", ""},
+                                   {"'use strict';", ""},
+                                   {NULL, NULL}};
+  const char* data[] = {
+    "var foob\\u123r = 0;",
+    "var \\u123roo = 0;",
+    "\"foob\\u123rr\"",
+    "/regex/g\\u123r",
+    NULL};
+  RunParserSyncTest(context_data, data, kError);
+}


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

---
You received this message because you are subscribed to the Google Groups "v8-dev" group.

To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

[v8-dev] Scanner: remove PushBack calls when we're going to return ILLEGAL. (issue 684873002 by [email protected])

Reply via email to