Reviewers: rossberg,
Message:
rossberg, ptal
Description:
Scanner: remove PushBack calls when we're going to return ILLEGAL.
This simplifies escape handling and makes it easier to extend escapes for
ES6.
PushBack just before detecting ILLEGAL is unnecessary, since we will abort
the scanning / parsing anyway at that point, and it doesn't matter exactly
where the cursor is. The error messages w/ PushBack are not any better or
more correct than without.
In addition: remove a comment about handling invalid escapes gracefully,
since we no longer do so. (*)
This CL includes a behavioral change: For input "var r = /foobar/g\urrrr;"
we used to report "unexpected_token: ILLEGAL" for "\u", but now we report
malformed_regexp_flags, which is a more correct error message. (Note that
the code for reporting invalid_regexp_flags was dead, and
invalid_regexp_flags is not the right error message.)
Note that V8 is more relaxed about unicode escapes in regexp flags than ES6
(see
http://people.mozilla.org/~jorendorff/es6-draft.html#sec-regular-expressions
) and this CL doesn't change that. (V8 accepts any \uxxxx, while the ES6 spec
says only a certain value range is acceptable.)
(*) Code archaeology:
Originally, doing PushBack in ScanHexEscape made sense (see e.g., here
https://codereview.chromium.org/5063003/diff/6001/src/prescanner.h ), since
we wouldn't return ILLEGAL but would instead treat an invalid escape
sequence "\uxxxx" as "uxxxx".
(The repo at that point contains another instance of the same function,
from the initial commit. The logic is the same.)
This behavior was changed in a "renaming" commit
https://codereview.chromium.org/7739020.
BUG=
Please review this at https://codereview.chromium.org/684873002/
Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Affected files (+23, -34 lines):
M src/messages.js
M src/preparser.h
M src/scanner.cc
M test/cctest/test-parsing.cc
Index: src/messages.js
diff --git a/src/messages.js b/src/messages.js
index
6578e8dff41044f88133083db03ea4c6ea8c5b91..6c31cdfbc965bcc8539808592335536976f86fba
100644
--- a/src/messages.js
+++ b/src/messages.js
@@ -20,6 +20,7 @@ var kMessages = {
unexpected_strict_reserved: ["Unexpected strict mode reserved word"],
unexpected_eos: ["Unexpected end of input"],
malformed_regexp: ["Invalid regular expression:
/", "%0", "/: ", "%1"],
+ malformed_regexp_flags: ["Invalid regular expression flags"],
unterminated_regexp: ["Invalid regular expression: missing /"],
regexp_flags: ["Cannot supply flags when constructing
one RegExp from another"],
incompatible_method_receiver: ["Method ", "%0", " called on
incompatible receiver ", "%1"],
Index: src/preparser.h
diff --git a/src/preparser.h b/src/preparser.h
index
a53abfe4ff3af732cd2cd46ac79db8c35264f698..4b4417d82975742339011640a799ea25caa76272
100644
--- a/src/preparser.h
+++ b/src/preparser.h
@@ -1706,7 +1706,7 @@ typename ParserBase<Traits>::ExpressionT
ParserBase<Traits>::ParseRegExpLiteral(
IdentifierT js_pattern = this->GetNextSymbol(scanner());
if (!scanner()->ScanRegExpFlags()) {
Next();
- ReportMessage("invalid_regexp_flags");
+ ReportMessage("malformed_regexp_flags");
*ok = false;
return Traits::EmptyExpression();
}
Index: src/scanner.cc
diff --git a/src/scanner.cc b/src/scanner.cc
index
0709939dbac8b142a3ad81a2ea948fa681179b34..bb06f833e80779baed9c6b5c322c9d7adc178bac
100644
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -57,20 +57,10 @@ void Scanner::Initialize(Utf16CharacterStream* source) {
uc32 Scanner::ScanHexNumber(int expected_length) {
DCHECK(expected_length <= 4); // prevent overflow
- uc32 digits[4] = { 0, 0, 0, 0 };
uc32 x = 0;
for (int i = 0; i < expected_length; i++) {
- digits[i] = c0_;
int d = HexValue(c0_);
if (d < 0) {
- // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
- // should be illegal, but other JS VMs just return the
- // non-escaped version of the original character.
-
- // Push back digits that we have advanced past.
- for (int j = i-1; j >= 0; j--) {
- PushBack(digits[j]);
- }
return -1;
}
x = x * 16 + d;
@@ -894,9 +884,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
Advance();
if (c0_ != 'u') return -1;
Advance();
- uc32 result = ScanHexNumber(4);
- if (result < 0) PushBack('u');
- return result;
+ return ScanHexNumber(4);
}
@@ -1145,31 +1133,17 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
bool Scanner::ScanLiteralUnicodeEscape() {
DCHECK(c0_ == '\\');
- uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
Advance();
- int i = 1;
+ int hex_digits_read = 0;
if (c0_ == 'u') {
- i++;
- while (i < 6) {
+ while (hex_digits_read < 4) {
Advance();
if (!IsHexDigit(c0_)) break;
- chars_read[i] = c0_;
- i++;
- }
- }
- if (i < 6) {
- // Incomplete escape. Undo all advances and return false.
- while (i > 0) {
- i--;
- PushBack(chars_read[i]);
+ AddLiteralChar(c0_);
+ ++hex_digits_read;
}
- return false;
- }
- // Complete escape. Add all chars to current literal buffer.
- for (int i = 0; i < 6; i++) {
- AddLiteralChar(chars_read[i]);
}
- return true;
+ return hex_digits_read == 4;
}
@@ -1181,7 +1155,7 @@ bool Scanner::ScanRegExpFlags() {
AddLiteralCharAdvance();
} else {
if (!ScanLiteralUnicodeEscape()) {
- break;
+ return false;
}
Advance();
}
Index: test/cctest/test-parsing.cc
diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc
index
40281678d9442c7096f3ff0ee27feaf1f3b09986..7955de524434c9ba7ecf21499440ce3b390cf718
100644
--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@@ -4258,3 +4258,17 @@ TEST(ConstParsingInForInError) {
RunParserSyncTest(context_data, data, kError, NULL, 0, always_flags,
arraysize(always_flags));
}
+
+
+TEST(InvalidUnicodeEscapes) {
+ const char* context_data[][2] = {{"", ""},
+ {"'use strict';", ""},
+ {NULL, NULL}};
+ const char* data[] = {
+ "var foob\\u123r = 0;",
+ "var \\u123roo = 0;",
+ "\"foob\\u123rr\"",
+ "/regex/g\\u123r",
+ NULL};
+ RunParserSyncTest(context_data, data, kError);
+}
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.