Reviewers: Erik Corry

Description:
Make the scanner reject invalid Unicode escapes in identifiers. Previously an
incomplete \u escape in an identifier was silently treated as the letter 'u'.

BUG=v8:1620
TEST=mjsunit/regress/regress-1620


Please review this at http://codereview.chromium.org/7663005/

SVN Base: https://v8.googlecode.com/svn/branches/bleeding_edge

Affected files:
  M src/scanner-base.h
  M src/scanner-base.cc
  A test/mjsunit/regress/regress-1620.js


Index: src/scanner-base.cc
diff --git a/src/scanner-base.cc b/src/scanner-base.cc
index e4590b1261ecfe6d2159fa45a0564b57008b1216..14efa687730d23fda66a49004fd62a1583ea5969 100644
--- a/src/scanner-base.cc
+++ b/src/scanner-base.cc
@@ -41,12 +41,12 @@ Scanner::Scanner(UnicodeCache* unicode_cache)
     : unicode_cache_(unicode_cache) { }


-uc32 Scanner::ScanHexEscape(uc32 c, int length) {
-  ASSERT(length <= 4);  // prevent overflow
+  uc32 Scanner::ScanHexNumber(int expected_length) {
+  ASSERT(expected_length <= 4);  // prevent overflow

   uc32 digits[4];
   uc32 x = 0;
-  for (int i = 0; i < length; i++) {
+  for (int i = 0; i < expected_length; i++) {
     digits[i] = c0_;
     int d = HexValue(c0_);
     if (d < 0) {
@@ -54,12 +54,11 @@ uc32 Scanner::ScanHexEscape(uc32 c, int length) {
       // should be illegal, but other JS VMs just return the
       // non-escaped version of the original character.

-      // Push back digits read, except the last one (in c0_).
+      // Push back digits that we have advanced past.
       for (int j = i-1; j >= 0; j--) {
         PushBack(digits[j]);
       }
-      // Notice: No handling of error - treat it as "\u"->"u".
-      return c;
+      return unibrow::Utf8::kBadChar;
     }
     x = x * 16 + d;
     Advance();
@@ -638,9 +637,17 @@ void JavaScriptScanner::ScanEscape() {
     case 'n' : c = '\n'; break;
     case 'r' : c = '\r'; break;
     case 't' : c = '\t'; break;
-    case 'u' : c = ScanHexEscape(c, 4); break;
+    case 'u' : {
+      c = ScanHexNumber(4);
+      if (c == static_cast<uc32>(unibrow::Utf8::kBadChar)) c = 'u';
+      break;
+    }
     case 'v' : c = '\v'; break;
-    case 'x' : c = ScanHexEscape(c, 2); break;
+    case 'x' : {
+      c = ScanHexNumber(2);
+      if (c == static_cast<uc32>(unibrow::Utf8::kBadChar)) c = 'x';
+      break;
+    }
     case '0' :  // fall through
     case '1' :  // fall through
     case '2' :  // fall through
@@ -802,7 +809,7 @@ uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
   Advance();
   if (c0_ != 'u') return unibrow::Utf8::kBadChar;
   Advance();
-  uc32 c = ScanHexEscape('u', 4);
+  uc32 c = ScanHexNumber(4);
   // We do not allow a unicode escape sequence to start another
   // unicode escape sequence.
   if (c == '\\') return unibrow::Utf8::kBadChar;
Index: src/scanner-base.h
diff --git a/src/scanner-base.h b/src/scanner-base.h
index d3776e5f8996719561bd037f6c764e28171608d0..7e416651e7a78c76867639b7d4fb4825b0a02acc 100644
--- a/src/scanner-base.h
+++ b/src/scanner-base.h
@@ -419,7 +419,7 @@ class Scanner {
     }
   }

-  uc32 ScanHexEscape(uc32 c, int length);
+  uc32 ScanHexNumber(int expected_length);

   // Return the current source position.
   int source_pos() {
Index: test/mjsunit/regress/regress-1620.js
diff --git a/test/mjsunit/regress/regress-1620.js b/test/mjsunit/regress/regress-1620.js
new file mode 100644
index 0000000000000000000000000000000000000000..8deed1e9233c0f43f5ea40ee80c9b623a4349267
--- /dev/null
+++ b/test/mjsunit/regress/regress-1620.js
@@ -0,0 +1,35 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// See: http://code.google.com/p/v8/issues/detail?id=1620
+
+assertThrows("var \\u\\u\\u = 42;");  // \u with no hex digits at all.
+assertThrows("var \\u41 = 42;");  // Only two of the four hex digits.
+assertThrows("var \\u123 = 42;");  // Only three of the four hex digits.
+eval("var \\u1234 = 42;");  // Complete four-digit escape: must be accepted.
+assertEquals(42, eval("\u1234"));  // Reads it back via the literal U+1234.
+assertThrows("var uuu = 42; var x = \\u\\u\\u");  // Must not be read as uuu.


--
v8-dev mailing list
v8-dev@googlegroups.com
http://groups.google.com/group/v8-dev

Reply via email to