Reviewers: arv,

Message:
arv, could you have a look at this CL? (This is a prerequisite for the ES6
unicode regexps...)

Description:
RegExpParser: Fix Reset()ting to the end.

The bug would occur when we try to Reset() to a position already at the end.

This happens, e.g., when the regexp ends with \u. What used to happen in that
case: 1) Advance past \ and u (to the end), which wouldn't increase next_pos_
enough; 2) try to parse 4 hex digits; 3) when that failed, Reset() to the
position which should've been at the end but wasn't.

To be able to properly Reset() to a position at the end, we need to allow
next_pos_ to move beyond the end (since position() is next_pos_ - 1).

Minimal repro case:

var r = /foo\u/
r.test("foou") // should be true, was false.

(Note that \u not followed by 4 hex digits should be interpreted as an identity
escape. It already worked unless \u was at the end of the regexp.)

BUG=v8:3756
LOG=NO

Please review this at https://codereview.chromium.org/802313003/

Base URL: https://chromium.googlesource.com/v8/v8.git@master

Affected files (+75, -0 lines):
  M src/parser.cc
  A test/mjsunit/regress/regress-3756.js


Index: src/parser.cc
diff --git a/src/parser.cc b/src/parser.cc
index 9cb92b3c535e79900d1782c689ac0966cef1d15b..0713b26e8b9bfb24e9c06ad5c1500754d7243ab1 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -4323,6 +4323,9 @@ void RegExpParser::Advance() {
     }
   } else {
     current_ = kEndMarker;
+    // Advance so that position() points to 1-after-the-last-character. This is
+    // important so that Reset() to this position works correctly.
+    next_pos_ = in()->length() + 1;
     has_more_ = false;
   }
 }
Index: test/mjsunit/regress/regress-3756.js
diff --git a/test/mjsunit/regress/regress-3756.js b/test/mjsunit/regress/regress-3756.js
new file mode 100644
index 0000000000000000000000000000000000000000..7646ea652bef68ab87a73db664ff6fe143c6edd3
--- /dev/null
+++ b/test/mjsunit/regress/regress-3756.js
@@ -0,0 +1,72 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+(function TestIdentityEscapes() {
+  // \u not followed by 4 hex digits is treated as an identity escape.
+  var r0 = /\u/;
+  assertTrue(r0.test("u"));
+
+  r0 = RegExp("\\u");
+  assertTrue(r0.test("u"));
+
+  var r1 = /\usecond/;
+  assertTrue(r1.test("usecond"));
+
+  r1 = RegExp("\\usecond");
+  assertTrue(r1.test("usecond"));
+
+  var r2 = /first\u/;
+  assertTrue(r2.test("firstu"));
+ assertFalse(r2.test("first\\u")); // This used to return true (which was a bug).
+
+  r2 = RegExp("first\\u");
+  assertTrue(r2.test("firstu"));
+ assertFalse(r2.test("first\\u")); // This used to return true (which was a bug).
+
+  var r3 = /first\usecond/;
+  assertTrue(r3.test("firstusecond"));
+  assertFalse(r3.test("first\\usecond"));
+
+  r3 = RegExp("first\\usecond");
+  assertTrue(r3.test("firstusecond"));
+  assertFalse(r3.test("first\\usecond"));
+
+  var r4 = /first\u123second/;
+  assertTrue(r4.test("firstu123second"));
+  assertFalse(r4.test("first\\u123second"));
+
+  r4 = RegExp("first\\u123second");
+  assertTrue(r4.test("firstu123second"));
+  assertFalse(r4.test("first\\u123second"));
+
+  // \X where X is not a legal escape character is treated as identity escape
+  // too.
+  var r5 = /\a/;
+  assertTrue(r5.test("a"));
+
+  r5 = RegExp("\\a");
+  assertTrue(r5.test("a"));
+
+  var r6 = /\asecond/;
+  assertTrue(r6.test("asecond"));
+
+  r6 = RegExp("\\asecond");
+  assertTrue(r6.test("asecond"));
+
+  var r7 = /first\a/;
+  assertTrue(r7.test("firsta"));
+  assertFalse(r7.test("first\\a"));
+
+  r7 = RegExp("first\\a");
+  assertTrue(r7.test("firsta"));
+  assertFalse(r7.test("first\\a"));
+
+  var r8 = /first\asecond/;
+  assertTrue(r8.test("firstasecond"));
+  assertFalse(r8.test("first\\asecond"));
+
+  r8 = RegExp("first\\asecond");
+  assertTrue(r8.test("firstasecond"));
+  assertFalse(r8.test("first\\asecond"));
+})();


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to