Reviewers: ulan,

Description:
Fix assert triggered in fast/regex/pcre-test-4.html.

We were not filtering out all the nodes that had non-ASCII characters. That
has been fixed, but because of the protection against over-deep recursion
when filtering, it is wrong to assert that all nodes were filtered. This
change therefore also makes sure we can cope with non-filtered nodes by
adding back some code removed in
https://chromiumcodereview.appspot.com/10174017/

Please review this at https://chromiumcodereview.appspot.com/10358008/

SVN Base: http://v8.googlecode.com/svn/branches/bleeding_edge/

Affected files:
  M     src/jsregexp.cc
  M     test/mjsunit/regexp-capture-3.js


Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc     (revision 11473)
+++ src/jsregexp.cc     (working copy)
@@ -2426,9 +2426,15 @@
         QuickCheckDetails::Position* pos =
             details->positions(characters_filled_in);
         uc16 c = quarks[i];
-        // We should already have filtered out nodes that have non-ASCII
-        // characters if we are matching against an ASCII string.
-        ASSERT(c <= char_mask);
+        if (c > char_mask) {
+          // If we expect a non-ASCII character from an ASCII string,
+          // there is no way we can match. Not even case independent
+          // matching can turn an ASCII character into non-ASCII or    
+          // vice versa.       
+          details->set_cannot_match();      
+          pos->determines_perfectly = false;        
+          return;      
+        }
         if (compiler->ignore_case()) {
           unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
           int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
@@ -2490,9 +2496,11 @@
         int first_range = 0;
         while (ranges->at(first_range).from() > char_mask) {
           first_range++;
-          // We should already have filtered out nodes that cannot match
-          // so the first range should be a valid range.
-          ASSERT(first_range != ranges->length());
+          if (first_range == ranges->length()) {
+            details->set_cannot_match();
+            pos->determines_perfectly = false;
+            return;
+          }
         }
         CharacterRange range = ranges->at(first_range);
         uc16 from = range.from();
@@ -2540,10 +2548,12 @@
     }
   }
   ASSERT(characters_filled_in != details->characters());
-  on_success()-> GetQuickCheckDetails(details,
-                                      compiler,
-                                      characters_filled_in,
-                                      true);
+  if (!details->cannot_match()) {
+    on_success()-> GetQuickCheckDetails(details,
+                                        compiler,
+                                        characters_filled_in,
+                                        true);
+  }
 }


@@ -2687,13 +2697,15 @@
   if (info()->replacement_calculated) return replacement();
   if (depth < 0) return this;
   if (info()->visited) return this;
-  VisitMarker marker(info());
+  {
+    VisitMarker marker(info());
+
+    RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
+    // If we can't continue after the loop then there is no sense in doing the
+    // loop.
+    if (continue_replacement == NULL) return set_replacement(NULL);
+  }

-  RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
-  // If we can't continue after the loop then there is no sense in doing the
-  // loop.
-  if (continue_replacement == NULL) return set_replacement(NULL);
-
   return ChoiceNode::FilterASCII(depth - 1);
 }

Index: test/mjsunit/regexp-capture-3.js
===================================================================
--- test/mjsunit/regexp-capture-3.js    (revision 11473)
+++ test/mjsunit/regexp-capture-3.js    (working copy)
@@ -187,3 +187,28 @@
 var s = "Don't prune based on a repetition of length 0";
 assertEquals(null, s.match(/å{1,1}prune/));
 assertEquals("prune", (s.match(/å{0,0}prune/)[0]));
+
+// Some very deep regexps where FilterASCII gives up in order not to make the
+// stack overflow.
+var regex6 = /a*\u0100*\w/;
+var input0 = "a";
+regex6.exec(input0);
+
+var re = "\u0100*\\w";
+
+for (var i = 0; i < 200; i++) re = "a*" + re;
+
+var regex7 = new RegExp(re);
+regex7.exec(input0);
+
+var regex8 = new RegExp(re, "i");
+regex8.exec(input0);
+
+re = "[\u0100]*\\w";
+for (var i = 0; i < 200; i++) re = "a*" + re;
+
+var regex9 = new RegExp(re);
+regex9.exec(input0);
+
+var regex10 = new RegExp(re, "i");
+regex10.exec(input0);


--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to