Reviewers: ulan,
Description:
Fix assert triggered in fast/regex/pcre-test-4.html. We were not filtering
out all the nodes that had non-ASCII characters. That has been fixed, but
because of the protection against over-deep recursion when filtering, it is
wrong to assert that all nodes were filtered. This change therefore also
makes sure we can cope with non-filtered nodes by adding back some code
removed in https://chromiumcodereview.appspot.com/10174017/
Please review this at https://chromiumcodereview.appspot.com/10358008/
SVN Base: http://v8.googlecode.com/svn/branches/bleeding_edge/
Affected files:
M src/jsregexp.cc
M test/mjsunit/regexp-capture-3.js
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 11473)
+++ src/jsregexp.cc (working copy)
@@ -2426,9 +2426,15 @@
QuickCheckDetails::Position* pos =
details->positions(characters_filled_in);
uc16 c = quarks[i];
- // We should already have filtered out nodes that have non-ASCII
- // characters if we are matching against an ASCII string.
- ASSERT(c <= char_mask);
+ if (c > char_mask) {
+ // If we expect a non-ASCII character from an ASCII string,
+ // there is no way we can match. Not even case independent
+ // matching can turn an ASCII character into non-ASCII or
+ // vice versa.
+ details->set_cannot_match();
+ pos->determines_perfectly = false;
+ return;
+ }
if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
int length = GetCaseIndependentLetters(isolate, c,
compiler->ascii(),
@@ -2490,9 +2496,11 @@
int first_range = 0;
while (ranges->at(first_range).from() > char_mask) {
first_range++;
- // We should already have filtered out nodes that cannot match
- // so the first range should be a valid range.
- ASSERT(first_range != ranges->length());
+ if (first_range == ranges->length()) {
+ details->set_cannot_match();
+ pos->determines_perfectly = false;
+ return;
+ }
}
CharacterRange range = ranges->at(first_range);
uc16 from = range.from();
@@ -2540,10 +2548,12 @@
}
}
ASSERT(characters_filled_in != details->characters());
- on_success()-> GetQuickCheckDetails(details,
- compiler,
- characters_filled_in,
- true);
+ if (!details->cannot_match()) {
+ on_success()-> GetQuickCheckDetails(details,
+ compiler,
+ characters_filled_in,
+ true);
+ }
}
@@ -2687,13 +2697,15 @@
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
- VisitMarker marker(info());
+ {
+ VisitMarker marker(info());
+
+ RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
+ // If we can't continue after the loop then there is no sense in doing the
+ // loop.
+ if (continue_replacement == NULL) return set_replacement(NULL);
+ }
- RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
- // If we can't continue after the loop then there is no sense in doing the
- // loop.
- if (continue_replacement == NULL) return set_replacement(NULL);
-
return ChoiceNode::FilterASCII(depth - 1);
}
Index: test/mjsunit/regexp-capture-3.js
===================================================================
--- test/mjsunit/regexp-capture-3.js (revision 11473)
+++ test/mjsunit/regexp-capture-3.js (working copy)
@@ -187,3 +187,28 @@
var s = "Don't prune based on a repetition of length 0";
assertEquals(null, s.match(/å{1,1}prune/));
assertEquals("prune", (s.match(/å{0,0}prune/)[0]));
+
+// Some very deep regexps where FilterASCII gives up in order not to make the
+// stack overflow.
+var regex6 = /a*\u0100*\w/;
+var input0 = "a";
+regex6.exec(input0);
+
+var re = "\u0100*\\w";
+
+for (var i = 0; i < 200; i++) re = "a*" + re;
+
+var regex7 = new RegExp(re);
+regex7.exec(input0);
+
+var regex8 = new RegExp(re, "i");
+regex8.exec(input0);
+
+re = "[\u0100]*\\w";
+for (var i = 0; i < 200; i++) re = "a*" + re;
+
+var regex9 = new RegExp(re);
+regex9.exec(input0);
+
+var regex10 = new RegExp(re, "i");
+regex10.exec(input0);
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev