Revision: 11686
Author: [email protected]
Date: Thu May 31 04:59:04 2012
Log: Avoid overdeep recursion in regexp where a guarded expression
with a
minimum repetition count is inside another quantifier.
Bug=129926
Review URL: https://chromiumcodereview.appspot.com/10451092
http://code.google.com/p/v8/source/detail?r=11686
Modified:
/branches/bleeding_edge/src/jsregexp.cc
/branches/bleeding_edge/src/jsregexp.h
/branches/bleeding_edge/test/mjsunit/regexp-capture-3.js
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc Wed May 23 07:24:29 2012
+++ /branches/bleeding_edge/src/jsregexp.cc Thu May 31 04:59:04 2012
@@ -2191,12 +2191,13 @@
void ActionNode::FillInBMInfo(int offset,
+ int recursion_depth,
BoyerMooreLookahead* bm,
bool not_at_start) {
if (type_ == BEGIN_SUBMATCH) {
bm->SetRest(offset);
} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
- on_success()->FillInBMInfo(offset, bm, not_at_start);
+ on_success()->FillInBMInfo(offset, recursion_depth + 1, bm,
not_at_start);
}
SaveBMInfo(bm, not_at_start, offset);
}
@@ -2218,11 +2219,13 @@
}
-void AssertionNode::FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void AssertionNode::FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
// Match the behaviour of EatsAtLeast on this node.
if (type() == AT_START && not_at_start) return;
- on_success()->FillInBMInfo(offset, bm, not_at_start);
+ on_success()->FillInBMInfo(offset, recursion_depth + 1, bm,
not_at_start);
SaveBMInfo(bm, not_at_start, offset);
}
@@ -2803,14 +2806,17 @@
}
-void LoopChoiceNode::FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
- if (body_can_be_zero_length_) {
+void LoopChoiceNode::FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
+ if (body_can_be_zero_length_ ||
+ recursion_depth > RegExpCompiler::kMaxRecursion) {
bm->SetRest(offset);
SaveBMInfo(bm, not_at_start, offset);
return;
}
- ChoiceNode::FillInBMInfo(offset, bm, not_at_start);
+ ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
SaveBMInfo(bm, not_at_start, offset);
}
@@ -2912,7 +2918,7 @@
if (eats_at_least >= 1) {
BoyerMooreLookahead* bm =
new BoyerMooreLookahead(eats_at_least, compiler);
- FillInBMInfo(0, bm, not_at_start);
+ FillInBMInfo(0, 0, bm, not_at_start);
if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
}
@@ -3850,7 +3856,7 @@
BoyerMooreLookahead* bm =
new BoyerMooreLookahead(eats_at_least, compiler);
GuardedAlternative alt0 = alternatives_->at(0);
- alt0.node()->FillInBMInfo(0, bm, not_at_start);
+ alt0.node()->FillInBMInfo(0, 0, bm, not_at_start);
skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
}
} else {
@@ -5589,8 +5595,10 @@
}
-void BackReferenceNode::FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void BackReferenceNode::FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
// Working out the set of characters that a backreference can match is
too
// hard, so we just say that any character can match.
bm->SetRest(offset);
@@ -5602,8 +5610,10 @@
RegExpMacroAssembler::kTableSize);
-void ChoiceNode::FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void ChoiceNode::FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
ZoneList<GuardedAlternative>* alts = alternatives();
for (int i = 0; i < alts->length(); i++) {
GuardedAlternative& alt = alts->at(i);
@@ -5612,14 +5622,16 @@
SaveBMInfo(bm, not_at_start, offset);
return;
}
- alt.node()->FillInBMInfo(offset, bm, not_at_start);
+ alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm,
not_at_start);
}
SaveBMInfo(bm, not_at_start, offset);
}
-void TextNode::FillInBMInfo(
- int initial_offset, BoyerMooreLookahead* bm, bool not_at_start) {
+void TextNode::FillInBMInfo(int initial_offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
if (initial_offset >= bm->length()) return;
int offset = initial_offset;
int max_char = bm->max_char();
@@ -5673,6 +5685,7 @@
return;
}
on_success()->FillInBMInfo(offset,
+ recursion_depth + 1,
bm,
true); // Not at start after a text node.
if (initial_offset == 0) set_bm_info(not_at_start, bm);
=======================================
--- /branches/bleeding_edge/src/jsregexp.h Tue May 22 07:05:44 2012
+++ /branches/bleeding_edge/src/jsregexp.h Thu May 31 04:59:04 2012
@@ -581,8 +581,10 @@
// we look forward. This is used for a Boyer-Moore-like string searching
// implementation. TODO(erikcorry): This should share more code with
// EatsAtLeast, GetQuickCheckDetails.
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
UNREACHABLE();
}
@@ -681,9 +683,11 @@
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual RegExpNode* FilterASCII(int depth);
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
- on_success_->FillInBMInfo(offset, bm, not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
+ on_success_->FillInBMInfo(offset, recursion_depth + 1, bm,
not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
@@ -736,8 +740,10 @@
return on_success()->GetQuickCheckDetails(
details, compiler, filled_in, not_at_start);
}
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start);
Type type() { return type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops.
virtual int GreedyLoopTextLength() { return
kNodeIsTooComplexForGreedyLoops; }
@@ -805,8 +811,10 @@
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler);
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start);
void CalculateOffsets();
virtual RegExpNode* FilterASCII(int depth);
@@ -865,8 +873,10 @@
RegExpCompiler* compiler,
int filled_in,
bool not_at_start);
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start);
AssertionNodeType type() { return type_; }
void set_type(AssertionNodeType type) { type_ = type; }
@@ -903,8 +913,10 @@
bool not_at_start) {
return;
}
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start);
private:
int start_reg_;
@@ -928,8 +940,10 @@
// Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE();
}
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
// Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE();
}
@@ -1018,8 +1032,10 @@
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start);
bool being_calculated() { return being_calculated_; }
bool not_at_start() { return not_at_start_; }
@@ -1068,9 +1084,12 @@
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start) {
- alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start) {
+ alternatives_->at(1).node()->FillInBMInfo(
+ offset, recursion_depth + 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
// For a negative lookahead we don't emit the quick check for the
@@ -1100,8 +1119,10 @@
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
- virtual void FillInBMInfo(
- int offset, BoyerMooreLookahead* bm, bool not_at_start);
+ virtual void FillInBMInfo(int offset,
+ int recursion_depth,
+ BoyerMooreLookahead* bm,
+ bool not_at_start);
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
=======================================
--- /branches/bleeding_edge/test/mjsunit/regexp-capture-3.js Mon May 7
06:23:56 2012
+++ /branches/bleeding_edge/test/mjsunit/regexp-capture-3.js Thu May 31
04:59:04 2012
@@ -216,3 +216,5 @@
var regex11 = /^(?:[^\u0000-\u0080]|[0-9a-z?,.!&\s#()])+$/i;
regex11.exec(input0);
+var regex12 = /u(\xf0{8}?\D*?|( ? !)$h??(|)*?(||)+?\6((?:\W\B|--\d-*-|
)?$){0, }?|^Y( ? !1)\d+)+a/;
+regex12.exec("");
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev