Author: [email protected]
Date: Thu Jan 8 04:40:47 2009
New Revision: 1047
Modified:
branches/bleeding_edge/src/bytecodes-irregexp.h
branches/bleeding_edge/src/interpreter-irregexp.cc
branches/bleeding_edge/src/jsregexp.cc
branches/bleeding_edge/src/jsregexp.h
branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc
branches/bleeding_edge/src/regexp-macro-assembler-tracer.h
branches/bleeding_edge/src/regexp-macro-assembler.h
branches/bleeding_edge/test/cctest/test-regexp.cc
Log:
Added check that bails out of a repetition when the body is empty.
Modified: branches/bleeding_edge/src/bytecodes-irregexp.h
==============================================================================
--- branches/bleeding_edge/src/bytecodes-irregexp.h (original)
+++ branches/bleeding_edge/src/bytecodes-irregexp.h Thu Jan 8 04:40:47 2009
@@ -71,8 +71,9 @@
V(LOOKUP_HI_MAP8, 36, 99) /* l_himap8 start8 byte_map_addr32
addr32* */ \
V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16
addr32 */ \
V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16
addr32 */ \
-V(CHECK_NOT_AT_START, 39, 5) /* check_not_at_start
addr32 */ \
-V(CHECK_GREEDY, 40, 5) /* check_greedy
addr32 */
+V(CHECK_REGISTER_EQ_POS, 39, 6) /* check_register_eq_pos index
addr32 */ \
+V(CHECK_NOT_AT_START, 40, 5) /* check_not_at_start
addr32 */ \
+V(CHECK_GREEDY, 41, 5) /* check_greedy
addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
Modified: branches/bleeding_edge/src/interpreter-irregexp.cc
==============================================================================
--- branches/bleeding_edge/src/interpreter-irregexp.cc (original)
+++ branches/bleeding_edge/src/interpreter-irregexp.cc Thu Jan 8 04:40:47
2009
@@ -379,6 +379,13 @@
pc += BC_CHECK_REGISTER_GE_LENGTH;
}
break;
+ BYTECODE(CHECK_REGISTER_EQ_POS)
+ if (registers[pc[1]] == current) {
+ pc = code_base + Load32(pc + 2);
+ } else {
+ pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
+ }
+ break;
BYTECODE(LOOKUP_MAP1) {
// Look up character in a bitmap. If we find a 0, then jump to the
// location at pc + 7. Otherwise fall through!
Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc (original)
+++ branches/bleeding_edge/src/jsregexp.cc Thu Jan 8 04:40:47 2009
@@ -1222,6 +1222,7 @@
inline bool ignore_case() { return ignore_case_; }
inline bool ascii() { return ascii_; }
+ static const int kNoRegister = -1;
private:
EndNode* accept_;
int next_register_;
@@ -1313,8 +1314,26 @@
}
+bool GenerationVariant::GetStoredPosition(int reg, int* cp_offset) {
+ ASSERT_EQ(0, *cp_offset);
+ for (DeferredAction* action = actions_;
+ action != NULL;
+ action = action->next()) {
+ if (reg == action->reg()) {
+ if (action->type() == ActionNode::STORE_POSITION) {
+ *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+ return false;
+}
+
+
int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) {
- int max_register = -1;
+ int max_register = RegExpCompiler::kNoRegister;
for (DeferredAction* action = actions_;
action != NULL;
action = action->next()) {
@@ -1576,6 +1595,18 @@
}
+ActionNode* ActionNode::EmptyMatchCheck(int start_register,
+ int repetition_register,
+ int repetition_limit,
+ RegExpNode* on_success) {
+ ActionNode* result = new ActionNode(EMPTY_MATCH_CHECK, on_success);
+ result->data_.u_empty_match_check.start_register = start_register;
+ result->data_.u_empty_match_check.repetition_register =
repetition_register;
+ result->data_.u_empty_match_check.repetition_limit = repetition_limit;
+ return result;
+}
+
+
#define DEFINE_ACCEPT(Type) \
void Type##Node::Accept(NodeVisitor* visitor) { \
visitor->Visit##Type(this); \
@@ -2967,6 +2998,37 @@
assembler->WriteStackPointerToRegister(
data_.u_submatch.stack_pointer_register);
return on_success()->Emit(compiler, variant);
+ case EMPTY_MATCH_CHECK: {
+ int start_pos_reg = data_.u_empty_match_check.start_register;
+ int stored_pos = 0;
+ int rep_reg = data_.u_empty_match_check.repetition_register;
+ bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
+ bool know_dist = variant->GetStoredPosition(start_pos_reg,
&stored_pos);
+ if (know_dist && !has_minimum && stored_pos == variant->cp_offset())
{
+ // If we know we haven't advanced and there is no minimum we
+ // can just backtrack immediately.
+ assembler->GoTo(variant->backtrack());
+ return true;
+ } else if (know_dist && stored_pos < variant->cp_offset()) {
+ // If we know we've advanced we can generate the continuation
+ // immediately.
+ return on_success()->Emit(compiler, variant);
+ }
+ if (!variant->is_trivial()) return variant->Flush(compiler, this);
+ Label skip_empty_check;
+ // If we have a minimum number of repetitions we check the current
+ // number first and skip the empty check if it's not enough.
+ if (has_minimum) {
+ int limit = data_.u_empty_match_check.repetition_limit;
+ assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
+ }
+ // If the match is empty we bail out, otherwise we fall through
+ // to the on-success continuation.
+ assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
+ variant->backtrack());
+ assembler->Bind(&skip_empty_check);
+ return on_success()->Emit(compiler, variant);
+ }
case POSITIVE_SUBMATCH_SUCCESS:
if (!variant->is_trivial()) return variant->Flush(compiler, this);
// TODO(erikcorry): Implement support.
@@ -3286,6 +3348,12 @@
case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
stream()->Add("label=\"escape\", shape=septagon");
break;
+ case ActionNode::EMPTY_MATCH_CHECK:
+ stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
+ that->data_.u_empty_match_check.start_register,
+ that->data_.u_empty_match_check.repetition_register,
+ that->data_.u_empty_match_check.repetition_limit);
+ break;
}
stream()->Add("];\n");
PrintAttributes(that);
@@ -3511,7 +3579,11 @@
static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and
(foo){3,}
static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and
(foo){x,3}
if (max == 0) return on_success; // This can happen due to recursion.
- if (body->min_match() > 0) {
+ bool body_can_be_empty = (body->min_match() == 0);
+ int body_start_reg = RegExpCompiler::kNoRegister;
+ if (body_can_be_empty) {
+ body_start_reg = compiler->AllocateRegister();
+ } else {
if (min > 0 && min <= kMaxUnrolledMinMatches) {
int new_max = (max == kInfinity) ? max : max - min;
// Recurse once to get the loop or optional matches after the fixed
ones.
@@ -3548,12 +3620,27 @@
bool has_min = min > 0;
bool has_max = max < RegExpTree::kInfinity;
bool needs_counter = has_min || has_max;
- int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
+ int reg_ctr = needs_counter
+ ? compiler->AllocateRegister()
+ : RegExpCompiler::kNoRegister;
LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr,
center))
: static_cast<RegExpNode*>(center);
+ if (body_can_be_empty) {
+ // If the body can be empty we need to check if it was and then
+ // backtrack.
+ loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
+ reg_ctr,
+ min,
+ loop_return);
+ }
RegExpNode* body_node = body->ToNode(compiler, loop_return);
+ if (body_can_be_empty) {
+ // If the body can be empty we need to store the start position
+ // so we can bail out if it was empty.
+ body_node = ActionNode::StorePosition(body_start_reg, body_node);
+ }
GuardedAlternative body_alt(body_node);
if (has_max) {
Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h (original)
+++ branches/bleeding_edge/src/jsregexp.h Thu Jan 8 04:40:47 2009
@@ -682,7 +682,8 @@
INCREMENT_REGISTER,
STORE_POSITION,
BEGIN_SUBMATCH,
- POSITIVE_SUBMATCH_SUCCESS
+ POSITIVE_SUBMATCH_SUCCESS,
+ EMPTY_MATCH_CHECK
};
static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
@@ -695,6 +696,11 @@
int stack_pointer_reg,
int restore_reg,
RegExpNode* on_success);
+ static ActionNode* EmptyMatchCheck(
+ int start_register,
+ int repetition_register,
+ int repetition_limit,
+ RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual int EatsAtLeast(int recursion_depth);
@@ -725,6 +731,11 @@
int stack_pointer_register;
int current_position_register;
} u_submatch;
+ struct {
+ int start_register;
+ int repetition_register;
+ int repetition_limit;
+ } u_empty_match_check;
} data_;
ActionNode(Type type, RegExpNode* on_success)
: SeqRegExpNode(on_success),
@@ -1031,6 +1042,10 @@
int bound_checked_up_to() { return bound_checked_up_to_; }
QuickCheckDetails* quick_check_performed() { return
&quick_check_performed_; }
bool mentions_reg(int reg);
+ // Returns true if a deferred position store exists to the specified
+ // register and stores the offset in the out-parameter. Otherwise
+ // returns false.
+ bool GetStoredPosition(int reg, int* cp_offset);
// These set methods and AdvanceVariant should be used only on new
// GenerationVariants - the intention is that GenerationVariants are
// immutable after creation.
Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc Thu Jan 8
04:40:47 2009
@@ -744,6 +744,13 @@
}
+void RegExpMacroAssemblerIA32::IfRegisterEqPos(int reg,
+ Label* if_eq) {
+ __ cmp(edi, register_location(reg));
+ BranchOrBacktrack(equal, if_eq);
+}
+
+
RegExpMacroAssembler::IrregexpImplementation
RegExpMacroAssemblerIA32::Implementation() {
return kIA32Implementation;
Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.h Thu Jan 8
04:40:47 2009
@@ -86,6 +86,7 @@
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
+ virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
(original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc Thu Jan
8 04:40:47 2009
@@ -401,6 +401,14 @@
}
+void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index,
+ Label* on_eq) {
+ Emit(BC_CHECK_REGISTER_EQ_POS);
+ Emit(register_index);
+ EmitOrLink(on_eq);
+}
+
+
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String>
source) {
Bind(&backtrack_);
Emit(BC_POP_BT);
Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
(original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h Thu Jan
8
04:40:47 2009
@@ -106,6 +106,7 @@
const Vector<Label*>& destinations);
virtual void IfRegisterLT(int register_index, int comparand, Label*
if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label*
if_ge);
+ virtual void IfRegisterEqPos(int register_index, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode(Handle<String> source);
Modified: branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc Thu Jan 8
04:40:47 2009
@@ -373,6 +373,14 @@
}
+void RegExpMacroAssemblerTracer::IfRegisterEqPos(int register_index,
+ Label* if_eq) {
+ PrintF(" IfRegisterEqPos(register=%d, label[%08x]);\n",
+ register_index, if_eq);
+ assembler_->IfRegisterEqPos(register_index, if_eq);
+}
+
+
void RegExpMacroAssemblerTracer::IfRegisterGE(int register_index,
int comparand, Label* if_ge)
{
PrintF(" IfRegisterGE(register=%d, number=%d, label[%08x]);\n",
Modified: branches/bleeding_edge/src/regexp-macro-assembler-tracer.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-tracer.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-tracer.h Thu Jan 8
04:40:47 2009
@@ -88,6 +88,7 @@
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
+ virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
Modified: branches/bleeding_edge/src/regexp-macro-assembler.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler.h Thu Jan 8 04:40:47
2009
@@ -135,6 +135,9 @@
// Check whether a register is < a given constant and go to a label if
it is.
// Backtracks instead if the label is NULL.
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
+ // Check whether a register is == to the current position and go to a
+ // label if it is.
+ virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
Modified: branches/bleeding_edge/test/cctest/test-regexp.cc
==============================================================================
--- branches/bleeding_edge/test/cctest/test-regexp.cc (original)
+++ branches/bleeding_edge/test/cctest/test-regexp.cc Thu Jan 8 04:40:47
2009
@@ -1466,5 +1466,5 @@
TEST(Graph) {
V8::Initialize(NULL);
- Execute("\\b\\w+\\b", false, true, true);
+ Execute("(?:a|)*", false, true, true);
}
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---