Reviewers: Erik Corry, Message: Small review
Description: Matching a back-reference must handle an unbound start register (but can assume that if the start register is bound, then the end register is bound too). After matching a back-reference, the character position is advanced past the match. Please review this at http://codereview.chromium.org/13090 Affected files: M src/interpreter-irregexp.cc M src/jsregexp.cc M src/regexp-macro-assembler-ia32.cc Index: src/interpreter-irregexp.cc diff --git a/src/interpreter-irregexp.cc b/src/interpreter-irregexp.cc index d5d1a89ca9ff91d4efb542a78fa7c7b2ddd646f7..f76c13501d29cd7a0487bdb72ad10f7b87301ab5 100644 --- a/src/interpreter-irregexp.cc +++ b/src/interpreter-irregexp.cc @@ -333,6 +333,10 @@ static bool RawMatch(const byte* code_base, BYTECODE(CHECK_NOT_BACK_REF) { int from = registers[pc[1]]; int len = registers[pc[1] + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_LENGTH; + break; + } if (current + len > subject.length()) { pc = code_base + Load32(pc + 2); break; @@ -353,6 +357,10 @@ static bool RawMatch(const byte* code_base, BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { int from = registers[pc[1]]; int len = registers[pc[1] + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; + break; + } if (current + len > subject.length()) { pc = code_base + Load32(pc + 2); break; Index: src/jsregexp.cc diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 05b8aa0f82bf214265f0832036e4d35099d0c961..3046b96afa650561afaa37163a4fdd81a5ee6515 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -1541,10 +1541,6 @@ bool ActionNode::Emit(RegExpCompiler* compiler) { bool BackReferenceNode::Emit(RegExpCompiler* compiler) { RegExpMacroAssembler* macro = compiler->macro_assembler(); Bind(macro); - // Check whether the registers are uninitialized and always - // succeed if they are. 
- macro->IfRegisterLT(start_reg_, 0, on_success()->label()); - macro->IfRegisterLT(end_reg_, 0, on_success()->label()); ASSERT_EQ(start_reg_ + 1, end_reg_); if (info()->at_end) { // If we are constrained to match at the end of the input then succeed Index: src/regexp-macro-assembler-ia32.cc diff --git a/src/regexp-macro-assembler-ia32.cc b/src/regexp-macro-assembler-ia32.cc index e58323f11fce7e2f7686259677028c0f0304807c..9372c64e2ae032e4201e3e4291631465e0f37a79 100644 --- a/src/regexp-macro-assembler-ia32.cc +++ b/src/regexp-macro-assembler-ia32.cc @@ -262,17 +262,16 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference( __ sub(ecx, Operand(eax)); // Length to check. __ j(less, on_no_match); __ j(equal, &fallthrough); - // check that there are sufficient characters left in the input + // Check that there are sufficient characters left in the input. __ mov(ebx, edi); __ add(ebx, Operand(ecx)); __ j(greater, on_no_match); - __ mov(ebx, Operand(edi)); - __ push(esi); + __ mov(edx, esi); __ add(edi, Operand(esi)); __ add(esi, Operand(eax)); __ rep_cmpsb(); - __ pop(esi); - __ mov(edi, Operand(ebx)); + __ mov(esi, edx); + __ mov(edi, ebx); BranchOrBacktrack(not_equal, on_no_match); __ bind(&fallthrough); } @@ -629,7 +628,6 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() { ExternalReference::address_of_stack_guard_limit(); __ cmp(esp, Operand::StaticVariable(stack_guard_limit)); __ j(above, &no_preempt, taken); - __ push(edi); // Current position. __ push(edx); // Current character. // Restore original edi, esi. --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list v8-dev@googlegroups.com http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---
