Author: [email protected]
Date: Thu Jan  8 04:40:47 2009
New Revision: 1047

Modified:
    branches/bleeding_edge/src/bytecodes-irregexp.h
    branches/bleeding_edge/src/interpreter-irregexp.cc
    branches/bleeding_edge/src/jsregexp.cc
    branches/bleeding_edge/src/jsregexp.h
    branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
    branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
    branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
    branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
    branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc
    branches/bleeding_edge/src/regexp-macro-assembler-tracer.h
    branches/bleeding_edge/src/regexp-macro-assembler.h
    branches/bleeding_edge/test/cctest/test-regexp.cc

Log:
Added check that bails out of a repetition when the body is empty.


Modified: branches/bleeding_edge/src/bytecodes-irregexp.h
==============================================================================
--- branches/bleeding_edge/src/bytecodes-irregexp.h     (original)
+++ branches/bleeding_edge/src/bytecodes-irregexp.h     Thu Jan  8 04:40:47 2009
@@ -71,8 +71,9 @@
  V(LOOKUP_HI_MAP8,    36, 99) /* l_himap8 start8 byte_map_addr32  
addr32*     */ \
  V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16  
addr32   */ \
  V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16  
addr32   */ \
-V(CHECK_NOT_AT_START, 39, 5) /* check_not_at_start  
addr32                   */ \
-V(CHECK_GREEDY,      40, 5) /* check_greedy  
addr32                          */
+V(CHECK_REGISTER_EQ_POS, 39, 6) /* check_register_eq_pos index  
addr32       */ \
+V(CHECK_NOT_AT_START, 40, 5) /* check_not_at_start  
addr32                   */ \
+V(CHECK_GREEDY,      41, 5) /* check_greedy  
addr32                          */

  #define DECLARE_BYTECODES(name, code, length) \
    static const int BC_##name = code;

Modified: branches/bleeding_edge/src/interpreter-irregexp.cc
==============================================================================
--- branches/bleeding_edge/src/interpreter-irregexp.cc  (original)
+++ branches/bleeding_edge/src/interpreter-irregexp.cc  Thu Jan  8 04:40:47  
2009
@@ -379,6 +379,13 @@
            pc += BC_CHECK_REGISTER_GE_LENGTH;
          }
          break;
+      BYTECODE(CHECK_REGISTER_EQ_POS)
+        if (registers[pc[1]] == current) {
+          pc = code_base + Load32(pc + 2);
+        } else {
+          pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
+        }
+        break;
        BYTECODE(LOOKUP_MAP1) {
          // Look up character in a bitmap.  If we find a 0, then jump to the
          // location at pc + 7.  Otherwise fall through!

Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc      (original)
+++ branches/bleeding_edge/src/jsregexp.cc      Thu Jan  8 04:40:47 2009
@@ -1222,6 +1222,7 @@
    inline bool ignore_case() { return ignore_case_; }
    inline bool ascii() { return ascii_; }

+  static const int kNoRegister = -1;
   private:
    EndNode* accept_;
    int next_register_;
@@ -1313,8 +1314,26 @@
  }


+bool GenerationVariant::GetStoredPosition(int reg, int* cp_offset) {
+  ASSERT_EQ(0, *cp_offset);
+  for (DeferredAction* action = actions_;
+       action != NULL;
+       action = action->next()) {
+    if (reg == action->reg()) {
+      if (action->type() == ActionNode::STORE_POSITION) {
+        *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
+        return true;
+      } else {
+        return false;
+      }
+    }
+  }
+  return false;
+}
+
+
  int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) {
-  int max_register = -1;
+  int max_register = RegExpCompiler::kNoRegister;
    for (DeferredAction* action = actions_;
         action != NULL;
         action = action->next()) {
@@ -1576,6 +1595,18 @@
  }


+ActionNode* ActionNode::EmptyMatchCheck(int start_register,
+                                        int repetition_register,
+                                        int repetition_limit,
+                                        RegExpNode* on_success) {
+  ActionNode* result = new ActionNode(EMPTY_MATCH_CHECK, on_success);
+  result->data_.u_empty_match_check.start_register = start_register;
+  result->data_.u_empty_match_check.repetition_register =  
repetition_register;
+  result->data_.u_empty_match_check.repetition_limit = repetition_limit;
+  return result;
+}
+
+
  #define DEFINE_ACCEPT(Type)                                          \
    void Type##Node::Accept(NodeVisitor* visitor) {                    \
      visitor->Visit##Type(this);                                      \
@@ -2967,6 +2998,37 @@
        assembler->WriteStackPointerToRegister(
            data_.u_submatch.stack_pointer_register);
        return on_success()->Emit(compiler, variant);
+    case EMPTY_MATCH_CHECK: {
+      int start_pos_reg = data_.u_empty_match_check.start_register;
+      int stored_pos = 0;
+      int rep_reg = data_.u_empty_match_check.repetition_register;
+      bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
+      bool know_dist = variant->GetStoredPosition(start_pos_reg,  
&stored_pos);
+      if (know_dist && !has_minimum && stored_pos == variant->cp_offset())  
{
+        // If we know we haven't advanced and there is no minimum we
+        // can just backtrack immediately.
+        assembler->GoTo(variant->backtrack());
+        return true;
+      } else if (know_dist && stored_pos < variant->cp_offset()) {
+        // If we know we've advanced we can generate the continuation
+        // immediately.
+        return on_success()->Emit(compiler, variant);
+      }
+      if (!variant->is_trivial()) return variant->Flush(compiler, this);
+      Label skip_empty_check;
+      // If we have a minimum number of repetitions we check the current
+      // number first and skip the empty check if it's not enough.
+      if (has_minimum) {
+        int limit = data_.u_empty_match_check.repetition_limit;
+        assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
+      }
+      // If the match is empty we bail out, otherwise we fall through
+      // to the on-success continuation.
+      assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
+                                 variant->backtrack());
+      assembler->Bind(&skip_empty_check);
+      return on_success()->Emit(compiler, variant);
+    }
      case POSITIVE_SUBMATCH_SUCCESS:
        if (!variant->is_trivial()) return variant->Flush(compiler, this);
        // TODO(erikcorry): Implement support.
@@ -3286,6 +3348,12 @@
      case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
        stream()->Add("label=\"escape\", shape=septagon");
        break;
+    case ActionNode::EMPTY_MATCH_CHECK:
+      stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
+                    that->data_.u_empty_match_check.start_register,
+                    that->data_.u_empty_match_check.repetition_register,
+                    that->data_.u_empty_match_check.repetition_limit);
+      break;
    }
    stream()->Add("];\n");
    PrintAttributes(that);
@@ -3511,7 +3579,11 @@
    static const int kMaxUnrolledMinMatches = 3;  // Unroll (foo)+ and  
(foo){3,}
    static const int kMaxUnrolledMaxMatches = 3;  // Unroll (foo)? and  
(foo){x,3}
    if (max == 0) return on_success;  // This can happen due to recursion.
-  if (body->min_match() > 0) {
+  bool body_can_be_empty = (body->min_match() == 0);
+  int body_start_reg = RegExpCompiler::kNoRegister;
+  if (body_can_be_empty) {
+    body_start_reg = compiler->AllocateRegister();
+  } else {
      if (min > 0 && min <= kMaxUnrolledMinMatches) {
        int new_max = (max == kInfinity) ? max : max - min;
        // Recurse once to get the loop or optional matches after the fixed  
ones.
@@ -3548,12 +3620,27 @@
    bool has_min = min > 0;
    bool has_max = max < RegExpTree::kInfinity;
    bool needs_counter = has_min || has_max;
-  int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
+  int reg_ctr = needs_counter
+      ? compiler->AllocateRegister()
+      : RegExpCompiler::kNoRegister;
    LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
    RegExpNode* loop_return = needs_counter
        ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr,  
center))
        : static_cast<RegExpNode*>(center);
+  if (body_can_be_empty) {
+    // If the body can be empty we need to check if it was and then
+    // backtrack.
+    loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
+                                              reg_ctr,
+                                              min,
+                                              loop_return);
+  }
    RegExpNode* body_node = body->ToNode(compiler, loop_return);
+  if (body_can_be_empty) {
+    // If the body can be empty we need to store the start position
+    // so we can bail out if it was empty.
+    body_node = ActionNode::StorePosition(body_start_reg, body_node);
+  }
    GuardedAlternative body_alt(body_node);
    if (has_max) {
      Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);

Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h       (original)
+++ branches/bleeding_edge/src/jsregexp.h       Thu Jan  8 04:40:47 2009
@@ -682,7 +682,8 @@
      INCREMENT_REGISTER,
      STORE_POSITION,
      BEGIN_SUBMATCH,
-    POSITIVE_SUBMATCH_SUCCESS
+    POSITIVE_SUBMATCH_SUCCESS,
+    EMPTY_MATCH_CHECK
    };
    static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
    static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
@@ -695,6 +696,11 @@
        int stack_pointer_reg,
        int restore_reg,
        RegExpNode* on_success);
+  static ActionNode* EmptyMatchCheck(
+      int start_register,
+      int repetition_register,
+      int repetition_limit,
+      RegExpNode* on_success);
    virtual void Accept(NodeVisitor* visitor);
    virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
    virtual int EatsAtLeast(int recursion_depth);
@@ -725,6 +731,11 @@
        int stack_pointer_register;
        int current_position_register;
      } u_submatch;
+    struct {
+      int start_register;
+      int repetition_register;
+      int repetition_limit;
+    } u_empty_match_check;
    } data_;
    ActionNode(Type type, RegExpNode* on_success)
        : SeqRegExpNode(on_success),
@@ -1031,6 +1042,10 @@
    int bound_checked_up_to() { return bound_checked_up_to_; }
    QuickCheckDetails* quick_check_performed() { return  
&quick_check_performed_; }
    bool mentions_reg(int reg);
+  // Returns true if a deferred position store exists to the specified
+  // register and stores the offset in the out-parameter.  Otherwise
+  // returns false.
+  bool GetStoredPosition(int reg, int* cp_offset);
    // These set methods and AdvanceVariant should be used only on new
    // GenerationVariants - the intention is that GenerationVariants are
    // immutable after creation.

Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   Thu Jan  8  
04:40:47 2009
@@ -744,6 +744,13 @@
  }


+void RegExpMacroAssemblerIA32::IfRegisterEqPos(int reg,
+                                               Label* if_eq) {
+  __ cmp(edi, register_location(reg));
+  BranchOrBacktrack(equal, if_eq);
+}
+
+
  RegExpMacroAssembler::IrregexpImplementation
      RegExpMacroAssemblerIA32::Implementation() {
    return kIA32Implementation;

Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.h    (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.h    Thu Jan  8  
04:40:47 2009
@@ -86,6 +86,7 @@
    virtual void GoTo(Label* label);
    virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
    virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
+  virtual void IfRegisterEqPos(int reg, Label* if_eq);
    virtual IrregexpImplementation Implementation();
    virtual void LoadCurrentCharacter(int cp_offset,
                                      Label* on_end_of_input,

Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc       
(original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc       Thu Jan 
  
8 04:40:47 2009
@@ -401,6 +401,14 @@
  }


+void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index,
+                                                   Label* on_eq) {
+  Emit(BC_CHECK_REGISTER_EQ_POS);
+  Emit(register_index);
+  EmitOrLink(on_eq);
+}
+
+
  Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String>  
source) {
    Bind(&backtrack_);
    Emit(BC_POP_BT);

Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h        
(original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h        Thu Jan 
 8  
04:40:47 2009
@@ -106,6 +106,7 @@
                                     const Vector<Label*>& destinations);
    virtual void IfRegisterLT(int register_index, int comparand, Label*  
if_lt);
    virtual void IfRegisterGE(int register_index, int comparand, Label*  
if_ge);
+  virtual void IfRegisterEqPos(int register_index, Label* if_eq);

    virtual IrregexpImplementation Implementation();
    virtual Handle<Object> GetCode(Handle<String> source);

Modified: branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-tracer.cc Thu Jan  8  
04:40:47 2009
@@ -373,6 +373,14 @@
  }


+void RegExpMacroAssemblerTracer::IfRegisterEqPos(int register_index,
+                                                 Label* if_eq) {
+  PrintF(" IfRegisterEqPos(register=%d, label[%08x]);\n",
+         register_index, if_eq);
+  assembler_->IfRegisterEqPos(register_index, if_eq);
+}
+
+
  void RegExpMacroAssemblerTracer::IfRegisterGE(int register_index,
                                                int comparand, Label* if_ge)  
{
    PrintF(" IfRegisterGE(register=%d, number=%d, label[%08x]);\n",

Modified: branches/bleeding_edge/src/regexp-macro-assembler-tracer.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-tracer.h  (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-tracer.h  Thu Jan  8  
04:40:47 2009
@@ -88,6 +88,7 @@
    virtual void GoTo(Label* label);
    virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
    virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
+  virtual void IfRegisterEqPos(int reg, Label* if_eq);
    virtual IrregexpImplementation Implementation();
    virtual void LoadCurrentCharacter(int cp_offset,
                                      Label* on_end_of_input,

Modified: branches/bleeding_edge/src/regexp-macro-assembler.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler.h Thu Jan  8 04:40:47  
2009
@@ -135,6 +135,9 @@
    // Check whether a register is < a given constant and go to a label if  
it is.
    // Backtracks instead if the label is NULL.
    virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
+  // Check whether a register is == to the current position and go to a
+  // label if it is.
+  virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
    virtual IrregexpImplementation Implementation() = 0;
    virtual void LoadCurrentCharacter(int cp_offset,
                                      Label* on_end_of_input,

Modified: branches/bleeding_edge/test/cctest/test-regexp.cc
==============================================================================
--- branches/bleeding_edge/test/cctest/test-regexp.cc   (original)
+++ branches/bleeding_edge/test/cctest/test-regexp.cc   Thu Jan  8 04:40:47  
2009
@@ -1466,5 +1466,5 @@

  TEST(Graph) {
    V8::Initialize(NULL);
-  Execute("\\b\\w+\\b", false, true, true);
+  Execute("(?:a|)*", false, true, true);
  }

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to