Author: [email protected]
Date: Tue Feb  3 03:43:55 2009
New Revision: 1217

Modified:
    branches/bleeding_edge/src/ast.cc
    branches/bleeding_edge/src/ast.h
    branches/bleeding_edge/src/flags.cc
    branches/bleeding_edge/src/jsregexp.cc
    branches/bleeding_edge/src/jsregexp.h
    branches/bleeding_edge/src/parser.cc
    branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc

Log:
Trace contains information about whether we know that we are at the start  
of input.
Choice nodes may know that they are never not at the start of input.
This can remove start_of_input assertions in cases where they are  
statically known to fail.
The initial loop is unrolled once if the regexp might check for the start  
of input. Only the first iteration may be at the start, the following loop  
knows that it isn't.


Modified: branches/bleeding_edge/src/ast.cc
==============================================================================
--- branches/bleeding_edge/src/ast.cc   (original)
+++ branches/bleeding_edge/src/ast.cc   Tue Feb  3 03:43:55 2009
@@ -250,7 +250,13 @@


  bool RegExpAlternative::IsAnchored() {
-  return this->nodes()->at(0)->IsAnchored();
+  ZoneList<RegExpTree*>* nodes = this->nodes();
+  for (int i = 0; i < nodes->length(); i++) {
+    RegExpTree* node = nodes->at(i);
+    if (node->IsAnchored()) { return true; }
+    if (node->max_match() > 0) { return false; }
+  }
+  return false;
  }



Modified: branches/bleeding_edge/src/ast.h
==============================================================================
--- branches/bleeding_edge/src/ast.h    (original)
+++ branches/bleeding_edge/src/ast.h    Tue Feb  3 03:43:55 2009
@@ -1461,7 +1461,8 @@
                              bool is_greedy,
                              RegExpTree* body,
                              RegExpCompiler* compiler,
-                            RegExpNode* on_success);
+                            RegExpNode* on_success,
+                            bool not_at_start = false);
    virtual RegExpQuantifier* AsQuantifier();
    virtual Interval CaptureRegisters();
    virtual bool IsQuantifier();

Modified: branches/bleeding_edge/src/flags.cc
==============================================================================
--- branches/bleeding_edge/src/flags.cc (original)
+++ branches/bleeding_edge/src/flags.cc Tue Feb  3 03:43:55 2009
@@ -438,6 +438,7 @@

    if (FLAG_help) {
      PrintHelp();
+    exit(0);
    }
    // parsed all flags successfully
    return 0;

Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc      (original)
+++ branches/bleeding_edge/src/jsregexp.cc      Tue Feb  3 03:43:55 2009
@@ -1269,7 +1269,7 @@
    List <RegExpNode*> work_list(0);
    work_list_ = &work_list;
    Label fail;
-  macro_assembler->PushBacktrack(&fail);
+  macro_assembler_->PushBacktrack(&fail);
    Trace new_trace;
    start->Emit(this, &new_trace);
    macro_assembler_->Bind(&fail);
@@ -2068,11 +2068,12 @@
  void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
      QuickCheckDetails* details,
      RegExpCompiler* compiler,
-    int filled_in) {
+    int filled_in,
+    bool not_at_start) {
    // Alternative 0 is the negative lookahead, alternative 1 is what comes
    // afterwards.
    RegExpNode* node = alternatives_->at(1).node();
-  return node->GetQuickCheckDetails(details, compiler, filled_in);
+  return node->GetQuickCheckDetails(details, compiler, filled_in,  
not_at_start);
  }


@@ -2145,7 +2146,8 @@
                                  QuickCheckDetails* details,
                                  bool fall_through_on_failure) {
    if (details->characters() == 0) return false;
-  GetQuickCheckDetails(details, compiler, 0);
+  GetQuickCheckDetails(details, compiler, 0, trace->at_start() ==  
Trace::FALSE);
+  if (details->cannot_match()) return false;
    if (!details->Rationalize(compiler->ascii())) return false;
    uint32_t mask = details->mask();
    uint32_t value = details->value();
@@ -2210,7 +2212,8 @@
  // generating a quick check.
  void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in) {
+                                    int characters_filled_in,
+                                    bool not_at_start) {
    ASSERT(characters_filled_in < details->characters());
    int characters = details->characters();
    int char_mask;
@@ -2322,7 +2325,10 @@
      }
    }
    ASSERT(characters_filled_in != details->characters());
-  on_success()-> GetQuickCheckDetails(details, compiler,  
characters_filled_in);
+  on_success()-> GetQuickCheckDetails(details,
+                                      compiler,
+                                      characters_filled_in,
+                                      true);
  }


@@ -2359,6 +2365,13 @@

  void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
    ASSERT(characters_ == other->characters_);
+  if (other->cannot_match_) {
+    return;
+  }
+  if (cannot_match_) {
+    *this = *other;
+    return;
+  }
    for (int i = from_index; i < characters_; i++) {
      QuickCheckDetails::Position* pos = positions(i);
      QuickCheckDetails::Position* other_pos = other->positions(i);
@@ -2395,27 +2408,34 @@

  void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
                                            RegExpCompiler* compiler,
-                                          int characters_filled_in) {
+                                          int characters_filled_in,
+                                          bool not_at_start) {
    if (body_can_be_zero_length_ || info()->visited) return;
    VisitMarker marker(info());
    return ChoiceNode::GetQuickCheckDetails(details,
                                            compiler,
-                                          characters_filled_in);
+                                          characters_filled_in,
+                                          not_at_start);
  }


  void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
                                        RegExpCompiler* compiler,
-                                      int characters_filled_in) {
+                                      int characters_filled_in,
+                                      bool not_at_start) {
+  not_at_start = not_at_start || not_at_start_;
    int choice_count = alternatives_->length();
    ASSERT(choice_count > 0);
    alternatives_->at(0).node()->GetQuickCheckDetails(details,
                                                      compiler,
-                                                    characters_filled_in);
+                                                    characters_filled_in,
+                                                    not_at_start);
    for (int i = 1; i < choice_count; i++) {
      QuickCheckDetails new_details(details->characters());
      RegExpNode* node = alternatives_->at(i).node();
-    node->GetQuickCheckDetails(&new_details, compiler,  
characters_filled_in);
+    node->GetQuickCheckDetails(&new_details, compiler,
+                               characters_filled_in,
+                               not_at_start);
      // Here we merge the quick match details of the two branches.
      details->Merge(&new_details, characters_filled_in);
    }
@@ -2541,6 +2561,21 @@
  }


+void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
+                                         RegExpCompiler* compiler,
+                                         int filled_in,
+                                         bool not_at_start) {
+  if (type_ == AT_START && not_at_start) {
+    details->set_cannot_match();
+    return;
+  }
+  return on_success()->GetQuickCheckDetails(details,
+                                            compiler,
+                                            filled_in,
+                                            not_at_start);
+}
+
+
  void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    RegExpMacroAssembler* assembler = compiler->macro_assembler();
    switch (type_) {
@@ -2551,9 +2586,20 @@
        assembler->Bind(&ok);
        break;
      }
-    case AT_START:
-      assembler->CheckNotAtStart(trace->backtrack());
-      break;
+    case AT_START: {
+      if (trace->at_start() == Trace::FALSE) {
+        assembler->GoTo(trace->backtrack());
+        return;
+      }
+      if (trace->at_start() == Trace::UNKNOWN) {
+        assembler->CheckNotAtStart(trace->backtrack());
+        Trace at_start_trace = *trace;
+        at_start_trace.set_at_start(true);
+        on_success()->Emit(compiler, &at_start_trace);
+        return;
+      }
+    }
+    break;
      case AFTER_NEWLINE:
        EmitHat(compiler, on_success(), trace);
        return;
@@ -2772,6 +2818,7 @@
                 &bound_checked_to);

    Trace successor_trace(*trace);
+  successor_trace.set_at_start(false);
    successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
    RecursionCheck rc(compiler);
    on_success()->Emit(compiler, &successor_trace);
@@ -3065,6 +3112,8 @@
    Label greedy_loop_label;
    Trace counter_backtrack_trace;
    counter_backtrack_trace.set_backtrack(&greedy_loop_label);
+  if (not_at_start()) counter_backtrack_trace.set_at_start(false);
+
    if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
      // Here we have special handling for greedy loops containing only text  
nodes
      // and other simple nodes.  These are handled by pushing the current
@@ -3079,6 +3128,7 @@
      current_trace = &counter_backtrack_trace;
      Label greedy_match_failed;
      Trace greedy_match_trace;
+    if (not_at_start()) greedy_match_trace.set_at_start(false);
      greedy_match_trace.set_backtrack(&greedy_match_failed);
      Label loop_label;
      macro_assembler->Bind(&loop_label);
@@ -3139,6 +3189,11 @@
          new_trace.set_bound_checked_up_to(preload_characters);
          generate_full_check_inline = true;
        }
+    } else if (alt_gen->quick_check_details.cannot_match()) {
+      if (i == choice_count - 1 && !greedy_loop) {
+        macro_assembler->GoTo(trace->backtrack());
+      }
+      continue;
      } else {
        // No quick check was generated.  Put the full code here.
        // If this is not the first choice then there could be slow checks  
from
@@ -3868,7 +3923,8 @@
                                       bool is_greedy,
                                       RegExpTree* body,
                                       RegExpCompiler* compiler,
-                                     RegExpNode* on_success) {
+                                     RegExpNode* on_success,
+                                     bool not_at_start) {
    // x{f, t} becomes this:
    //
    //             (r++)<-.
@@ -3907,8 +3963,8 @@
      if (min > 0 && min <= kMaxUnrolledMinMatches) {
        int new_max = (max == kInfinity) ? max : max - min;
        // Recurse once to get the loop or optional matches after the fixed  
ones.
-      RegExpNode* answer =
-          ToNode(0, new_max, is_greedy, body, compiler, on_success);
+      RegExpNode* answer = ToNode(
+          0, new_max, is_greedy, body, compiler, on_success, true);
        // Unroll the forced matches from 0 to min.  This can cause chains of
        // TextNodes (which the parser does not generate).  These should be
        // combined if it turns out they hinder good code generation.
@@ -3933,6 +3989,7 @@
                                                                         
answer)));
          }
          answer = alternation;
+        if (not_at_start) alternation->set_not_at_start();
        }
        return answer;
      }
@@ -3944,6 +4001,7 @@
        ? compiler->AllocateRegister()
        : RegExpCompiler::kNoRegister;
    LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
+  if (not_at_start) center->set_not_at_start();
    RegExpNode* loop_return = needs_counter
        ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr,  
center))
        : static_cast<RegExpNode*>(center);
@@ -4764,12 +4822,26 @@
    if (!data->tree->IsAnchored()) {
      // Add a .*? at the beginning, outside the body capture, unless
      // this expression is anchored at the beginning.
-    node = RegExpQuantifier::ToNode(0,
-                                    RegExpTree::kInfinity,
-                                    false,
-                                    new RegExpCharacterClass('*'),
-                                    &compiler,
-                                    captured_body);
+    RegExpNode* loop_node =
+        RegExpQuantifier::ToNode(0,
+                                 RegExpTree::kInfinity,
+                                 false,
+                                 new RegExpCharacterClass('*'),
+                                 &compiler,
+                                 captured_body,
+                                 data->contains_anchor);
+
+    if (data->contains_anchor) {
+      // Unroll loop once, to take care of the case that might start
+      // at the start of input.
+      ChoiceNode* first_step_node = new ChoiceNode(2);
+      first_step_node->AddAlternative(GuardedAlternative(captured_body));
+      first_step_node->AddAlternative(GuardedAlternative(
+          new TextNode(new RegExpCharacterClass('*'), loop_node)));
+      node = first_step_node;
+    } else {
+      node = loop_node;
+    }
    }
    data->node = node;
    Analysis analysis(ignore_case);

Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h       (original)
+++ branches/bleeding_edge/src/jsregexp.h       Tue Feb  3 03:43:55 2009
@@ -454,10 +454,6 @@


  struct NodeInfo {
-  enum TriBool {
-    UNKNOWN = -1, FALSE = 0, TRUE = 1
-  };
-
    NodeInfo()
        : being_analyzed(false),
          been_analyzed(false),
@@ -544,17 +540,21 @@
    QuickCheckDetails()
        : characters_(0),
          mask_(0),
-        value_(0) { }
+        value_(0),
+        cannot_match_(false) { }
    explicit QuickCheckDetails(int characters)
        : characters_(characters),
          mask_(0),
-        value_(0) { }
+        value_(0),
+        cannot_match_(false) { }
    bool Rationalize(bool ascii);
    // Merge in the information from another branch of an alternation.
    void Merge(QuickCheckDetails* other, int from_index);
    // Advance the current position by some amount.
    void Advance(int by, bool ascii);
    void Clear();
+  bool cannot_match() { return cannot_match_; }
+  void set_cannot_match() { cannot_match_ = true; }
    struct Position {
      Position() : mask(0), value(0), determines_perfectly(false) { }
      uc16 mask;
@@ -579,6 +579,9 @@
    // These values are the condensate of the above array after  
Rationalize().
    uint32_t mask_;
    uint32_t value_;
+  // If set to true, there is no way this quick check can match at all.
+  // E.g., if it requires to be at the start of the input, and isn't.
+  bool cannot_match_;
  };


@@ -609,7 +612,8 @@
    // A comparison success indicates the node may match.
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in) = 0;
+                                    int characters_filled_in,
+                                    bool not_at_start) = 0;
    static const int kNodeIsTooComplexForGreedyLoops = -1;
    virtual int GreedyLoopTextLength() { return  
kNodeIsTooComplexForGreedyLoops; }
    Label* label() { return &label_; }
@@ -740,8 +744,10 @@
    virtual int EatsAtLeast(int still_to_find, int recursion_depth);
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int filled_in) {
-    return on_success()->GetQuickCheckDetails(details, compiler,  
filled_in);
+                                    int filled_in,
+                                    bool not_at_start) {
+    return on_success()->GetQuickCheckDetails(
+        details, compiler, filled_in, not_at_start);
    }
    Type type() { return type_; }
    // TODO(erikcorry): We should allow some action nodes in greedy loops.
@@ -802,7 +808,8 @@
    virtual int EatsAtLeast(int still_to_find, int recursion_depth);
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in);
+                                    int characters_filled_in,
+                                    bool not_at_start);
    ZoneList<TextElement>* elements() { return elms_; }
    void MakeCaseIndependent();
    virtual int GreedyLoopTextLength();
@@ -860,9 +867,8 @@
    virtual int EatsAtLeast(int still_to_find, int recursion_depth);
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int filled_in) {
-    return on_success()->GetQuickCheckDetails(details, compiler,  
filled_in);
-  }
+                                    int filled_in,
+                                    bool not_at_start);
    virtual AssertionNode* Clone() { return new AssertionNode(*this); }
    AssertionNodeType type() { return type_; }
   private:
@@ -887,7 +893,8 @@
    virtual int EatsAtLeast(int still_to_find, int recursion_depth);
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in) {
+                                    int characters_filled_in,
+                                    bool not_at_start) {
      return;
    }
    virtual BackReferenceNode* Clone() { return new  
BackReferenceNode(*this); }
@@ -907,7 +914,8 @@
    virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return  
0; }
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in) {
+                                    int characters_filled_in,
+                                    bool not_at_start) {
      // Returning 0 from EatsAtLeast should ensure we never get here.
      UNREACHABLE();
    }
@@ -979,6 +987,7 @@
    explicit ChoiceNode(int expected_size)
        : alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
          table_(NULL),
+        not_at_start_(false),
          being_calculated_(false) { }
    virtual void Accept(NodeVisitor* visitor);
    void AddAlternative(GuardedAlternative node) {  
alternatives()->Add(node); }
@@ -991,10 +1000,13 @@
                          RegExpNode* ignore_this_node);
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in);
+                                    int characters_filled_in,
+                                    bool not_at_start);
    virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }

    bool being_calculated() { return being_calculated_; }
+  bool not_at_start() { return not_at_start_; }
+  void set_not_at_start() { not_at_start_ = true; }
    void set_being_calculated(bool b) { being_calculated_ = b; }
    virtual bool try_to_emit_quick_check_for_alternative(int i) { return  
true; }

@@ -1016,6 +1028,9 @@
                                   int preload_characters,
                                   bool next_expects_preload);
    DispatchTable* table_;
+  // If true, this node is never checked at the start of the input.
+  // Allows a new trace to start with at_start() set to false.
+  bool not_at_start_;
    bool being_calculated_;
  };

@@ -1031,7 +1046,8 @@
    virtual int EatsAtLeast(int still_to_find, int recursion_depth);
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in);
+                                    int characters_filled_in,
+                                    bool not_at_start);
    // For a negative lookahead we don't emit the quick check for the
    // alternative that is expected to fail.  This is because quick check  
code
    // starts by loading enough characters for the alternative that takes  
fewest
@@ -1054,7 +1070,8 @@
    virtual int EatsAtLeast(int still_to_find, int recursion_depth);
    virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                      RegExpCompiler* compiler,
-                                    int characters_filled_in);
+                                    int characters_filled_in,
+                                    bool not_at_start);
    virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
    RegExpNode* loop_node() { return loop_node_; }
    RegExpNode* continue_node() { return continue_node_; }
@@ -1088,6 +1105,12 @@
  // where baz has been matched.
  class Trace {
   public:
+  // A value for a property that is either known to be true, know to be  
false,
+  // or not known.
+  enum TriBool {
+    UNKNOWN = -1, FALSE = 0, TRUE = 1
+  };
+
    class DeferredAction {
     public:
      DeferredAction(ActionNode::Type type, int reg)
@@ -1150,7 +1173,9 @@
          stop_node_(NULL),
          loop_label_(NULL),
          characters_preloaded_(0),
-        bound_checked_up_to_(0) { }
+        bound_checked_up_to_(0),
+        at_start_(UNKNOWN) { }
+
    // End the trace.  This involves flushing the deferred actions in the  
trace
    // and pushing a backtrack location onto the backtrack stack.  Once this  
is
    // done we can start a new trace or go to one that has already been
@@ -1174,8 +1199,11 @@
             cp_offset_ == 0 &&
             characters_preloaded_ == 0 &&
             bound_checked_up_to_ == 0 &&
-           quick_check_performed_.characters() == 0;
+           quick_check_performed_.characters() == 0 &&
+           at_start_ == UNKNOWN;
    }
+  TriBool at_start() { return at_start_; }
+  void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; }
    Label* backtrack() { return backtrack_; }
    Label* loop_label() { return loop_label_; }
    RegExpNode* stop_node() { return stop_node_; }
@@ -1223,6 +1251,7 @@
    int characters_preloaded_;
    int bound_checked_up_to_;
    QuickCheckDetails quick_check_performed_;
+  TriBool at_start_;
  };


@@ -1305,10 +1334,12 @@
      : tree(NULL),
        node(NULL),
        simple(true),
+      contains_anchor(false),
        capture_count(0) { }
    RegExpTree* tree;
    RegExpNode* node;
    bool simple;
+  bool contains_anchor;
    Handle<String> error;
    int capture_count;
  };

Modified: branches/bleeding_edge/src/parser.cc
==============================================================================
--- branches/bleeding_edge/src/parser.cc        (original)
+++ branches/bleeding_edge/src/parser.cc        Tue Feb  3 03:43:55 2009
@@ -532,7 +532,8 @@
    // Reports whether the pattern might be used as a literal search string.
    // Only use if the result of the parse is a single atom node.
    bool simple();
-
+  bool contains_anchor() { return contains_anchor_; }
+  void set_contains_anchor() { contains_anchor_ = true; }
    int captures_started() { return captures_ == NULL ? 0 :  
captures_->length(); }
    int position() { return next_pos_ - 1; }
    bool failed() { return failed_; }
@@ -555,6 +556,7 @@
    FlatStringReader* in_;
    Handle<String>* error_;
    bool simple_;
+  bool contains_anchor_;
    ZoneList<RegExpCapture*>* captures_;
    bool is_scanned_for_captures_;
    // The capture count is only valid after we have scanned for captures.
@@ -3486,6 +3488,7 @@
      in_(in),
      error_(error),
      simple_(true),
+    contains_anchor_(false),
      captures_(NULL),
      is_scanned_for_captures_(false),
      capture_count_(0),
@@ -3603,10 +3606,14 @@
        ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
      case '^': {
        Advance();
-      RegExpAssertion::Type type =
-          multiline_ ? RegExpAssertion::START_OF_LINE :
-                       RegExpAssertion::START_OF_INPUT;
-      builder.AddAssertion(new RegExpAssertion(type));
+      if (multiline_) {
+        builder.AddAssertion(
+            new RegExpAssertion(RegExpAssertion::START_OF_LINE));
+      } else {
+        builder.AddAssertion(
+            new RegExpAssertion(RegExpAssertion::START_OF_INPUT));
+        set_contains_anchor();
+      }
        continue;
      }
      case '$': {
@@ -4312,6 +4319,7 @@
      result->tree = tree;
      int capture_count = parser.captures_started();
      result->simple = tree->IsAtom() && parser.simple() && capture_count ==  
0;
+    result->contains_anchor = parser.contains_anchor();
      result->capture_count = capture_count;
    }
    return !parser.failed();

Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc   Tue Feb  3  
03:43:55 2009
@@ -636,7 +636,7 @@
    __ push(esi);
    __ push(edi);
    __ push(ebx);  // Callee-save on MacOS.
-  __ push(Immediate(0));  // Make room for input start minus one
+  __ push(Immediate(0));  // Make room for "input start - 1" constant.

    // Check if we have space on the stack for registers.
    Label retry_stack_check;

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to