Author: [EMAIL PROTECTED]
Date: Thu Nov 13 22:22:05 2008
New Revision: 749

Modified:
    branches/experimental/regexp2000/src/ast.h
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/test/cctest/test-regexp.cc

Log:
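Added a non-greedy .*? at the beginning of every compiled regexp and an
enclosing capture (capture #0) around the regexp body.
Restructured a little bit too: the quantifier and capture ToNode logic is now
also available as static helpers, and RegExpCompiler::Compile was removed in
favor of calling ToNode directly on the tree.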


Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h  (original)
+++ branches/experimental/regexp2000/src/ast.h  Thu Nov 13 22:22:05 2008
@@ -1274,6 +1274,11 @@
    RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
      : ranges_(ranges),
        is_negated_(is_negated) { }
+  RegExpCharacterClass(uc16 type)
+    : ranges_(new ZoneList<CharacterRange>(2)),
+      is_negated_(false) {
+    CharacterRange::AddClassEscape(type, ranges_);
+  }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success,
@@ -1312,6 +1317,13 @@
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
+  static RegExpNode* ToNode(int min,
+                            int max,
+                            bool is_greedy,
+                            RegExpTree* body,
+                            RegExpCompiler* compiler,
+                            RegExpNode* on_success,
+                            RegExpNode* on_failure);
    virtual RegExpQuantifier* AsQuantifier();
    int min() { return min_; }
    int max() { return max_; }
@@ -1336,11 +1348,16 @@
    virtual RegExpNode* ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success,
                               RegExpNode* on_failure);
+  static RegExpNode* ToNode(RegExpTree* body,
+                            int index,
+                            RegExpCompiler* compiler,
+                            RegExpNode* on_success,
+                            RegExpNode* on_failure);
    virtual RegExpCapture* AsCapture();
    RegExpTree* body() { return body_; }
    int index() { return index_; }
-  static int StartRegister(int index) { return (index - 1) * 2; }
-  static int EndRegister(int index) { return (index - 1) * 2 + 1; }
+  static int StartRegister(int index) { return index * 2; }
+  static int EndRegister(int index) { return index * 2 + 1; }
   private:
    RegExpTree* body_;
    int index_;

Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Thu Nov 13 22:22:05 2008
@@ -635,7 +635,6 @@
  // New regular expression engine


-class RegExpCompiler;
  class DotPrinter;


@@ -645,10 +644,6 @@
      : next_register_(2 * capture_count),
        work_list_(NULL) { }

-  RegExpNode* Compile(RegExpTree* tree,
-                      RegExpNode* on_success,
-                      RegExpNode* on_failure);
-
    int AllocateRegister() { return next_register_++; }

    Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
@@ -914,7 +909,7 @@
      stream()->Add("\"];\n");
      that->choices()->at(i).node()->Accept(this);
    }
-  OS::PrintError("--- %p ---\n", static_cast<void*>(this));
+  OS::PrintError("--- %p ---\n", static_cast<void*>(that));
    that->table()->Dump();
  }

@@ -946,7 +941,14 @@


  void DotPrinter::VisitCharacterClass(CharacterClassNode* that) {
-  stream()->Add("  n%p [label=\"[...]\"];\n", that);
+  stream()->Add("  n%p [label=\"[", that);
+  if (that->is_negated())
+    stream()->Add("^");
+  for (int i = 0; i < that->ranges()->length(); i++) {
+    CharacterRange range = that->ranges()->at(i);
+    stream()->Add("%k-%k", range.from(), range.to());
+  }
+  stream()->Add("]\"];\n");
    stream()->Add("  n%p -> n%p;\n", that, that->on_success());
    Visit(that->on_success());
    PrintOnFailure(that, that->on_failure());
@@ -1062,9 +1064,9 @@
    int length = children->length();
    ChoiceNode* result = new ChoiceNode(length, on_failure);
    for (int i = 0; i < length; i++) {
-    GuardedAlternative child(compiler->Compile(children->at(i),
-                                               on_success,
-                                               on_failure));
+    GuardedAlternative child(children->at(i)->ToNode(compiler,
+                                                     on_success,
+                                                     on_failure));
      result->AddChild(child);
    }
    return result;
@@ -1074,6 +1076,23 @@
  RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
                                       RegExpNode* on_success,
                                       RegExpNode* on_failure) {
+  return ToNode(min(),
+                max(),
+                is_greedy(),
+                body(),
+                compiler,
+                on_success,
+                on_failure);
+}
+
+
+RegExpNode* RegExpQuantifier::ToNode(int min,
+                                     int max,
+                                     bool is_greedy,
+                                     RegExpTree* body,
+                                     RegExpCompiler* compiler,
+                                     RegExpNode* on_success,
+                                     RegExpNode* on_failure) {
    // x{f, t} becomes this:
    //
    //             (r++)<-.
@@ -1087,26 +1106,26 @@
    //
    // TODO(someone): clear captures on repetition and handle empty
    //   matches.
-  bool has_min = min() > 0;
-  bool has_max = max() < RegExpQuantifier::kInfinity;
+  bool has_min = min > 0;
+  bool has_max = max < RegExpQuantifier::kInfinity;
    bool needs_counter = has_min || has_max;
    int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
    ChoiceNode* center = new ChoiceNode(2, on_failure);
    RegExpNode* loop_return = needs_counter
        ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
        : static_cast<RegExpNode*>(center);
-  RegExpNode* body_node = compiler->Compile(body(), loop_return, on_failure);
+  RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure);
    GuardedAlternative body_alt(body_node);
    if (has_max) {
-    Guard* body_guard = new Guard(reg_ctr, Guard::LT, max());
+    Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
      body_alt.AddGuard(body_guard);
    }
    GuardedAlternative rest_alt(on_success);
    if (has_min) {
-    Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min());
+    Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
      rest_alt.AddGuard(rest_guard);
    }
-  if (is_greedy()) {
+  if (is_greedy) {
      center->AddChild(body_alt);
      center->AddChild(rest_alt);
    } else {
@@ -1162,7 +1181,7 @@
      //   end submatch scope (nothing to clean up, just exit the scope)
      //   fail
      return ActionNode::BeginSubmatch(ActionNode::StorePosition(
-        position_register, compiler->Compile(body(),
+        position_register, body()->ToNode(compiler,
              ActionNode::RestorePosition(position_register,
                  ActionNode::EscapeSubmatch(on_success)),
              ActionNode::EndSubmatch(on_failure))));
@@ -1180,7 +1199,7 @@
      //       succeed
      ChoiceNode* try_node =
          new ChoiceNode(1, ActionNode::EndSubmatch(on_success));
-    RegExpNode* body_node = compiler->Compile(body(),
+    RegExpNode* body_node = body()->ToNode(compiler,
          ActionNode::EscapeSubmatch(on_failure),
          EndNode::GetBacktrack());
      GuardedAlternative body_alt(body_node);
@@ -1193,10 +1212,19 @@
  RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
                                    RegExpNode* on_success,
                                    RegExpNode* on_failure) {
-  int start_reg = RegExpCapture::StartRegister(index());
-  int end_reg = RegExpCapture::EndRegister(index());
+  return ToNode(body(), index(), compiler, on_success, on_failure);
+}
+
+
+RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
+                                  int index,
+                                  RegExpCompiler* compiler,
+                                  RegExpNode* on_success,
+                                  RegExpNode* on_failure) {
+  int start_reg = RegExpCapture::StartRegister(index);
+  int end_reg = RegExpCapture::EndRegister(index);
    RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
-  RegExpNode* body_node = compiler->Compile(body(), store_end, on_failure);
+  RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure);
    return ActionNode::StorePosition(start_reg, body_node);
  }

@@ -1207,7 +1235,7 @@
    ZoneList<RegExpTree*>* children = nodes();
    RegExpNode* current = on_success;
    for (int i = children->length() - 1; i >= 0; i--) {
-    current = compiler->Compile(children->at(i), current, on_failure);
+    current = children->at(i)->ToNode(compiler, current, on_failure);
    }
    return current;
  }
@@ -1581,18 +1609,26 @@
  }


-RegExpNode* RegExpCompiler::Compile(RegExpTree* tree,
-                                    RegExpNode* on_success,
-                                    RegExpNode* on_failure) {
-  return tree->ToNode(this, on_success, on_failure);
-}
-
-
  RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
    RegExpCompiler compiler(input->capture_count);
-  RegExpNode* node = compiler.Compile(input->tree,
-                                      EndNode::GetAccept(),
-                                      EndNode::GetBacktrack());
+  // Wrap the body of the regexp in capture #0.
+  RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
+                                                    0,
+                                                    &compiler,
+                                                    EndNode::GetAccept(),
+                                                    EndNode::GetBacktrack());
+  // Add a .*? at the beginning, outside the body capture.
+  // Note: We could choose to not add this if the regexp is anchored at
+  //   the start of the input but I'm not sure how best to do that and
+  //   since we don't even handle ^ yet I'm saving that optimization for
+  //   later.
+  RegExpNode* node = RegExpQuantifier::ToNode(0,
+                                              RegExpQuantifier::kInfinity,
+                                              false,
+                                              new RegExpCharacterClass('.'),
+                                              &compiler,
+                                              captured_body,
+                                              EndNode::GetBacktrack());
    Analysis analysis(&compiler);
    analysis.Analyze(node);
    return node;

Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Thu Nov 13 22:22:05 2008
@@ -699,5 +699,5 @@


  TEST(Graph) {
-  Execute(".*?[^a]|b", "", true);
+  Execute("a|(b|c)|d", "", true);
  }

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to