Author: [EMAIL PROTECTED]
Date: Thu Nov  6 09:36:02 2008
New Revision: 705

Modified:
    branches/experimental/regexp2000/src/ast.h
    branches/experimental/regexp2000/src/globals.h
    branches/experimental/regexp2000/src/jsregexp.cc
    branches/experimental/regexp2000/src/jsregexp.h
    branches/experimental/regexp2000/src/parser.cc
    branches/experimental/regexp2000/src/parser.h
    branches/experimental/regexp2000/test/cctest/test-regexp.cc

Log:
- Implemented some more translation from asts to automata, including
   upper/lower bounded repetition, captures, positive and negative
   forward assertions, etc.
- Un-templateified automaton nodes.


Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h  (original)
+++ branches/experimental/regexp2000/src/ast.h  Thu Nov  6 09:36:02 2008
@@ -1204,8 +1204,6 @@
    VISIT(Empty)


-class RegExpVisitor;
-template <typename Char> class RegExpNode;
  #define FORWARD_DECLARE(Name) class RegExp##Name;
  FOR_EACH_REG_EXP_NODE_TYPE(FORWARD_DECLARE)
  #undef FORWARD_DECLARE
@@ -1215,6 +1213,9 @@
   public:
    virtual ~RegExpTree() { }
    virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure) = 0;
    SmartPointer<char> ToString();
  #define MAKE_ASTYPE(Name)  virtual RegExp##Name* As##Name();
    FOR_EACH_REG_EXP_NODE_TYPE(MAKE_ASTYPE)
@@ -1226,6 +1227,9 @@
   public:
    explicit RegExpDisjunction(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)  
{ }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpDisjunction* AsDisjunction();
    ZoneList<RegExpTree*>* nodes() { return nodes_; }
   private:
@@ -1237,6 +1241,9 @@
   public:
    explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)  
{ }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpAlternative* AsAlternative();
    ZoneList<RegExpTree*>* nodes() { return nodes_; }
   private:
@@ -1252,6 +1259,9 @@
    };
    explicit RegExpAssertion(Type type) : type_(type) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpAssertion* AsAssertion();
    Type type() { return type_; }
   private:
@@ -1265,6 +1275,9 @@
      : ranges_(ranges),
        is_negated_(is_negated) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpCharacterClass* AsCharacterClass();
    ZoneList<CharacterRange>* ranges() { return ranges_; }
    bool is_negated() { return is_negated_; }
@@ -1278,6 +1291,9 @@
   public:
    explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpAtom* AsAtom();
    Vector<const uc16> data() { return data_; }
   private:
@@ -1293,6 +1309,9 @@
        is_greedy_(is_greedy),
        body_(body) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpQuantifier* AsQuantifier();
    int min() { return min_; }
    int max() { return max_; }
@@ -1314,9 +1333,14 @@
    explicit RegExpCapture(RegExpTree* body, int index)
      : body_(body), index_(index) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpCapture* AsCapture();
    RegExpTree* body() { return body_; }
    int index() { return index_; }
+  static int StartRegister(int index) { return (index - 1) * 2; }
+  static int EndRegister(int index) { return (index - 1) * 2 + 1; }
   private:
    RegExpTree* body_;
    int index_;
@@ -1329,6 +1353,9 @@
      : body_(body),
        is_positive_(is_positive) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpLookahead* AsLookahead();
    RegExpTree* body() { return body_; }
    bool is_positive() { return is_positive_; }
@@ -1342,6 +1369,9 @@
   public:
    explicit RegExpBackreference(int index) : index_(index) { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpBackreference* AsBackreference();
    int index() { return index_; }
   private:
@@ -1353,6 +1383,9 @@
   public:
    RegExpEmpty() { }
    virtual void* Accept(RegExpVisitor* visitor, void* data);
+  virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+                             RegExpNode* on_success,
+                             RegExpNode* on_failure);
    virtual RegExpEmpty* AsEmpty();
    static RegExpEmpty* GetInstance() { return &kInstance; }
   private:

Modified: branches/experimental/regexp2000/src/globals.h
==============================================================================
--- branches/experimental/regexp2000/src/globals.h      (original)
+++ branches/experimental/regexp2000/src/globals.h      Thu Nov  6 09:36:02 2008
@@ -182,7 +182,11 @@
  class OldSpace;
  class Property;
  class Proxy;
+class RegExpNode;
+class RegExpParseResult;
  class RegExpTree;
+class RegExpCompiler;
+class RegExpVisitor;
  class Scope;
  template<class Allocator = FreeStoreAllocationPolicy> class ScopeInfo;
  class Script;

Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc    (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc    Thu Nov  6 09:36:02  
2008
@@ -204,17 +204,18 @@
      re->set_data(*cached);
      result = re;
    } else {
-     SafeStringInputBuffer buffer(pattern.location());
-    Handle<String> error_text;
-    bool has_escapes;
-    RegExpTree* ast = ParseRegExp(&buffer, &error_text, &has_escapes);
-    if (!error_text.is_null()) {
+    SafeStringInputBuffer buffer(pattern.location());
+    RegExpParseResult parse_result;
+    if (!ParseRegExp(&buffer, &parse_result)) {
        // Throw an exception if we fail to parse the pattern.
-      return CreateRegExpException(re, pattern,  
error_text, "malformed_regexp");
+      return CreateRegExpException(re,
+                                   pattern,
+                                   parse_result.error,
+                                   "malformed_regexp");
      }
-    RegExpAtom* atom = ast->AsAtom();
+    RegExpAtom* atom = parse_result.tree->AsAtom();
      if (atom != NULL && !flags.is_ignore_case()) {
-      if (has_escapes) {
+      if (parse_result.has_character_escapes) {
          Vector<const uc16> atom_pattern = atom->data();
          Handle<String> atom_string =
              Factory::NewStringFromTwoByte(atom_pattern);
@@ -629,121 +630,266 @@
  // New regular expression engine


-template <typename Char> class ExecutionState;
+class ExecutionState;


-template <typename Char>
  class DotPrinter {
   public:
    DotPrinter() : stream_(&alloc_) { }
-  void PrintNode(RegExpNode<Char>* node);
-  void Visit(RegExpNode<Char>* node);
+  void PrintNode(const char* label, RegExpNode* node);
+  void Visit(RegExpNode* node);
    StringStream* stream() { return &stream_; }
   private:
    HeapStringAllocator alloc_;
    StringStream stream_;
-  std::set<RegExpNode<Char>*> seen_;
+  std::set<RegExpNode*> seen_;
  };


-template <typename Char>
-class RegExpCompiler: public RegExpVisitor {
+class RegExpCompiler {
   public:
-  RegExpCompiler() { }
-  RegExpNode<Char>* Compile(RegExpTree* tree, RegExpNode<Char>* rest) {
-    return static_cast<RegExpNode<Char>*>(tree->Accept(this, rest));
-  }
-#define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void*);
-  FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CASE)
-#undef MAKE_CASE
+  explicit RegExpCompiler(int capture_count)
+    : next_register_(2 * capture_count) { }
+
+  RegExpNode* Compile(RegExpTree* tree,
+                      RegExpNode* on_success,
+                      RegExpNode* on_failure) {
+    return tree->ToNode(this, on_success, on_failure);
+  }
+
+  int AllocateRegister() { return next_register_++; }
+
+ private:
+  int next_register_;
  };


-template <typename Char>
  class RegExpNode: public ZoneObject {
   public:
    virtual ~RegExpNode() { }
-  virtual void EmitDot(DotPrinter<Char>* out);
-  virtual bool Step(ExecutionState<Char>* state) = 0;
+  virtual void EmitDot(DotPrinter* out);
  };


-template <typename Char>
-class SeqRegExpNode: public RegExpNode<Char> {
+class SeqRegExpNode: public RegExpNode {
   public:
-  explicit SeqRegExpNode(RegExpNode<Char>* next) : next_(next) { }
-  RegExpNode<Char>* next() { return next_; }
+  explicit SeqRegExpNode(RegExpNode* on_success)
+    : on_success_(on_success) { }
+  RegExpNode* on_success() { return on_success_; }
   private:
-  RegExpNode<Char>* next_;
+  RegExpNode* on_success_;
  };


-template <typename Char>
-class EndNode: public RegExpNode<Char> {
+class EndNode: public RegExpNode {
   public:
-  virtual void EmitDot(DotPrinter<Char>* out);
-  virtual bool Step(ExecutionState<Char>* state);
+  enum Action { ACCEPT, BACKTRACK };
+  virtual void EmitDot(DotPrinter* out);
+  static EndNode* GetAccept() { return &kAccept; }
+  static EndNode* GetBacktrack() { return &kBacktrack; }
+ private:
+  explicit EndNode(Action action) : action_(action) { }
+  Action action_;
+  static EndNode kAccept;
+  static EndNode kBacktrack;
  };


-template <typename Char>
-class AtomNode: public SeqRegExpNode<Char> {
+EndNode EndNode::kAccept(ACCEPT);
+EndNode EndNode::kBacktrack(BACKTRACK);
+
+
+class AtomNode: public SeqRegExpNode {
   public:
-  AtomNode(Vector<const uc16> data, RegExpNode<Char>* next)
-    : SeqRegExpNode<Char>(next),
+  AtomNode(Vector<const uc16> data,
+           RegExpNode* on_success,
+           RegExpNode* on_failure)
+    : SeqRegExpNode(on_success),
+      on_failure_(on_failure),
        data_(data) { }
-  virtual void EmitDot(DotPrinter<Char>* out);
-  virtual bool Step(ExecutionState<Char>* state);
+  virtual void EmitDot(DotPrinter* out);
    Vector<const uc16> data() { return data_; }
+  RegExpNode* on_failure() { return on_failure_; }
   private:
+  RegExpNode* on_failure_;
    Vector<const uc16> data_;
  };


-template <typename Char>
-class CharacterClassNode: public SeqRegExpNode<Char> {
+class BackreferenceNode: public SeqRegExpNode {
   public:
-  CharacterClassNode(ZoneList<CharacterRange>* ranges, RegExpNode<Char>*  
next)
-    : SeqRegExpNode<Char>(next),
+  BackreferenceNode(int start_reg,
+                    int end_reg,
+                    RegExpNode* on_success,
+                    RegExpNode* on_failure)
+    : SeqRegExpNode(on_success),
+      on_failure_(on_failure),
+      start_reg_(start_reg),
+      end_reg_(end_reg) { }
+  virtual void EmitDot(DotPrinter* out);
+  RegExpNode* on_failure() { return on_failure_; }
+  int start_register() { return start_reg_; }
+  int end_register() { return end_reg_; }
+ private:
+  RegExpNode* on_failure_;
+  int start_reg_;
+  int end_reg_;
+};
+
+
+class CharacterClassNode: public SeqRegExpNode {
+ public:
+  CharacterClassNode(ZoneList<CharacterRange>* ranges,
+                     RegExpNode* on_success,
+                     RegExpNode* on_failure)
+    : SeqRegExpNode(on_success),
+      on_failure_(on_failure),
        ranges_(ranges) { }
-  virtual void EmitDot(DotPrinter<Char>* out);
-  virtual bool Step(ExecutionState<Char>* state);
+  virtual void EmitDot(DotPrinter* out);
    ZoneList<CharacterRange>* ranges() { return ranges_; }
+  RegExpNode* on_failure() { return on_failure_; }
   private:
+  RegExpNode* on_failure_;
    ZoneList<CharacterRange>* ranges_;
  };


-template <typename Char>
-class ChoiceNode: public RegExpNode<Char> {
+class Guard: public ZoneObject {
   public:
-  explicit ChoiceNode(ZoneList<RegExpNode<Char>*>* choices)
-    : choices_(choices) { }
-  virtual void EmitDot(DotPrinter<Char>* out);
-  virtual bool Step(ExecutionState<Char>* state);
-  ZoneList<RegExpNode<Char>*>* choices() { return choices_; }
-  DispatchTable* table() { return table_; }
+  enum Relation { LT, GEQ };
+  Guard(int reg, Relation op, int value)
+    : reg_(reg),
+      op_(op),
+      value_(value) { }
+  int reg() { return reg_; }
+  Relation op() { return op_; }
+  int value() { return value_; }
   private:
-  ZoneList<RegExpNode<Char>*>* choices_;
+  int reg_;
+  Relation op_;
+  int value_;
+};
+
+
+class GuardedAlternative {
+ public:
+  explicit GuardedAlternative(RegExpNode* node) : node_(node),  
guards_(NULL) { }
+  void AddGuard(Guard* guard);
+  RegExpNode* node() { return node_; }
+  ZoneList<Guard*>* guards() { return guards_; }
+ private:
+  RegExpNode* node_;
+  ZoneList<Guard*>* guards_;
+};
+
+
+void GuardedAlternative::AddGuard(Guard* guard) {
+  if (guards_ == NULL)
+    guards_ = new ZoneList<Guard*>(1);
+  guards_->Add(guard);
+}
+
+
+class ChoiceNode: public RegExpNode {
+ public:
+  explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
+    : on_failure_(on_failure),
+      choices_(new ZoneList<GuardedAlternative>(expected_size)) { }
+  virtual void EmitDot(DotPrinter* out);
+  void AddChild(GuardedAlternative node) { choices()->Add(node); }
+  ZoneList<GuardedAlternative>* choices() { return choices_; }
+  DispatchTable* table() { return &table_; }
+  RegExpNode* on_failure() { return on_failure_; }
+ private:
+  RegExpNode* on_failure_;
+  ZoneList<GuardedAlternative>* choices_;
    DispatchTable table_;
  };


+class ActionNode: public SeqRegExpNode {
+ public:
+  enum Type {
+    STORE_REGISTER,
+    INCREMENT_REGISTER,
+    STORE_POSITION,
+    BEGIN_SUBMATCH,
+    ESCAPE_SUBMATCH,
+    END_SUBMATCH
+  };
+  static ActionNode* StoreRegister(int reg, int val, RegExpNode*  
on_success) {
+    ActionNode* result = new ActionNode(STORE_REGISTER, on_success);
+    result->data_.u_store_register.reg_ = reg;
+    result->data_.u_store_register.value_ = val;
+    return result;
+  }
+  static ActionNode* IncrementRegister(int reg, RegExpNode* on_success) {
+    ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
+    result->data_.u_increment_register.reg_ = reg;
+    return result;
+  }
+  static ActionNode* StorePosition(int reg, RegExpNode* on_success) {
+    ActionNode* result = new ActionNode(STORE_POSITION, on_success);
+    result->data_.u_store_position.reg_ = reg;
+    return result;
+  }
+  static ActionNode* BeginSubmatch(RegExpNode* on_success) {
+    return new ActionNode(BEGIN_SUBMATCH, on_success);
+  }
+  static ActionNode* EscapeSubmatch(RegExpNode* on_success) {
+    return new ActionNode(ESCAPE_SUBMATCH, on_success);
+  }
+  static ActionNode* EndSubmatch(RegExpNode* on_success) {
+    return new ActionNode(END_SUBMATCH, on_success);
+  }
+  virtual void EmitDot(DotPrinter* out);
+  Type type() { return type_; }
+ private:
+  ActionNode(Type type, RegExpNode* on_success)
+    : SeqRegExpNode(on_success),
+      type_(type) { }
+  Type type_;
+  union {
+    struct {
+      int reg_;
+      int value_;
+    } u_store_register;
+    struct {
+      int reg_;
+    } u_increment_register;
+    struct {
+      int reg_;
+    } u_store_position;
+  } data_;
+};
+
+
  // -------------------------------------------------------------------
  // Dot/dotty output

-
-template <typename Char>
-void DotPrinter<Char>::PrintNode(RegExpNode<Char>* node) {
-  stream()->Add("digraph G {\n");
+void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
+  stream()->Add("digraph G {\n  graph [label=\"");
+  for (int i = 0; label[i]; i++) {
+    switch (label[i]) {
+      case '\\':
+        stream()->Add("\\\\");
+        break;
+      case '"':
+        stream()->Add("\"");
+        break;
+      default:
+        stream()->Put(label[i]);
+        break;
+    }
+  }
+  stream()->Add("\"]; \n");
    Visit(node);
    stream()->Add("}\n");
    printf("%s", *(stream()->ToCString()));
  }


-template <typename Char>
-void DotPrinter<Char>::Visit(RegExpNode<Char>* node) {
+void DotPrinter::Visit(RegExpNode* node) {
    if (seen_.find(node) != seen_.end())
      return;
    seen_.insert(node);
@@ -751,44 +897,114 @@
  }


-template <typename Char>
-void RegExpNode<Char>::EmitDot(DotPrinter<Char>* out) {
+void RegExpNode::EmitDot(DotPrinter* out) {
    UNIMPLEMENTED();
  }


-template <typename Char>
-void ChoiceNode<Char>::EmitDot(DotPrinter<Char>* out) {
-  out->stream()->Add("n%p [label=\"?\"];\n", this);
+static void PrintOnFailure(DotPrinter* out,
+                           RegExpNode* from,
+                           RegExpNode* on_failure) {
+  if (on_failure == EndNode::GetBacktrack()) return;
+  out->stream()->Add("  n%p -> n%p [style=dotted];\n", from, on_failure);
+  out->Visit(on_failure);
+}
+
+
+void ChoiceNode::EmitDot(DotPrinter* out) {
+  out->stream()->Add("  n%p [label=\"?\", shape=circle];\n", this);
+  PrintOnFailure(out, this, this->on_failure());
    for (int i = 0; i < choices()->length(); i++) {
-    out->stream()->Add("n%p -> n%p [label=\"%i\"];\n",
+    GuardedAlternative alt = choices()->at(i);
+    out->stream()->Add("  n%p -> n%p [label=\"%i",
                         this,
-                       choices()->at(i),
+                       alt.node(),
                         i);
-    out->Visit(choices()->at(i));
+    if (alt.guards() != NULL) {
+      out->stream()->Add(" [");
+      for (int j = 0; j < alt.guards()->length(); j++) {
+        if (j > 0) out->stream()->Add(" ");
+        Guard* guard = alt.guards()->at(j);
+        switch (guard->op()) {
+          case Guard::GEQ:
+            out->stream()->Add("$%i &#8805; %i", guard->reg(),  
guard->value());
+            break;
+          case Guard::LT:
+            out->stream()->Add("$%i < %i", guard->reg(), guard->value());
+            break;
+        }
+      }
+      out->stream()->Add("]");
+    }
+    out->stream()->Add("\"];\n");
+    out->Visit(choices()->at(i).node());
    }
  }


-template <typename Char>
-void AtomNode<Char>::EmitDot(DotPrinter<Char>* out) {
-  out->stream()->Add("n%p [label=\"'%w'\"];\n", this, data());
-  out->stream()->Add("n%p -> n%p;\n", this, this->next());
-  out->Visit(this->next());
+void AtomNode::EmitDot(DotPrinter* out) {
+  out->stream()->Add("  n%p [label=\"'%w'\", shape=doubleoctagon];\n",
+                     this,
+                     data());
+  out->stream()->Add("  n%p -> n%p;\n", this, this->on_success());
+  out->Visit(this->on_success());
+  PrintOnFailure(out, this, this->on_failure());
+}
+
+
+void BackreferenceNode::EmitDot(DotPrinter* out) {
+  out->stream()->Add("  n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
+                     this,
+                     start_register(),
+                     end_register());
+  out->stream()->Add("  n%p -> n%p;\n", this, this->on_success());
+  out->Visit(this->on_success());
+  PrintOnFailure(out, this, this->on_failure());
  }


-template <typename Char>
-void EndNode<Char>::EmitDot(DotPrinter<Char>* out) {
-  out->stream()->Add("n%p [style=bold, label=\"done\"];\n", this);
+void EndNode::EmitDot(DotPrinter* out) {
+  out->stream()->Add("  n%p [style=bold, shape=point];\n", this);
  }


-template <typename Char>
-void CharacterClassNode<Char>::EmitDot(DotPrinter<Char>* out) {
-  out->stream()->Add("n%p [label=\"[...]\"];\n", this);
-  out->stream()->Add("n%p -> n%p;\n", this, this->next());
-  out->Visit(this->next());
+void CharacterClassNode::EmitDot(DotPrinter* out) {
+  out->stream()->Add("  n%p [label=\"[...]\"];\n", this);
+  out->stream()->Add("  n%p -> n%p;\n", this, this->on_success());
+  out->Visit(this->on_success());
+  PrintOnFailure(out, this, this->on_failure());
+}
+
+
+void ActionNode::EmitDot(DotPrinter* out) {
+  out->stream()->Add("  n%p [", this);
+  switch (type()) {
+    case STORE_REGISTER:
+      out->stream()->Add("label=\"$%i:=%i\", shape=box",
+                         data_.u_store_register.reg_,
+                         data_.u_store_register.value_);
+      break;
+    case INCREMENT_REGISTER:
+      out->stream()->Add("label=\"$%i++\", shape=box",
+                         data_.u_increment_register.reg_);
+      break;
+    case STORE_POSITION:
+      out->stream()->Add("label=\"$%i:=$pos\", shape=box",
+                         data_.u_store_position.reg_);
+      break;
+    case BEGIN_SUBMATCH:
+      out->stream()->Add("label=\"begin\", shape=septagon");
+      break;
+    case ESCAPE_SUBMATCH:
+      out->stream()->Add("label=\"escape\", shape=septagon");
+      break;
+    case END_SUBMATCH:
+      out->stream()->Add("label=\"end\", shape=septagon");
+      break;
+  }
+  out->stream()->Add("];\n");
+  out->stream()->Add("  n%p -> n%p;\n", this, this->on_success());
+  out->Visit(this->on_success());
  }


@@ -796,108 +1012,146 @@
  // Tree to graph conversion


-template <typename Char>
-void* RegExpCompiler<Char>::VisitAtom(RegExpAtom* that, void* rest) {
-  return new AtomNode<Char>(that->data(),
-                            static_cast<RegExpNode<Char>*>(rest));
+RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
+                               RegExpNode* on_success,
+                               RegExpNode* on_failure) {
+  return new AtomNode(data(), on_success, on_failure);
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitCharacterClass(RegExpCharacterClass* that,
-                                                void* rest) {
-  return new CharacterClassNode<Char>(that->ranges(),
-                                       
static_cast<RegExpNode<Char>*>(rest));
+RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
+                                         RegExpNode* on_success,
+                                         RegExpNode* on_failure) {
+  return new CharacterClassNode(ranges(), on_success, on_failure);
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitDisjunction(RegExpDisjunction* that,
-                                             void* rest_ptr) {
-  RegExpNode<Char>* rest = static_cast<RegExpNode<Char>*>(rest_ptr);
-  ZoneList<RegExpTree*>* children = that->nodes();
+RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
+                                      RegExpNode* on_success,
+                                      RegExpNode* on_failure) {
+  ZoneList<RegExpTree*>* children = nodes();
    int length = children->length();
-  ZoneList<RegExpNode<Char>*>* choices
-      = new ZoneList<RegExpNode<Char>*>(length);
-  for (int i = 0; i < length; i++)
-    choices->Add(Compile(children->at(i), rest));
-  return new ChoiceNode<Char>(choices);
+  ChoiceNode* result = new ChoiceNode(length, on_failure);
+  for (int i = 0; i < length; i++) {
+    GuardedAlternative child(compiler->Compile(children->at(i),
+                                               on_success,
+                                               on_failure));
+    result->AddChild(child);
+  }
+  return result;
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitQuantifier(RegExpQuantifier* that,
-                                            void* rest_ptr) {
-  RegExpNode<Char>* rest = static_cast<RegExpNode<Char>*>(rest_ptr);
-  if (that->max() >= RegExpQuantifier::kInfinity) {
-    // Don't try to count the number of iterations if the max it too
-    // large.
-    if (that->min() != 0) {
-      UNIMPLEMENTED();
-    }
-    ZoneList<RegExpNode<Char>*>* loop_choices
-        = new ZoneList<RegExpNode<Char>*>(2);
-    RegExpNode<Char>* loop_node = new ChoiceNode<Char>(loop_choices);
-    RegExpNode<Char>* body_node = Compile(that->body(), loop_node);
-    if (that->is_greedy()) {
-      loop_choices->Add(body_node);
-      loop_choices->Add(rest);
-    } else {
-      loop_choices->Add(rest);
-      loop_choices->Add(body_node);
-    }
-    return loop_node;
+RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
+                                     RegExpNode* on_success,
+                                     RegExpNode* on_failure) {
+  // x{f, t} becomes this:
+  //
+  //             (r++)<-.
+  //               |     `
+  //               |     (x)
+  //               v     ^
+  //      (r=0)-->(?)---/ [if r < t]
+  //               |
+  //   [if r >= f] \----> ...
+  //
+  //
+  // TODO(someone): clear captures on repetition and handle empty
+  //   matches.
+  bool has_min = min() > 0;
+  bool has_max = max() < RegExpQuantifier::kInfinity;
+  bool needs_counter = has_min || has_max;
+  int reg = needs_counter ? compiler->AllocateRegister() : -1;
+  ChoiceNode* center = new ChoiceNode(2, on_failure);
+  RegExpNode* loop_return = needs_counter
+      ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg,  
center))
+      : static_cast<RegExpNode*>(center);
+  RegExpNode* body_node = compiler->Compile(body(), loop_return,  
on_failure);
+  GuardedAlternative body_alt(body_node);
+  if (has_max) {
+    Guard* body_guard = new Guard(reg, Guard::LT, max());
+    body_alt.AddGuard(body_guard);
+  }
+  GuardedAlternative rest_alt(on_success);
+  if (has_min) {
+    Guard* rest_guard = new Guard(reg, Guard::GEQ, min());
+    rest_alt.AddGuard(rest_guard);
+  }
+  if (is_greedy()) {
+    center->AddChild(body_alt);
+    center->AddChild(rest_alt);
+  } else {
+    center->AddChild(rest_alt);
+    center->AddChild(body_alt);
+  }
+  if (needs_counter) {
+    return ActionNode::StoreRegister(reg, 0, center);
    } else {
-    UNIMPLEMENTED();
-    return NULL;
+    return center;
    }
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitAssertion(RegExpAssertion* that,
-                                           void* rest) {
-  UNIMPLEMENTED();
-  return NULL;
+RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
+                                    RegExpNode* on_success,
+                                    RegExpNode* on_failure) {
+  // TODO(self): implement assertions.
+  return on_success;
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitCapture(RegExpCapture* that, void* rest) {
-  UNIMPLEMENTED();
-  return NULL;
+RegExpNode* RegExpBackreference::ToNode(RegExpCompiler* compiler,
+                                        RegExpNode* on_success,
+                                        RegExpNode* on_failure) {
+  return new BackreferenceNode(RegExpCapture::StartRegister(index()),
+                               RegExpCapture::EndRegister(index()),
+                               on_success,
+                               on_failure);
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitLookahead(RegExpLookahead* that,
-                                           void* rest) {
-  UNIMPLEMENTED();
-  return NULL;
+RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
+                                RegExpNode* on_success,
+                                RegExpNode* on_failure) {
+  return on_success;
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitBackreference(RegExpBackreference* that,
-                                               void* rest) {
-  UNIMPLEMENTED();
-  return NULL;
+RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
+                                    RegExpNode* on_success,
+                                    RegExpNode* on_failure) {
+  if (is_positive()) {
+    RegExpNode* proceed = ActionNode::EndSubmatch(on_success);
+    RegExpNode* escape = ActionNode::EscapeSubmatch(on_failure);
+    RegExpNode* body_node = compiler->Compile(body(), proceed, escape);
+    return ActionNode::BeginSubmatch(body_node);
+  } else {
+    RegExpNode* failed = ActionNode::EscapeSubmatch(on_success);
+    RegExpNode* succeeded = ActionNode::EndSubmatch(on_failure);
+    RegExpNode* body_node = compiler->Compile(body(), succeeded, failed);
+    return ActionNode::BeginSubmatch(body_node);
+  }
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitEmpty(RegExpEmpty* that, void* rest) {
-  return rest;
+RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
+                                  RegExpNode* on_success,
+                                  RegExpNode* on_failure) {
+  int start_reg = RegExpCapture::StartRegister(index());
+  int end_reg = RegExpCapture::EndRegister(index());
+  RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
+  RegExpNode* body_node = compiler->Compile(body(), store_end, on_failure);
+  return ActionNode::StorePosition(start_reg, body_node);
  }


-template <typename Char>
-void* RegExpCompiler<Char>::VisitAlternative(RegExpAlternative* that,
-                                             void* rest) {
-  ZoneList<RegExpTree*>* children = that->nodes();
-  RegExpNode<Char>* current = static_cast<RegExpNode<Char>*>(rest);
+RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
+                                      RegExpNode* on_success,
+                                      RegExpNode* on_failure) {
+  ZoneList<RegExpTree*>* children = nodes();
+  RegExpNode* current = on_success;
    for (int i = children->length() - 1; i >= 0; i--) {
-    current = Compile(children->at(i), current);
+    current = compiler->Compile(children->at(i), current, on_failure);
    }
    return current;
  }
@@ -1112,201 +1366,19 @@
  }


-// -------------------------------------------------------------------
-// Execution
-
-
-template <typename Char>
-class ExecutionState {
- public:
-  ExecutionState(RegExpNode<Char>* start, Vector<Char> input)
-    : current_(start),
-      input_(input),
-      pos_(0),
-      backtrack_stack_(8),
-      is_done_(false) { }
-
-  class BacktrackState {
-   public:
-    BacktrackState(ChoiceNode<Char>* choice, int next, int pos)
-      : choice_(choice),
-        next_(next),
-        pos_(pos) { }
-    ChoiceNode<Char>* choice() { return choice_; }
-    int next() { return next_; }
-    void set_next(int value) { next_ = value; }
-    int pos() { return pos_; }
-   private:
-    ChoiceNode<Char>* choice_;
-    int next_;
-    int pos_;
-  };
-
-  // Execute a single step, returning true if it succeeded
-  inline bool Step() { return current()->Step(this); }
-
-  // Stores the given choice node and the execution state on the
-  // backtrack stack.
-  void SaveBacktrack(ChoiceNode<Char>* choice);
-
-  // Reverts to the next unused backtrack if there is one.  Returns
-  // false exactly if there was no backtrack to restore.
-  bool Backtrack();
-
-  Char current_char() { return input()[pos()]; }
-
-  void Advance(int delta, RegExpNode<Char>* next) {
-    pos_ += delta;
-    current_ = next;
-  }
-
-  bool AtEnd() { return pos_ >= input_.length(); }
-
-  bool is_done() { return is_done_; }
-  void set_done() { is_done_ = true; }
-
-  List<BacktrackState>* backtrack_stack() { return &backtrack_stack_; }
-  RegExpNode<Char>* current() { return current_; }
-  void set_current(RegExpNode<Char>* value) { current_ = value; }
-  Vector<Char> input() { return input_; }
-  int pos() { return pos_; }
- private:
-  RegExpNode<Char>* current_;
-  Vector<Char> input_;
-  int pos_;
-  List<BacktrackState> backtrack_stack_;
-  bool is_done_;
-};
-
-
-template <typename Char>
-void ExecutionState<Char>::SaveBacktrack(ChoiceNode<Char>* choice) {
-  ASSERT(choice->choices()->length() > 1);
-  if (FLAG_trace_regexps) {
-    PrintF("Setting up backtrack on level %i for choice %p\n",
-           backtrack_stack()->length(),
-           choice);
-  }
-  backtrack_stack()->Add(BacktrackState(choice, 1, pos_));
-}
-
-
-template <typename Char>
-bool ExecutionState<Char>::Backtrack() {
-  if (backtrack_stack()->is_empty()) return false;
-  BacktrackState& top = backtrack_stack()->at(backtrack_stack()->length()  
- 1);
-  ZoneList<RegExpNode<Char>*>* choices = top.choice()->choices();
-  int next_index = top.next();
-  current_ = choices->at(next_index);
-  pos_ = top.pos();
-  if (FLAG_trace_regexps) {
-    PrintF("Backtracking to %p[%i] on level %i\n",
-           top.choice(),
-           next_index,
-           backtrack_stack()->length() - 1);
-  }
-  if (next_index == choices->length() - 1) {
-    if (FLAG_trace_regexps)
-      PrintF("Popping backtrack on level %i\n",
-             backtrack_stack()->length() - 1);
-    // If this was the last alternative we're done with this backtrack
-    // state and can pop it off the stack.
-    backtrack_stack()->RemoveLast();
-  } else {
-    if (FLAG_trace_regexps)
-      PrintF("Advancing backtrack on level %i\n",
-             backtrack_stack()->length() - 1);
-    // Otherwise we set the next choice to visit if this one fails.
-    top.set_next(next_index + 1);
-  }
-  return true;
-}
-
-
-template <typename Char>
-bool ChoiceNode<Char>::Step(ExecutionState<Char>* state) {
-  state->SaveBacktrack(this);
-  state->set_current(this->choices()->at(0));
-  return true;
-}
-
-
-template <typename Char>
-bool AtomNode<Char>::Step(ExecutionState<Char>* state) {
-  Vector<const uc16> data = this->data();
-  int length = data.length();
-  Vector<Char> input = state->input();
-  int p = state->pos();
-  if (p + length > input.length())
-    return false;
-  for (int i = 0; i < length; i++, p++) {
-    if (data[i] != input[p])
-      return false;
-  }
-  state->Advance(length, this->next());
-  return true;
+void RegExpEngine::DotPrint(const char* label, RegExpNode* node) {
+  DotPrinter printer;
+  printer.PrintNode(label, node);
  }


-template <typename Char>
-bool CharacterClassNode<Char>::Step(ExecutionState<Char>* state) {
-  if (state->AtEnd()) return false;
-  ZoneList<CharacterRange>* ranges = this->ranges();
-  unsigned current = state->current_char();
-  for (int i = 0; i < ranges->length(); i++) {
-    CharacterRange& range = ranges->at(i);
-    if (range.from() <= current && current <= range.to()) {
-      state->Advance(1, this->next());
-      return true;
-    }
-  }
-  return false;
+RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
+  RegExpCompiler compiler(input->capture_count);
+  RegExpNode* node = compiler.Compile(input->tree,
+                                      EndNode::GetAccept(),
+                                      EndNode::GetBacktrack());
+  return node;
  }
-
-
-template <typename Char>
-bool EndNode<Char>::Step(ExecutionState<Char>* state) {
-  state->set_done();
-  return false;
-}
-
-
-template <typename Char>
-bool RegExpEngine::Execute(RegExpNode<Char>* start, Vector<Char> input) {
-  ExecutionState<Char> state(start, input);
-  if (FLAG_trace_regexps) {
-    PrintF("Beginning regexp execution\n");
-  }
-  while (state.Step() || (!state.is_done() && state.Backtrack()))
-    ;
-  if (FLAG_trace_regexps) {
-    PrintF("Matching %s\n", state.is_done() ? "succeeded" : "failed");
-  }
-  return state.is_done();
-}
-
-
-template <typename Char>
-RegExpNode<Char>* RegExpEngine::Compile(RegExpTree* regexp) {
-  RegExpNode<Char>* end = new EndNode<Char>();
-  RegExpCompiler<Char> compiler;
-  return compiler.Compile(regexp, end);
-}
-
-
-template
-RegExpNode<const char>* RegExpEngine::Compile<const char>(RegExpTree*  
regexp);
-
-template
-RegExpNode<const uc16>* RegExpEngine::Compile<const uc16>(RegExpTree*  
regexp);
-
-template
-bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start,
-                                       Vector<const char> input);
-
-template
-bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start,
-                                       Vector<const uc16> input);


  }}  // namespace v8::internal

Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h     (original)
+++ branches/experimental/regexp2000/src/jsregexp.h     Thu Nov  6 09:36:02 2008
@@ -136,9 +136,6 @@
  };


-template <typename Char> class RegExpNode;
-
-
  class CharacterRange {
   public:
    // For compatibility with the CHECK_OK macro
@@ -318,13 +315,18 @@
  };


+struct RegExpParseResult {
+  RegExpTree* tree;
+  bool has_character_escapes;
+  Handle<String> error;
+  int capture_count;
+};
+
+
  class RegExpEngine: public AllStatic {
   public:
-  template <typename Char>
-  static RegExpNode<Char>* Compile(RegExpTree* regexp);
-
-  template <typename Char>
-  static bool Execute(RegExpNode<Char>* start, Vector<Char> input);
+  static RegExpNode* Compile(RegExpParseResult* input);
+  static void DotPrint(const char* label, RegExpNode* node);
  };



Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc      (original)
+++ branches/experimental/regexp2000/src/parser.cc      Thu Nov  6 09:36:02 2008
@@ -465,6 +465,8 @@

    bool HasCharacterEscapes();

+  int captures_started() { return captures_started_; }
+
    static const uc32 kEndMarker = unibrow::Utf8::kBadChar;
   private:
    uc32 current() { return current_; }
@@ -4146,24 +4148,24 @@
  }


-RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,
-                        Handle<String>* error,
-                        bool* has_character_escapes) {
-  ASSERT(error->is_null());
-  RegExpParser parser(stream, error, false);  // Get multiline flag somehow
+bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*  
result) {
+  ASSERT(result != NULL);
+  // Get multiline flag somehow
+  RegExpParser parser(stream, &result->error, false);
    bool ok = true;
-  RegExpTree* result = parser.ParsePattern(&ok);
+  result->tree = parser.ParsePattern(&ok);
    if (!ok) {
-    ASSERT(result == NULL);
-    ASSERT(!error->is_null());
+    ASSERT(result->tree == NULL);
+    ASSERT(!result->error.is_null());
    } else {
-    ASSERT(result != NULL);
-    ASSERT(error->is_null());
+    ASSERT(result->tree != NULL);
+    ASSERT(result->error.is_null());
    }
-  if (ok && has_character_escapes != NULL) {
-    *has_character_escapes = parser.HasCharacterEscapes();
+  if (ok) {
+    result->has_character_escapes = parser.HasCharacterEscapes();
+    result->capture_count = parser.captures_started();
    }
-  return result;
+  return ok;
  }



Modified: branches/experimental/regexp2000/src/parser.h
==============================================================================
--- branches/experimental/regexp2000/src/parser.h       (original)
+++ branches/experimental/regexp2000/src/parser.h       Thu Nov  6 09:36:02 2008
@@ -144,9 +144,8 @@
  ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
                           v8::Extension* extension);

-RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,
-                        Handle<String>* error,
-                        bool* has_character_escapes);
+
+bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*  
result);


  // Support for doing lazy compilation. The script is the script containing  
full

Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Thu Nov  6  
09:36:02 2008
@@ -43,40 +43,15 @@
  using namespace v8::internal;


-class RegExpTestCase {
- public:
-  RegExpTestCase()
-    : pattern_(NULL),
-      flags_(NULL),
-      input_(NULL),
-      compile_error_(NULL) { }
-  RegExpTestCase(const char* pattern,
-                 const char* flags,
-                 const char* input,
-                 const char* compile_error)
-    : pattern_(pattern),
-      flags_(flags),
-      input_(input),
-      compile_error_(compile_error) { }
-  const char* pattern() const { return pattern_; }
-  bool expect_error() const { return compile_error_ != NULL; }
- private:
-  const char* pattern_;
-  const char* flags_;
-  const char* input_;
-  const char* compile_error_;
-};
-
-
  static SmartPointer<char> Parse(const char* input) {
    v8::HandleScope scope;
    unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  Handle<String> error;
-  RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error, NULL);
-  CHECK(node != NULL);
-  CHECK(error.is_null());
-  SmartPointer<char> output = node->ToString();
+  RegExpParseResult result;
+  CHECK(v8::internal::ParseRegExp(&buffer, &result));
+  CHECK(result.tree != NULL);
+  CHECK(result.error.is_null());
+  SmartPointer<char> output = result.tree->ToString();
    return output;
  }

@@ -84,12 +59,11 @@
    v8::HandleScope scope;
    unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  Handle<String> error;
-  bool has_escapes;
-  RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error,  
&has_escapes);
-  CHECK(node != NULL);
-  CHECK(error.is_null());
-  return has_escapes;
+  RegExpParseResult result;
+  CHECK(v8::internal::ParseRegExp(&buffer, &result));
+  CHECK(result.tree != NULL);
+  CHECK(result.error.is_null());
+  return result.has_character_escapes;
  }


@@ -245,11 +219,11 @@
    v8::HandleScope scope;
    unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  Handle<String> error;
-  RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error, NULL);
-  CHECK(node == NULL);
-  CHECK(!error.is_null());
-  SmartPointer<char> str = error->ToCString(ALLOW_NULLS);
+  RegExpParseResult result;
+  CHECK_EQ(false, v8::internal::ParseRegExp(&buffer, &result));
+  CHECK(result.tree == NULL);
+  CHECK(!result.error.is_null());
+  SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
    CHECK_EQ(expected, *str);
  }

@@ -350,25 +324,28 @@
  }


-static void Execute(bool expected, const char* input, const char* str) {
+static void Execute(const char* input,
+                    const char* str,
+                    bool dot_output = false) {
    v8::HandleScope scope;
    unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
    ZoneScope zone_scope(DELETE_ON_EXIT);
-  Handle<String> error;
-  RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error, NULL);
-  CHECK(tree != NULL);
-  CHECK(error.is_null());
-  RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree);
-  bool outcome = RegExpEngine::Execute(node, CStrVector(str));
-  CHECK_EQ(outcome, expected);
+  RegExpParseResult result;
+  if (!v8::internal::ParseRegExp(&buffer, &result))
+    return;
+  RegExpNode* node = RegExpEngine::Compile(&result);
+  if (dot_output) {
+    RegExpEngine::DotPrint(input, node);
+    exit(0);
+  }
  }


  TEST(Execution) {
    V8::Initialize(NULL);
-  Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
-  Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
-  Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
+  Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
+  Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
+  Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
  }


@@ -502,7 +479,6 @@

  TEST(Assembler) {
    V8::Initialize(NULL);
-
    byte codes[1024];
    Re2kAssembler assembler(Vector<byte>(codes, 1024));
  #define __ assembler.
@@ -541,6 +517,3 @@
    CHECK_EQ(3, captures[0]);
    CHECK_EQ(5, captures[1]);
  }
-
-
-

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to