Author: [EMAIL PROTECTED]
Date: Thu Nov 6 09:36:02 2008
New Revision: 705
Modified:
branches/experimental/regexp2000/src/ast.h
branches/experimental/regexp2000/src/globals.h
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/jsregexp.h
branches/experimental/regexp2000/src/parser.cc
branches/experimental/regexp2000/src/parser.h
branches/experimental/regexp2000/test/cctest/test-regexp.cc
Log:
- Implemented some more translation from asts to automata, including
upper/lower bounded repetition, captures, positive and negative
forward assertions, etc.
- Un-templateified automaton nodes.
Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h (original)
+++ branches/experimental/regexp2000/src/ast.h Thu Nov 6 09:36:02 2008
@@ -1204,8 +1204,6 @@
VISIT(Empty)
-class RegExpVisitor;
-template <typename Char> class RegExpNode;
#define FORWARD_DECLARE(Name) class RegExp##Name;
FOR_EACH_REG_EXP_NODE_TYPE(FORWARD_DECLARE)
#undef FORWARD_DECLARE
@@ -1215,6 +1213,9 @@
public:
virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) = 0;
SmartPointer<char> ToString();
#define MAKE_ASTYPE(Name) virtual RegExp##Name* As##Name();
FOR_EACH_REG_EXP_NODE_TYPE(MAKE_ASTYPE)
@@ -1226,6 +1227,9 @@
public:
explicit RegExpDisjunction(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)
{ }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpDisjunction* AsDisjunction();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
@@ -1237,6 +1241,9 @@
public:
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)
{ }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpAlternative* AsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
@@ -1252,6 +1259,9 @@
};
explicit RegExpAssertion(Type type) : type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpAssertion* AsAssertion();
Type type() { return type_; }
private:
@@ -1265,6 +1275,9 @@
: ranges_(ranges),
is_negated_(is_negated) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpCharacterClass* AsCharacterClass();
ZoneList<CharacterRange>* ranges() { return ranges_; }
bool is_negated() { return is_negated_; }
@@ -1278,6 +1291,9 @@
public:
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpAtom* AsAtom();
Vector<const uc16> data() { return data_; }
private:
@@ -1293,6 +1309,9 @@
is_greedy_(is_greedy),
body_(body) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpQuantifier* AsQuantifier();
int min() { return min_; }
int max() { return max_; }
@@ -1314,9 +1333,14 @@
explicit RegExpCapture(RegExpTree* body, int index)
: body_(body), index_(index) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpCapture* AsCapture();
RegExpTree* body() { return body_; }
int index() { return index_; }
+ static int StartRegister(int index) { return (index - 1) * 2; }
+ static int EndRegister(int index) { return (index - 1) * 2 + 1; }
private:
RegExpTree* body_;
int index_;
@@ -1329,6 +1353,9 @@
: body_(body),
is_positive_(is_positive) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpLookahead* AsLookahead();
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
@@ -1342,6 +1369,9 @@
public:
explicit RegExpBackreference(int index) : index_(index) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpBackreference* AsBackreference();
int index() { return index_; }
private:
@@ -1353,6 +1383,9 @@
public:
RegExpEmpty() { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpEmpty* AsEmpty();
static RegExpEmpty* GetInstance() { return &kInstance; }
private:
Modified: branches/experimental/regexp2000/src/globals.h
==============================================================================
--- branches/experimental/regexp2000/src/globals.h (original)
+++ branches/experimental/regexp2000/src/globals.h Thu Nov 6 09:36:02 2008
@@ -182,7 +182,11 @@
class OldSpace;
class Property;
class Proxy;
+class RegExpNode;
+class RegExpParseResult;
class RegExpTree;
+class RegExpCompiler;
+class RegExpVisitor;
class Scope;
template<class Allocator = FreeStoreAllocationPolicy> class ScopeInfo;
class Script;
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Thu Nov 6 09:36:02
2008
@@ -204,17 +204,18 @@
re->set_data(*cached);
result = re;
} else {
- SafeStringInputBuffer buffer(pattern.location());
- Handle<String> error_text;
- bool has_escapes;
- RegExpTree* ast = ParseRegExp(&buffer, &error_text, &has_escapes);
- if (!error_text.is_null()) {
+ SafeStringInputBuffer buffer(pattern.location());
+ RegExpParseResult parse_result;
+ if (!ParseRegExp(&buffer, &parse_result)) {
// Throw an exception if we fail to parse the pattern.
- return CreateRegExpException(re, pattern,
error_text, "malformed_regexp");
+ return CreateRegExpException(re,
+ pattern,
+ parse_result.error,
+ "malformed_regexp");
}
- RegExpAtom* atom = ast->AsAtom();
+ RegExpAtom* atom = parse_result.tree->AsAtom();
if (atom != NULL && !flags.is_ignore_case()) {
- if (has_escapes) {
+ if (parse_result.has_character_escapes) {
Vector<const uc16> atom_pattern = atom->data();
Handle<String> atom_string =
Factory::NewStringFromTwoByte(atom_pattern);
@@ -629,121 +630,266 @@
// New regular expression engine
-template <typename Char> class ExecutionState;
+class ExecutionState;
-template <typename Char>
class DotPrinter {
public:
DotPrinter() : stream_(&alloc_) { }
- void PrintNode(RegExpNode<Char>* node);
- void Visit(RegExpNode<Char>* node);
+ void PrintNode(const char* label, RegExpNode* node);
+ void Visit(RegExpNode* node);
StringStream* stream() { return &stream_; }
private:
HeapStringAllocator alloc_;
StringStream stream_;
- std::set<RegExpNode<Char>*> seen_;
+ std::set<RegExpNode*> seen_;
};
-template <typename Char>
-class RegExpCompiler: public RegExpVisitor {
+class RegExpCompiler {
public:
- RegExpCompiler() { }
- RegExpNode<Char>* Compile(RegExpTree* tree, RegExpNode<Char>* rest) {
- return static_cast<RegExpNode<Char>*>(tree->Accept(this, rest));
- }
-#define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void*);
- FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CASE)
-#undef MAKE_CASE
+ explicit RegExpCompiler(int capture_count)
+ : next_register_(2 * capture_count) { }
+
+ RegExpNode* Compile(RegExpTree* tree,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ return tree->ToNode(this, on_success, on_failure);
+ }
+
+ int AllocateRegister() { return next_register_++; }
+
+ private:
+ int next_register_;
};
-template <typename Char>
class RegExpNode: public ZoneObject {
public:
virtual ~RegExpNode() { }
- virtual void EmitDot(DotPrinter<Char>* out);
- virtual bool Step(ExecutionState<Char>* state) = 0;
+ virtual void EmitDot(DotPrinter* out);
};
-template <typename Char>
-class SeqRegExpNode: public RegExpNode<Char> {
+class SeqRegExpNode: public RegExpNode {
public:
- explicit SeqRegExpNode(RegExpNode<Char>* next) : next_(next) { }
- RegExpNode<Char>* next() { return next_; }
+ explicit SeqRegExpNode(RegExpNode* on_success)
+ : on_success_(on_success) { }
+ RegExpNode* on_success() { return on_success_; }
private:
- RegExpNode<Char>* next_;
+ RegExpNode* on_success_;
};
-template <typename Char>
-class EndNode: public RegExpNode<Char> {
+class EndNode: public RegExpNode {
public:
- virtual void EmitDot(DotPrinter<Char>* out);
- virtual bool Step(ExecutionState<Char>* state);
+ enum Action { ACCEPT, BACKTRACK };
+ virtual void EmitDot(DotPrinter* out);
+ static EndNode* GetAccept() { return &kAccept; }
+ static EndNode* GetBacktrack() { return &kBacktrack; }
+ private:
+ explicit EndNode(Action action) : action_(action) { }
+ Action action_;
+ static EndNode kAccept;
+ static EndNode kBacktrack;
};
-template <typename Char>
-class AtomNode: public SeqRegExpNode<Char> {
+EndNode EndNode::kAccept(ACCEPT);
+EndNode EndNode::kBacktrack(BACKTRACK);
+
+
+class AtomNode: public SeqRegExpNode {
public:
- AtomNode(Vector<const uc16> data, RegExpNode<Char>* next)
- : SeqRegExpNode<Char>(next),
+ AtomNode(Vector<const uc16> data,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
data_(data) { }
- virtual void EmitDot(DotPrinter<Char>* out);
- virtual bool Step(ExecutionState<Char>* state);
+ virtual void EmitDot(DotPrinter* out);
Vector<const uc16> data() { return data_; }
+ RegExpNode* on_failure() { return on_failure_; }
private:
+ RegExpNode* on_failure_;
Vector<const uc16> data_;
};
-template <typename Char>
-class CharacterClassNode: public SeqRegExpNode<Char> {
+class BackreferenceNode: public SeqRegExpNode {
public:
- CharacterClassNode(ZoneList<CharacterRange>* ranges, RegExpNode<Char>*
next)
- : SeqRegExpNode<Char>(next),
+ BackreferenceNode(int start_reg,
+ int end_reg,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
+ start_reg_(start_reg),
+ end_reg_(end_reg) { }
+ virtual void EmitDot(DotPrinter* out);
+ RegExpNode* on_failure() { return on_failure_; }
+ int start_register() { return start_reg_; }
+ int end_register() { return end_reg_; }
+ private:
+ RegExpNode* on_failure_;
+ int start_reg_;
+ int end_reg_;
+};
+
+
+class CharacterClassNode: public SeqRegExpNode {
+ public:
+ CharacterClassNode(ZoneList<CharacterRange>* ranges,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
ranges_(ranges) { }
- virtual void EmitDot(DotPrinter<Char>* out);
- virtual bool Step(ExecutionState<Char>* state);
+ virtual void EmitDot(DotPrinter* out);
ZoneList<CharacterRange>* ranges() { return ranges_; }
+ RegExpNode* on_failure() { return on_failure_; }
private:
+ RegExpNode* on_failure_;
ZoneList<CharacterRange>* ranges_;
};
-template <typename Char>
-class ChoiceNode: public RegExpNode<Char> {
+class Guard: public ZoneObject {
public:
- explicit ChoiceNode(ZoneList<RegExpNode<Char>*>* choices)
- : choices_(choices) { }
- virtual void EmitDot(DotPrinter<Char>* out);
- virtual bool Step(ExecutionState<Char>* state);
- ZoneList<RegExpNode<Char>*>* choices() { return choices_; }
- DispatchTable* table() { return table_; }
+ enum Relation { LT, GEQ };
+ Guard(int reg, Relation op, int value)
+ : reg_(reg),
+ op_(op),
+ value_(value) { }
+ int reg() { return reg_; }
+ Relation op() { return op_; }
+ int value() { return value_; }
private:
- ZoneList<RegExpNode<Char>*>* choices_;
+ int reg_;
+ Relation op_;
+ int value_;
+};
+
+
+class GuardedAlternative {
+ public:
+ explicit GuardedAlternative(RegExpNode* node) : node_(node),
guards_(NULL) { }
+ void AddGuard(Guard* guard);
+ RegExpNode* node() { return node_; }
+ ZoneList<Guard*>* guards() { return guards_; }
+ private:
+ RegExpNode* node_;
+ ZoneList<Guard*>* guards_;
+};
+
+
+void GuardedAlternative::AddGuard(Guard* guard) {
+ if (guards_ == NULL)
+ guards_ = new ZoneList<Guard*>(1);
+ guards_->Add(guard);
+}
+
+
+class ChoiceNode: public RegExpNode {
+ public:
+ explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
+ : on_failure_(on_failure),
+ choices_(new ZoneList<GuardedAlternative>(expected_size)) { }
+ virtual void EmitDot(DotPrinter* out);
+ void AddChild(GuardedAlternative node) { choices()->Add(node); }
+ ZoneList<GuardedAlternative>* choices() { return choices_; }
+ DispatchTable* table() { return &table_; }
+ RegExpNode* on_failure() { return on_failure_; }
+ private:
+ RegExpNode* on_failure_;
+ ZoneList<GuardedAlternative>* choices_;
DispatchTable table_;
};
+class ActionNode: public SeqRegExpNode {
+ public:
+ enum Type {
+ STORE_REGISTER,
+ INCREMENT_REGISTER,
+ STORE_POSITION,
+ BEGIN_SUBMATCH,
+ ESCAPE_SUBMATCH,
+ END_SUBMATCH
+ };
+ static ActionNode* StoreRegister(int reg, int val, RegExpNode*
on_success) {
+ ActionNode* result = new ActionNode(STORE_REGISTER, on_success);
+ result->data_.u_store_register.reg_ = reg;
+ result->data_.u_store_register.value_ = val;
+ return result;
+ }
+ static ActionNode* IncrementRegister(int reg, RegExpNode* on_success) {
+ ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
+ result->data_.u_increment_register.reg_ = reg;
+ return result;
+ }
+ static ActionNode* StorePosition(int reg, RegExpNode* on_success) {
+ ActionNode* result = new ActionNode(STORE_POSITION, on_success);
+ result->data_.u_store_position.reg_ = reg;
+ return result;
+ }
+ static ActionNode* BeginSubmatch(RegExpNode* on_success) {
+ return new ActionNode(BEGIN_SUBMATCH, on_success);
+ }
+ static ActionNode* EscapeSubmatch(RegExpNode* on_success) {
+ return new ActionNode(ESCAPE_SUBMATCH, on_success);
+ }
+ static ActionNode* EndSubmatch(RegExpNode* on_success) {
+ return new ActionNode(END_SUBMATCH, on_success);
+ }
+ virtual void EmitDot(DotPrinter* out);
+ Type type() { return type_; }
+ private:
+ ActionNode(Type type, RegExpNode* on_success)
+ : SeqRegExpNode(on_success),
+ type_(type) { }
+ Type type_;
+ union {
+ struct {
+ int reg_;
+ int value_;
+ } u_store_register;
+ struct {
+ int reg_;
+ } u_increment_register;
+ struct {
+ int reg_;
+ } u_store_position;
+ } data_;
+};
+
+
// -------------------------------------------------------------------
// Dot/dotty output
-
-template <typename Char>
-void DotPrinter<Char>::PrintNode(RegExpNode<Char>* node) {
- stream()->Add("digraph G {\n");
+void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
+ stream()->Add("digraph G {\n graph [label=\"");
+ for (int i = 0; label[i]; i++) {
+ switch (label[i]) {
+ case '\\':
+ stream()->Add("\\\\");
+ break;
+ case '"':
+ stream()->Add("\"");
+ break;
+ default:
+ stream()->Put(label[i]);
+ break;
+ }
+ }
+ stream()->Add("\"]; \n");
Visit(node);
stream()->Add("}\n");
printf("%s", *(stream()->ToCString()));
}
-template <typename Char>
-void DotPrinter<Char>::Visit(RegExpNode<Char>* node) {
+void DotPrinter::Visit(RegExpNode* node) {
if (seen_.find(node) != seen_.end())
return;
seen_.insert(node);
@@ -751,44 +897,114 @@
}
-template <typename Char>
-void RegExpNode<Char>::EmitDot(DotPrinter<Char>* out) {
+void RegExpNode::EmitDot(DotPrinter* out) {
UNIMPLEMENTED();
}
-template <typename Char>
-void ChoiceNode<Char>::EmitDot(DotPrinter<Char>* out) {
- out->stream()->Add("n%p [label=\"?\"];\n", this);
+static void PrintOnFailure(DotPrinter* out,
+ RegExpNode* from,
+ RegExpNode* on_failure) {
+ if (on_failure == EndNode::GetBacktrack()) return;
+ out->stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
+ out->Visit(on_failure);
+}
+
+
+void ChoiceNode::EmitDot(DotPrinter* out) {
+ out->stream()->Add(" n%p [label=\"?\", shape=circle];\n", this);
+ PrintOnFailure(out, this, this->on_failure());
for (int i = 0; i < choices()->length(); i++) {
- out->stream()->Add("n%p -> n%p [label=\"%i\"];\n",
+ GuardedAlternative alt = choices()->at(i);
+ out->stream()->Add(" n%p -> n%p [label=\"%i",
this,
- choices()->at(i),
+ alt.node(),
i);
- out->Visit(choices()->at(i));
+ if (alt.guards() != NULL) {
+ out->stream()->Add(" [");
+ for (int j = 0; j < alt.guards()->length(); j++) {
+ if (j > 0) out->stream()->Add(" ");
+ Guard* guard = alt.guards()->at(j);
+ switch (guard->op()) {
+ case Guard::GEQ:
+ out->stream()->Add("$%i ≥ %i", guard->reg(),
guard->value());
+ break;
+ case Guard::LT:
+ out->stream()->Add("$%i < %i", guard->reg(), guard->value());
+ break;
+ }
+ }
+ out->stream()->Add("]");
+ }
+ out->stream()->Add("\"];\n");
+ out->Visit(choices()->at(i).node());
}
}
-template <typename Char>
-void AtomNode<Char>::EmitDot(DotPrinter<Char>* out) {
- out->stream()->Add("n%p [label=\"'%w'\"];\n", this, data());
- out->stream()->Add("n%p -> n%p;\n", this, this->next());
- out->Visit(this->next());
+void AtomNode::EmitDot(DotPrinter* out) {
+ out->stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n",
+ this,
+ data());
+ out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
+ out->Visit(this->on_success());
+ PrintOnFailure(out, this, this->on_failure());
+}
+
+
+void BackreferenceNode::EmitDot(DotPrinter* out) {
+ out->stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
+ this,
+ start_register(),
+ end_register());
+ out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
+ out->Visit(this->on_success());
+ PrintOnFailure(out, this, this->on_failure());
}
-template <typename Char>
-void EndNode<Char>::EmitDot(DotPrinter<Char>* out) {
- out->stream()->Add("n%p [style=bold, label=\"done\"];\n", this);
+void EndNode::EmitDot(DotPrinter* out) {
+ out->stream()->Add(" n%p [style=bold, shape=point];\n", this);
}
-template <typename Char>
-void CharacterClassNode<Char>::EmitDot(DotPrinter<Char>* out) {
- out->stream()->Add("n%p [label=\"[...]\"];\n", this);
- out->stream()->Add("n%p -> n%p;\n", this, this->next());
- out->Visit(this->next());
+void CharacterClassNode::EmitDot(DotPrinter* out) {
+ out->stream()->Add(" n%p [label=\"[...]\"];\n", this);
+ out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
+ out->Visit(this->on_success());
+ PrintOnFailure(out, this, this->on_failure());
+}
+
+
+void ActionNode::EmitDot(DotPrinter* out) {
+ out->stream()->Add(" n%p [", this);
+ switch (type()) {
+ case STORE_REGISTER:
+ out->stream()->Add("label=\"$%i:=%i\", shape=box",
+ data_.u_store_register.reg_,
+ data_.u_store_register.value_);
+ break;
+ case INCREMENT_REGISTER:
+ out->stream()->Add("label=\"$%i++\", shape=box",
+ data_.u_increment_register.reg_);
+ break;
+ case STORE_POSITION:
+ out->stream()->Add("label=\"$%i:=$pos\", shape=box",
+ data_.u_store_position.reg_);
+ break;
+ case BEGIN_SUBMATCH:
+ out->stream()->Add("label=\"begin\", shape=septagon");
+ break;
+ case ESCAPE_SUBMATCH:
+ out->stream()->Add("label=\"escape\", shape=septagon");
+ break;
+ case END_SUBMATCH:
+ out->stream()->Add("label=\"end\", shape=septagon");
+ break;
+ }
+ out->stream()->Add("];\n");
+ out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
+ out->Visit(this->on_success());
}
@@ -796,108 +1012,146 @@
// Tree to graph conversion
-template <typename Char>
-void* RegExpCompiler<Char>::VisitAtom(RegExpAtom* that, void* rest) {
- return new AtomNode<Char>(that->data(),
- static_cast<RegExpNode<Char>*>(rest));
+RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ return new AtomNode(data(), on_success, on_failure);
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitCharacterClass(RegExpCharacterClass* that,
- void* rest) {
- return new CharacterClassNode<Char>(that->ranges(),
-
static_cast<RegExpNode<Char>*>(rest));
+RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ return new CharacterClassNode(ranges(), on_success, on_failure);
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitDisjunction(RegExpDisjunction* that,
- void* rest_ptr) {
- RegExpNode<Char>* rest = static_cast<RegExpNode<Char>*>(rest_ptr);
- ZoneList<RegExpTree*>* children = that->nodes();
+RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ ZoneList<RegExpTree*>* children = nodes();
int length = children->length();
- ZoneList<RegExpNode<Char>*>* choices
- = new ZoneList<RegExpNode<Char>*>(length);
- for (int i = 0; i < length; i++)
- choices->Add(Compile(children->at(i), rest));
- return new ChoiceNode<Char>(choices);
+ ChoiceNode* result = new ChoiceNode(length, on_failure);
+ for (int i = 0; i < length; i++) {
+ GuardedAlternative child(compiler->Compile(children->at(i),
+ on_success,
+ on_failure));
+ result->AddChild(child);
+ }
+ return result;
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitQuantifier(RegExpQuantifier* that,
- void* rest_ptr) {
- RegExpNode<Char>* rest = static_cast<RegExpNode<Char>*>(rest_ptr);
- if (that->max() >= RegExpQuantifier::kInfinity) {
- // Don't try to count the number of iterations if the max it too
- // large.
- if (that->min() != 0) {
- UNIMPLEMENTED();
- }
- ZoneList<RegExpNode<Char>*>* loop_choices
- = new ZoneList<RegExpNode<Char>*>(2);
- RegExpNode<Char>* loop_node = new ChoiceNode<Char>(loop_choices);
- RegExpNode<Char>* body_node = Compile(that->body(), loop_node);
- if (that->is_greedy()) {
- loop_choices->Add(body_node);
- loop_choices->Add(rest);
- } else {
- loop_choices->Add(rest);
- loop_choices->Add(body_node);
- }
- return loop_node;
+RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ // x{f, t} becomes this:
+ //
+ // (r++)<-.
+ // | `
+ // | (x)
+ // v ^
+ // (r=0)-->(?)---/ [if r < t]
+ // |
+ // [if r >= f] \----> ...
+ //
+ //
+ // TODO(someone): clear captures on repetition and handle empty
+ // matches.
+ bool has_min = min() > 0;
+ bool has_max = max() < RegExpQuantifier::kInfinity;
+ bool needs_counter = has_min || has_max;
+ int reg = needs_counter ? compiler->AllocateRegister() : -1;
+ ChoiceNode* center = new ChoiceNode(2, on_failure);
+ RegExpNode* loop_return = needs_counter
+ ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg,
center))
+ : static_cast<RegExpNode*>(center);
+ RegExpNode* body_node = compiler->Compile(body(), loop_return,
on_failure);
+ GuardedAlternative body_alt(body_node);
+ if (has_max) {
+ Guard* body_guard = new Guard(reg, Guard::LT, max());
+ body_alt.AddGuard(body_guard);
+ }
+ GuardedAlternative rest_alt(on_success);
+ if (has_min) {
+ Guard* rest_guard = new Guard(reg, Guard::GEQ, min());
+ rest_alt.AddGuard(rest_guard);
+ }
+ if (is_greedy()) {
+ center->AddChild(body_alt);
+ center->AddChild(rest_alt);
+ } else {
+ center->AddChild(rest_alt);
+ center->AddChild(body_alt);
+ }
+ if (needs_counter) {
+ return ActionNode::StoreRegister(reg, 0, center);
} else {
- UNIMPLEMENTED();
- return NULL;
+ return center;
}
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitAssertion(RegExpAssertion* that,
- void* rest) {
- UNIMPLEMENTED();
- return NULL;
+RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ // TODO(self): implement assertions.
+ return on_success;
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitCapture(RegExpCapture* that, void* rest) {
- UNIMPLEMENTED();
- return NULL;
+RegExpNode* RegExpBackreference::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ return new BackreferenceNode(RegExpCapture::StartRegister(index()),
+ RegExpCapture::EndRegister(index()),
+ on_success,
+ on_failure);
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitLookahead(RegExpLookahead* that,
- void* rest) {
- UNIMPLEMENTED();
- return NULL;
+RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ return on_success;
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitBackreference(RegExpBackreference* that,
- void* rest) {
- UNIMPLEMENTED();
- return NULL;
+RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ if (is_positive()) {
+ RegExpNode* proceed = ActionNode::EndSubmatch(on_success);
+ RegExpNode* escape = ActionNode::EscapeSubmatch(on_failure);
+ RegExpNode* body_node = compiler->Compile(body(), proceed, escape);
+ return ActionNode::BeginSubmatch(body_node);
+ } else {
+ RegExpNode* failed = ActionNode::EscapeSubmatch(on_success);
+ RegExpNode* succeeded = ActionNode::EndSubmatch(on_failure);
+ RegExpNode* body_node = compiler->Compile(body(), succeeded, failed);
+ return ActionNode::BeginSubmatch(body_node);
+ }
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitEmpty(RegExpEmpty* that, void* rest) {
- return rest;
+RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ int start_reg = RegExpCapture::StartRegister(index());
+ int end_reg = RegExpCapture::EndRegister(index());
+ RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
+ RegExpNode* body_node = compiler->Compile(body(), store_end, on_failure);
+ return ActionNode::StorePosition(start_reg, body_node);
}
-template <typename Char>
-void* RegExpCompiler<Char>::VisitAlternative(RegExpAlternative* that,
- void* rest) {
- ZoneList<RegExpTree*>* children = that->nodes();
- RegExpNode<Char>* current = static_cast<RegExpNode<Char>*>(rest);
+RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ ZoneList<RegExpTree*>* children = nodes();
+ RegExpNode* current = on_success;
for (int i = children->length() - 1; i >= 0; i--) {
- current = Compile(children->at(i), current);
+ current = compiler->Compile(children->at(i), current, on_failure);
}
return current;
}
@@ -1112,201 +1366,19 @@
}
-// -------------------------------------------------------------------
-// Execution
-
-
-template <typename Char>
-class ExecutionState {
- public:
- ExecutionState(RegExpNode<Char>* start, Vector<Char> input)
- : current_(start),
- input_(input),
- pos_(0),
- backtrack_stack_(8),
- is_done_(false) { }
-
- class BacktrackState {
- public:
- BacktrackState(ChoiceNode<Char>* choice, int next, int pos)
- : choice_(choice),
- next_(next),
- pos_(pos) { }
- ChoiceNode<Char>* choice() { return choice_; }
- int next() { return next_; }
- void set_next(int value) { next_ = value; }
- int pos() { return pos_; }
- private:
- ChoiceNode<Char>* choice_;
- int next_;
- int pos_;
- };
-
- // Execute a single step, returning true if it succeeded
- inline bool Step() { return current()->Step(this); }
-
- // Stores the given choice node and the execution state on the
- // backtrack stack.
- void SaveBacktrack(ChoiceNode<Char>* choice);
-
- // Reverts to the next unused backtrack if there is one. Returns
- // false exactly if there was no backtrack to restore.
- bool Backtrack();
-
- Char current_char() { return input()[pos()]; }
-
- void Advance(int delta, RegExpNode<Char>* next) {
- pos_ += delta;
- current_ = next;
- }
-
- bool AtEnd() { return pos_ >= input_.length(); }
-
- bool is_done() { return is_done_; }
- void set_done() { is_done_ = true; }
-
- List<BacktrackState>* backtrack_stack() { return &backtrack_stack_; }
- RegExpNode<Char>* current() { return current_; }
- void set_current(RegExpNode<Char>* value) { current_ = value; }
- Vector<Char> input() { return input_; }
- int pos() { return pos_; }
- private:
- RegExpNode<Char>* current_;
- Vector<Char> input_;
- int pos_;
- List<BacktrackState> backtrack_stack_;
- bool is_done_;
-};
-
-
-template <typename Char>
-void ExecutionState<Char>::SaveBacktrack(ChoiceNode<Char>* choice) {
- ASSERT(choice->choices()->length() > 1);
- if (FLAG_trace_regexps) {
- PrintF("Setting up backtrack on level %i for choice %p\n",
- backtrack_stack()->length(),
- choice);
- }
- backtrack_stack()->Add(BacktrackState(choice, 1, pos_));
-}
-
-
-template <typename Char>
-bool ExecutionState<Char>::Backtrack() {
- if (backtrack_stack()->is_empty()) return false;
- BacktrackState& top = backtrack_stack()->at(backtrack_stack()->length()
- 1);
- ZoneList<RegExpNode<Char>*>* choices = top.choice()->choices();
- int next_index = top.next();
- current_ = choices->at(next_index);
- pos_ = top.pos();
- if (FLAG_trace_regexps) {
- PrintF("Backtracking to %p[%i] on level %i\n",
- top.choice(),
- next_index,
- backtrack_stack()->length() - 1);
- }
- if (next_index == choices->length() - 1) {
- if (FLAG_trace_regexps)
- PrintF("Popping backtrack on level %i\n",
- backtrack_stack()->length() - 1);
- // If this was the last alternative we're done with this backtrack
- // state and can pop it off the stack.
- backtrack_stack()->RemoveLast();
- } else {
- if (FLAG_trace_regexps)
- PrintF("Advancing backtrack on level %i\n",
- backtrack_stack()->length() - 1);
- // Otherwise we set the next choice to visit if this one fails.
- top.set_next(next_index + 1);
- }
- return true;
-}
-
-
-template <typename Char>
-bool ChoiceNode<Char>::Step(ExecutionState<Char>* state) {
- state->SaveBacktrack(this);
- state->set_current(this->choices()->at(0));
- return true;
-}
-
-
-template <typename Char>
-bool AtomNode<Char>::Step(ExecutionState<Char>* state) {
- Vector<const uc16> data = this->data();
- int length = data.length();
- Vector<Char> input = state->input();
- int p = state->pos();
- if (p + length > input.length())
- return false;
- for (int i = 0; i < length; i++, p++) {
- if (data[i] != input[p])
- return false;
- }
- state->Advance(length, this->next());
- return true;
+void RegExpEngine::DotPrint(const char* label, RegExpNode* node) {
+ DotPrinter printer;
+ printer.PrintNode(label, node);
}
-template <typename Char>
-bool CharacterClassNode<Char>::Step(ExecutionState<Char>* state) {
- if (state->AtEnd()) return false;
- ZoneList<CharacterRange>* ranges = this->ranges();
- unsigned current = state->current_char();
- for (int i = 0; i < ranges->length(); i++) {
- CharacterRange& range = ranges->at(i);
- if (range.from() <= current && current <= range.to()) {
- state->Advance(1, this->next());
- return true;
- }
- }
- return false;
+RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
+ RegExpCompiler compiler(input->capture_count);
+ RegExpNode* node = compiler.Compile(input->tree,
+ EndNode::GetAccept(),
+ EndNode::GetBacktrack());
+ return node;
}
-
-
-template <typename Char>
-bool EndNode<Char>::Step(ExecutionState<Char>* state) {
- state->set_done();
- return false;
-}
-
-
-template <typename Char>
-bool RegExpEngine::Execute(RegExpNode<Char>* start, Vector<Char> input) {
- ExecutionState<Char> state(start, input);
- if (FLAG_trace_regexps) {
- PrintF("Beginning regexp execution\n");
- }
- while (state.Step() || (!state.is_done() && state.Backtrack()))
- ;
- if (FLAG_trace_regexps) {
- PrintF("Matching %s\n", state.is_done() ? "succeeded" : "failed");
- }
- return state.is_done();
-}
-
-
-template <typename Char>
-RegExpNode<Char>* RegExpEngine::Compile(RegExpTree* regexp) {
- RegExpNode<Char>* end = new EndNode<Char>();
- RegExpCompiler<Char> compiler;
- return compiler.Compile(regexp, end);
-}
-
-
-template
-RegExpNode<const char>* RegExpEngine::Compile<const char>(RegExpTree*
regexp);
-
-template
-RegExpNode<const uc16>* RegExpEngine::Compile<const uc16>(RegExpTree*
regexp);
-
-template
-bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start,
- Vector<const char> input);
-
-template
-bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start,
- Vector<const uc16> input);
}} // namespace v8::internal
Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h (original)
+++ branches/experimental/regexp2000/src/jsregexp.h Thu Nov 6 09:36:02 2008
@@ -136,9 +136,6 @@
};
-template <typename Char> class RegExpNode;
-
-
class CharacterRange {
public:
// For compatibility with the CHECK_OK macro
@@ -318,13 +315,18 @@
};
+struct RegExpParseResult {
+ RegExpTree* tree;
+ bool has_character_escapes;
+ Handle<String> error;
+ int capture_count;
+};
+
+
class RegExpEngine: public AllStatic {
public:
- template <typename Char>
- static RegExpNode<Char>* Compile(RegExpTree* regexp);
-
- template <typename Char>
- static bool Execute(RegExpNode<Char>* start, Vector<Char> input);
+ static RegExpNode* Compile(RegExpParseResult* input);
+ static void DotPrint(const char* label, RegExpNode* node);
};
Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc (original)
+++ branches/experimental/regexp2000/src/parser.cc Thu Nov 6 09:36:02 2008
@@ -465,6 +465,8 @@
bool HasCharacterEscapes();
+ int captures_started() { return captures_started_; }
+
static const uc32 kEndMarker = unibrow::Utf8::kBadChar;
private:
uc32 current() { return current_; }
@@ -4146,24 +4148,24 @@
}
-RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,
- Handle<String>* error,
- bool* has_character_escapes) {
- ASSERT(error->is_null());
- RegExpParser parser(stream, error, false); // Get multiline flag somehow
+bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*
result) {
+ ASSERT(result != NULL);
+ // Get multiline flag somehow
+ RegExpParser parser(stream, &result->error, false);
bool ok = true;
- RegExpTree* result = parser.ParsePattern(&ok);
+ result->tree = parser.ParsePattern(&ok);
if (!ok) {
- ASSERT(result == NULL);
- ASSERT(!error->is_null());
+ ASSERT(result->tree == NULL);
+ ASSERT(!result->error.is_null());
} else {
- ASSERT(result != NULL);
- ASSERT(error->is_null());
+ ASSERT(result->tree != NULL);
+ ASSERT(result->error.is_null());
}
- if (ok && has_character_escapes != NULL) {
- *has_character_escapes = parser.HasCharacterEscapes();
+ if (ok) {
+ result->has_character_escapes = parser.HasCharacterEscapes();
+ result->capture_count = parser.captures_started();
}
- return result;
+ return ok;
}
Modified: branches/experimental/regexp2000/src/parser.h
==============================================================================
--- branches/experimental/regexp2000/src/parser.h (original)
+++ branches/experimental/regexp2000/src/parser.h Thu Nov 6 09:36:02 2008
@@ -144,9 +144,8 @@
ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
v8::Extension* extension);
-RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,
- Handle<String>* error,
- bool* has_character_escapes);
+
+bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult*
result);
// Support for doing lazy compilation. The script is the script containing
full
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Thu Nov 6
09:36:02 2008
@@ -43,40 +43,15 @@
using namespace v8::internal;
-class RegExpTestCase {
- public:
- RegExpTestCase()
- : pattern_(NULL),
- flags_(NULL),
- input_(NULL),
- compile_error_(NULL) { }
- RegExpTestCase(const char* pattern,
- const char* flags,
- const char* input,
- const char* compile_error)
- : pattern_(pattern),
- flags_(flags),
- input_(input),
- compile_error_(compile_error) { }
- const char* pattern() const { return pattern_; }
- bool expect_error() const { return compile_error_ != NULL; }
- private:
- const char* pattern_;
- const char* flags_;
- const char* input_;
- const char* compile_error_;
-};
-
-
static SmartPointer<char> Parse(const char* input) {
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
- Handle<String> error;
- RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error, NULL);
- CHECK(node != NULL);
- CHECK(error.is_null());
- SmartPointer<char> output = node->ToString();
+ RegExpParseResult result;
+ CHECK(v8::internal::ParseRegExp(&buffer, &result));
+ CHECK(result.tree != NULL);
+ CHECK(result.error.is_null());
+ SmartPointer<char> output = result.tree->ToString();
return output;
}
@@ -84,12 +59,11 @@
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
- Handle<String> error;
- bool has_escapes;
- RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error,
&has_escapes);
- CHECK(node != NULL);
- CHECK(error.is_null());
- return has_escapes;
+ RegExpParseResult result;
+ CHECK(v8::internal::ParseRegExp(&buffer, &result));
+ CHECK(result.tree != NULL);
+ CHECK(result.error.is_null());
+ return result.has_character_escapes;
}
@@ -245,11 +219,11 @@
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
- Handle<String> error;
- RegExpTree* node = v8::internal::ParseRegExp(&buffer, &error, NULL);
- CHECK(node == NULL);
- CHECK(!error.is_null());
- SmartPointer<char> str = error->ToCString(ALLOW_NULLS);
+ RegExpParseResult result;
+ CHECK_EQ(false, v8::internal::ParseRegExp(&buffer, &result));
+ CHECK(result.tree == NULL);
+ CHECK(!result.error.is_null());
+ SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
CHECK_EQ(expected, *str);
}
@@ -350,25 +324,28 @@
}
-static void Execute(bool expected, const char* input, const char* str) {
+static void Execute(const char* input,
+ const char* str,
+ bool dot_output = false) {
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
- Handle<String> error;
- RegExpTree* tree = v8::internal::ParseRegExp(&buffer, &error, NULL);
- CHECK(tree != NULL);
- CHECK(error.is_null());
- RegExpNode<const char>* node = RegExpEngine::Compile<const char>(tree);
- bool outcome = RegExpEngine::Execute(node, CStrVector(str));
- CHECK_EQ(outcome, expected);
+ RegExpParseResult result;
+ if (!v8::internal::ParseRegExp(&buffer, &result))
+ return;
+ RegExpNode* node = RegExpEngine::Compile(&result);
+ if (dot_output) {
+ RegExpEngine::DotPrint(input, node);
+ exit(0);
+ }
}
TEST(Execution) {
V8::Initialize(NULL);
- Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
- Execute(true, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
- Execute(false, ".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
+ Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
+ Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
+ Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
}
@@ -502,7 +479,6 @@
TEST(Assembler) {
V8::Initialize(NULL);
-
byte codes[1024];
Re2kAssembler assembler(Vector<byte>(codes, 1024));
#define __ assembler.
@@ -541,6 +517,3 @@
CHECK_EQ(3, captures[0]);
CHECK_EQ(5, captures[1]);
}
-
-
-
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---