Author: [EMAIL PROTECTED]
Date: Thu Nov 13 22:22:05 2008
New Revision: 749
Modified:
branches/experimental/regexp2000/src/ast.h
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/test/cctest/test-regexp.cc
Log:
Added .*? in the beginning and an enclosing capture around regexps.
Restructured a little bit too.
Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h (original)
+++ branches/experimental/regexp2000/src/ast.h Thu Nov 13 22:22:05 2008
@@ -1274,6 +1274,11 @@
RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
: ranges_(ranges),
is_negated_(is_negated) { }
+ RegExpCharacterClass(uc16 type)
+ : ranges_(new ZoneList<CharacterRange>(2)),
+ is_negated_(false) {
+ CharacterRange::AddClassEscape(type, ranges_);
+ }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
@@ -1312,6 +1317,13 @@
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
+ static RegExpNode* ToNode(int min,
+ int max,
+ bool is_greedy,
+ RegExpTree* body,
+ RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpQuantifier* AsQuantifier();
int min() { return min_; }
int max() { return max_; }
@@ -1336,11 +1348,16 @@
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
+ static RegExpNode* ToNode(RegExpTree* body,
+ int index,
+ RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure);
virtual RegExpCapture* AsCapture();
RegExpTree* body() { return body_; }
int index() { return index_; }
- static int StartRegister(int index) { return (index - 1) * 2; }
- static int EndRegister(int index) { return (index - 1) * 2 + 1; }
+ static int StartRegister(int index) { return index * 2; }
+ static int EndRegister(int index) { return index * 2 + 1; }
private:
RegExpTree* body_;
int index_;
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Thu Nov 13 22:22:05 2008
@@ -635,7 +635,6 @@
// New regular expression engine
-class RegExpCompiler;
class DotPrinter;
@@ -645,10 +644,6 @@
: next_register_(2 * capture_count),
work_list_(NULL) { }
- RegExpNode* Compile(RegExpTree* tree,
- RegExpNode* on_success,
- RegExpNode* on_failure);
-
int AllocateRegister() { return next_register_++; }
Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
@@ -914,7 +909,7 @@
stream()->Add("\"];\n");
that->choices()->at(i).node()->Accept(this);
}
- OS::PrintError("--- %p ---\n", static_cast<void*>(this));
+ OS::PrintError("--- %p ---\n", static_cast<void*>(that));
that->table()->Dump();
}
@@ -946,7 +941,14 @@
void DotPrinter::VisitCharacterClass(CharacterClassNode* that) {
- stream()->Add(" n%p [label=\"[...]\"];\n", that);
+ stream()->Add(" n%p [label=\"[", that);
+ if (that->is_negated())
+ stream()->Add("^");
+ for (int i = 0; i < that->ranges()->length(); i++) {
+ CharacterRange range = that->ranges()->at(i);
+ stream()->Add("%k-%k", range.from(), range.to());
+ }
+ stream()->Add("]\"];\n");
stream()->Add(" n%p -> n%p;\n", that, that->on_success());
Visit(that->on_success());
PrintOnFailure(that, that->on_failure());
@@ -1062,9 +1064,9 @@
int length = children->length();
ChoiceNode* result = new ChoiceNode(length, on_failure);
for (int i = 0; i < length; i++) {
- GuardedAlternative child(compiler->Compile(children->at(i),
- on_success,
- on_failure));
+ GuardedAlternative child(children->at(i)->ToNode(compiler,
+ on_success,
+ on_failure));
result->AddChild(child);
}
return result;
@@ -1074,6 +1076,23 @@
RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure) {
+ return ToNode(min(),
+ max(),
+ is_greedy(),
+ body(),
+ compiler,
+ on_success,
+ on_failure);
+}
+
+
+RegExpNode* RegExpQuantifier::ToNode(int min,
+ int max,
+ bool is_greedy,
+ RegExpTree* body,
+ RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
// x{f, t} becomes this:
//
// (r++)<-.
@@ -1087,26 +1106,26 @@
//
// TODO(someone): clear captures on repetition and handle empty
// matches.
- bool has_min = min() > 0;
- bool has_max = max() < RegExpQuantifier::kInfinity;
+ bool has_min = min > 0;
+ bool has_max = max < RegExpQuantifier::kInfinity;
bool needs_counter = has_min || has_max;
int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
ChoiceNode* center = new ChoiceNode(2, on_failure);
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr,
center))
: static_cast<RegExpNode*>(center);
- RegExpNode* body_node = compiler->Compile(body(), loop_return, on_failure);
+ RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure);
GuardedAlternative body_alt(body_node);
if (has_max) {
- Guard* body_guard = new Guard(reg_ctr, Guard::LT, max());
+ Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
body_alt.AddGuard(body_guard);
}
GuardedAlternative rest_alt(on_success);
if (has_min) {
- Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min());
+ Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
rest_alt.AddGuard(rest_guard);
}
- if (is_greedy()) {
+ if (is_greedy) {
center->AddChild(body_alt);
center->AddChild(rest_alt);
} else {
@@ -1162,7 +1181,7 @@
// end submatch scope (nothing to clean up, just exit the scope)
// fail
return ActionNode::BeginSubmatch(ActionNode::StorePosition(
- position_register, compiler->Compile(body(),
+ position_register, body()->ToNode(compiler,
ActionNode::RestorePosition(position_register,
ActionNode::EscapeSubmatch(on_success)),
ActionNode::EndSubmatch(on_failure))));
@@ -1180,7 +1199,7 @@
// succeed
ChoiceNode* try_node =
new ChoiceNode(1, ActionNode::EndSubmatch(on_success));
- RegExpNode* body_node = compiler->Compile(body(),
+ RegExpNode* body_node = body()->ToNode(compiler,
ActionNode::EscapeSubmatch(on_failure),
EndNode::GetBacktrack());
GuardedAlternative body_alt(body_node);
@@ -1193,10 +1212,19 @@
RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure) {
- int start_reg = RegExpCapture::StartRegister(index());
- int end_reg = RegExpCapture::EndRegister(index());
+ return ToNode(body(), index(), compiler, on_success, on_failure);
+}
+
+
+RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
+ int index,
+ RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ RegExpNode* on_failure) {
+ int start_reg = RegExpCapture::StartRegister(index);
+ int end_reg = RegExpCapture::EndRegister(index);
RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
- RegExpNode* body_node = compiler->Compile(body(), store_end, on_failure);
+ RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure);
return ActionNode::StorePosition(start_reg, body_node);
}
@@ -1207,7 +1235,7 @@
ZoneList<RegExpTree*>* children = nodes();
RegExpNode* current = on_success;
for (int i = children->length() - 1; i >= 0; i--) {
- current = compiler->Compile(children->at(i), current, on_failure);
+ current = children->at(i)->ToNode(compiler, current, on_failure);
}
return current;
}
@@ -1581,18 +1609,26 @@
}
-RegExpNode* RegExpCompiler::Compile(RegExpTree* tree,
- RegExpNode* on_success,
- RegExpNode* on_failure) {
- return tree->ToNode(this, on_success, on_failure);
-}
-
-
RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
RegExpCompiler compiler(input->capture_count);
- RegExpNode* node = compiler.Compile(input->tree,
- EndNode::GetAccept(),
- EndNode::GetBacktrack());
+ // Wrap the body of the regexp in capture #0.
+ RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
+ 0,
+ &compiler,
+ EndNode::GetAccept(),
+ EndNode::GetBacktrack());
+ // Add a .*? at the beginning, outside the body capture.
+ // Note: We could choose to not add this if the regexp is anchored at
+ // the start of the input but I'm not sure how best to do that and
+ // since we don't even handle ^ yet I'm saving that optimization for
+ // later.
+ RegExpNode* node = RegExpQuantifier::ToNode(0,
+ RegExpQuantifier::kInfinity,
+ false,
+ new RegExpCharacterClass('.'),
+ &compiler,
+ captured_body,
+ EndNode::GetBacktrack());
Analysis analysis(&compiler);
analysis.Analyze(node);
return node;
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Thu Nov 13 22:22:05 2008
@@ -699,5 +699,5 @@
TEST(Graph) {
- Execute(".*?[^a]|b", "", true);
+ Execute("a|(b|c)|d", "", true);
}
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---