Author: [EMAIL PROTECTED]
Date: Mon Dec 1 07:42:35 2008
New Revision: 880
Modified:
branches/bleeding_edge/src/char-predicates-inl.h
branches/bleeding_edge/src/char-predicates.h
branches/bleeding_edge/src/jsregexp.cc
branches/bleeding_edge/src/jsregexp.h
branches/bleeding_edge/src/parser.cc
branches/bleeding_edge/test/cctest/test-regexp.cc
Log:
- Added some expansion of assertions.
- Splitting of character classes into word and non-word parts.
- A bunch of refactorings.
- Made dispatch table construction lazy.
Modified: branches/bleeding_edge/src/char-predicates-inl.h
==============================================================================
--- branches/bleeding_edge/src/char-predicates-inl.h (original)
+++ branches/bleeding_edge/src/char-predicates-inl.h Mon Dec 1 07:42:35
2008
@@ -59,6 +59,25 @@
}
+inline bool IsRegExpWord(uc16 c) {
+ return ('a' <= c && c <= 'z')
+ || ('A' <= c && c <= 'Z')
+ || ('0' <= c && c <= '9')
+ || (c == '_');
+}
+
+
+inline bool IsRegExpNewline(uc16 c) {
+ switch (c) {
+ // CR LF LS PS
+ case 0x000A: case 0x000D: case 0x2028: case 0x2029:
+ return false;
+ default:
+ return true;
+ }
+}
+
+
} } // namespace v8::internal
#endif // V8_CHAR_PREDICATES_INL_H_
Modified: branches/bleeding_edge/src/char-predicates.h
==============================================================================
--- branches/bleeding_edge/src/char-predicates.h (original)
+++ branches/bleeding_edge/src/char-predicates.h Mon Dec 1 07:42:35 2008
@@ -37,6 +37,8 @@
inline bool IsLineFeed(uc32 c);
inline bool IsDecimalDigit(uc32 c);
inline bool IsHexDigit(uc32 c);
+inline bool IsRegExpWord(uc32 c);
+inline bool IsRegExpNewline(uc32 c);
struct IdentifierStart {
static inline bool Is(uc32 c) {
Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc (original)
+++ branches/bleeding_edge/src/jsregexp.cc Mon Dec 1 07:42:35 2008
@@ -897,6 +897,16 @@
}
+DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
+ if (table_ == NULL) {
+ table_ = new DispatchTable();
+ DispatchTableConstructor cons(table_, ignore_case);
+ cons.BuildTable(this);
+ }
+ return table_;
+}
+
+
class RegExpCompiler {
public:
RegExpCompiler(int capture_count, bool ignore_case);
@@ -1561,7 +1571,9 @@
class DotPrinter: public NodeVisitor {
public:
- DotPrinter() : stream_(&alloc_) { }
+ explicit DotPrinter(bool ignore_case)
+ : ignore_case_(ignore_case),
+ stream_(&alloc_) { }
void PrintNode(const char* label, RegExpNode* node);
void Visit(RegExpNode* node);
void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure);
@@ -1572,6 +1584,7 @@
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
private:
+ bool ignore_case_;
HeapStringAllocator alloc_;
StringStream stream_;
std::set<RegExpNode*> seen_;
@@ -1668,44 +1681,80 @@
};
+class AttributePrinter {
+ public:
+ explicit AttributePrinter(DotPrinter* out)
+ : out_(out), first_(true) { }
+ void PrintSeparator() {
+ if (first_) {
+ first_ = false;
+ } else {
+ out_->stream()->Add("|");
+ }
+ }
+ void PrintBit(const char* name, bool value) {
+ if (!value) return;
+ PrintSeparator();
+ out_->stream()->Add("{%s}", name);
+ }
+ void PrintPositive(const char* name, int value) {
+ if (value < 0) return;
+ PrintSeparator();
+ out_->stream()->Add("{%s|%x}", name, value);
+ }
+ private:
+ DotPrinter* out_;
+ bool first_;
+};
+
+
void DotPrinter::PrintAttributes(RegExpNode* that) {
- stream()->Add(" a%p [shape=Mrecord, style=dashed, color=lightgrey, "
- "fontcolor=lightgrey, margin=0.1, fontsize=10, label=\"{",
+ stream()->Add(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
+ "margin=0.1, fontsize=10, label=\"{",
that);
+ AttributePrinter printer(this);
NodeInfo* info = that->info();
- stream()->Add("{NI|%i}|{WI|%i}|{SI|%i}",
- info->follows_newline_interest,
- info->follows_word_interest,
- info->follows_start_interest);
- stream()->Add("|{DN|%i}|{DW|%i}|{DS|%i}|{AE|%i}",
- info->determine_newline,
- info->determine_word,
- info->determine_start,
- info->at_end);
- if (info->follows_newline != NodeInfo::UNKNOWN)
- stream()->Add("|{FN|%i}", info->follows_newline);
- if (info->follows_word != NodeInfo::UNKNOWN)
- stream()->Add("|{FW|%i}", info->follows_word);
- if (info->follows_start != NodeInfo::UNKNOWN)
- stream()->Add("|{FS|%i}", info->follows_start);
+ printer.PrintBit("NI", info->follows_newline_interest);
+ printer.PrintBit("WI", info->follows_word_interest);
+ printer.PrintBit("SI", info->follows_start_interest);
+ printer.PrintBit("DN", info->determine_newline);
+ printer.PrintBit("DW", info->determine_word);
+ printer.PrintBit("DS", info->determine_start);
+ printer.PrintBit("DDN", info->does_determine_newline);
+ printer.PrintBit("DDW", info->does_determine_word);
+ printer.PrintBit("DDS", info->does_determine_start);
+ printer.PrintPositive("IW", info->is_word);
+ printer.PrintPositive("IN", info->is_newline);
+ printer.PrintPositive("FN", info->follows_newline);
+ printer.PrintPositive("FW", info->follows_word);
+ printer.PrintPositive("FS", info->follows_start);
Label* label = that->label();
if (label->is_bound())
- stream()->Add("|{@|%x}", label->pos());
+ printer.PrintPositive("@", label->pos());
stream()->Add("}\"];\n");
- stream()->Add(" a%p -> n%p [style=dashed, color=lightgrey, "
+ stream()->Add(" a%p -> n%p [style=dashed, color=grey, "
"arrowhead=none];\n", that, that);
}
+static const bool kPrintDispatchTable = false;
void DotPrinter::VisitChoice(ChoiceNode* that) {
- stream()->Add(" n%p [shape=Mrecord, label=\"", that);
- TableEntryHeaderPrinter header_printer(stream());
- that->table()->ForEach(&header_printer);
- stream()->Add("\"]\n", that);
- PrintAttributes(that);
- TableEntryBodyPrinter body_printer(stream(), that);
- that->table()->ForEach(&body_printer);
- PrintOnFailure(that, that->on_failure());
+ if (kPrintDispatchTable) {
+ stream()->Add(" n%p [shape=Mrecord, label=\"", that);
+ TableEntryHeaderPrinter header_printer(stream());
+ that->GetTable(ignore_case_)->ForEach(&header_printer);
+ stream()->Add("\"]\n", that);
+ PrintAttributes(that);
+ TableEntryBodyPrinter body_printer(stream(), that);
+ that->GetTable(ignore_case_)->ForEach(&body_printer);
+ PrintOnFailure(that, that->on_failure());
+ } else {
+ stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);
+ for (int i = 0; i < that->alternatives()->length(); i++) {
+ GuardedAlternative alt = that->alternatives()->at(i);
+ stream()->Add(" n%p -> n%p;\n", that, alt.node());
+ }
+ }
for (int i = 0; i < that->alternatives()->length(); i++) {
GuardedAlternative alt = that->alternatives()->at(i);
alt.node()->Accept(this);
@@ -1837,8 +1886,10 @@
}
-void RegExpEngine::DotPrint(const char* label, RegExpNode* node) {
- DotPrinter printer;
+void RegExpEngine::DotPrint(const char* label,
+ RegExpNode* node,
+ bool ignore_case) {
+ DotPrinter printer(ignore_case);
printer.PrintNode(label, node);
}
@@ -2181,6 +2232,56 @@
}
+Vector<const uc16> CharacterRange::GetWordBounds() {
+ return Vector<const uc16>(kWordRanges, kWordRangeCount);
+}
+
+
+class CharacterRangeSplitter {
+ public:
+ CharacterRangeSplitter(ZoneList<CharacterRange>** included,
+ ZoneList<CharacterRange>** excluded)
+ : included_(included),
+ excluded_(excluded) { }
+ void Call(uc16 from, DispatchTable::Entry entry);
+
+ static const int kInBase = 0;
+ static const int kInOverlay = 1;
+
+ private:
+ ZoneList<CharacterRange>** included_;
+ ZoneList<CharacterRange>** excluded_;
+};
+
+
+void CharacterRangeSplitter::Call(uc16 from, DispatchTable::Entry entry) {
+ if (!entry.out_set()->Get(kInBase)) return;
+ ZoneList<CharacterRange>** target = entry.out_set()->Get(kInOverlay)
+ ? included_
+ : excluded_;
+ if (*target == NULL) *target = new ZoneList<CharacterRange>(2);
+ (*target)->Add(CharacterRange(entry.from(), entry.to()));
+}
+
+
+void CharacterRange::Split(ZoneList<CharacterRange>* base,
+ Vector<const uc16> overlay,
+ ZoneList<CharacterRange>** included,
+ ZoneList<CharacterRange>** excluded) {
+ ASSERT_EQ(NULL, *included);
+ ASSERT_EQ(NULL, *excluded);
+ DispatchTable table;
+ for (int i = 0; i < base->length(); i++)
+ table.AddRange(base->at(i), CharacterRangeSplitter::kInBase);
+ for (int i = 0; i < overlay.length(); i += 2) {
+ table.AddRange(CharacterRange(overlay[i], overlay[i+1]),
+ CharacterRangeSplitter::kInOverlay);
+ }
+ CharacterRangeSplitter callback(included, excluded);
+ table.ForEach(&callback);
+}
+
+
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
if (IsSingleton()) {
@@ -2266,40 +2367,47 @@
// Interest propagation
-RegExpNode* RegExpNode::GetSibling(NodeInfo* info) {
+RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) {
for (int i = 0; i < siblings_.length(); i++) {
RegExpNode* sibling = siblings_.Get(i);
- if (sibling->info()->HasSameForwardInterests(info))
+ if (sibling->info()->Matches(info))
return sibling;
}
return NULL;
}
+RegExpNode* RegExpNode::EnsureSibling(NodeInfo* info, bool* cloned) {
+ ASSERT_EQ(false, *cloned);
+ ASSERT(!info->HasAssertions());
+ siblings_.Ensure(this);
+ RegExpNode* result = TryGetSibling(info);
+ if (result != NULL) return result;
+ result = this->Clone();
+ NodeInfo* new_info = result->info();
+ new_info->ResetCompilationState();
+ new_info->AddFromPreceding(info);
+ AddSibling(result);
+ *cloned = true;
+ return result;
+}
+
+
template <class C>
static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
NodeInfo full_info(*node->info());
full_info.AddFromPreceding(info);
- RegExpNode* sibling = node->GetSibling(&full_info);
- if (sibling != NULL) return sibling;
- node->EnsureSiblings();
- sibling = new C(*node);
- sibling->info()->AddFromPreceding(&full_info);
- node->AddSibling(sibling);
- return sibling;
+ bool cloned = false;
+ return RegExpNode::EnsureSibling(node, &full_info, &cloned);
}
RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
NodeInfo full_info(*this->info());
full_info.AddFromPreceding(info);
- RegExpNode* sibling = GetSibling(&full_info);
- if (sibling != NULL) return sibling;
- EnsureSiblings();
- ActionNode* action = new ActionNode(*this);
- action->info()->AddFromPreceding(&full_info);
- AddSibling(action);
- if (type_ != ESCAPE_SUBMATCH) {
+ bool cloned = false;
+ ActionNode* action = EnsureSibling(this, &full_info, &cloned);
+ if (cloned && type_ != ESCAPE_SUBMATCH) {
action->set_on_success(action->on_success()->PropagateForward(info));
}
return action;
@@ -2309,22 +2417,20 @@
RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
NodeInfo full_info(*this->info());
full_info.AddFromPreceding(info);
- RegExpNode* sibling = GetSibling(&full_info);
- if (sibling != NULL) return sibling;
- EnsureSiblings();
- ChoiceNode* choice = new ChoiceNode(*this);
- choice->info()->AddFromPreceding(&full_info);
- AddSibling(choice);
- ZoneList<GuardedAlternative>* old_alternatives = alternatives();
- int count = old_alternatives->length();
- choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
- for (int i = 0; i < count; i++) {
- GuardedAlternative alternative = old_alternatives->at(i);
- alternative.set_node(alternative.node()->PropagateForward(info));
- choice->alternatives()->Add(alternative);
- }
- if (!choice->on_failure_->IsBacktrack()) {
- choice->on_failure_ = choice->on_failure_->PropagateForward(info);
+ bool cloned = false;
+ ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);
+ if (cloned) {
+ ZoneList<GuardedAlternative>* old_alternatives = alternatives();
+ int count = old_alternatives->length();
+ choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
+ for (int i = 0; i < count; i++) {
+ GuardedAlternative alternative = old_alternatives->at(i);
+ alternative.set_node(alternative.node()->PropagateForward(info));
+ choice->alternatives()->Add(alternative);
+ }
+ if (!choice->on_failure_->IsBacktrack()) {
+ choice->on_failure_ = choice->on_failure_->PropagateForward(info);
+ }
}
return choice;
}
@@ -2338,18 +2444,16 @@
RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
NodeInfo full_info(*this->info());
full_info.AddFromPreceding(info);
- RegExpNode* sibling = GetSibling(&full_info);
- if (sibling != NULL) return sibling;
- EnsureSiblings();
- BackReferenceNode* back_ref = new BackReferenceNode(*this);
- back_ref->info()->AddFromPreceding(&full_info);
- AddSibling(back_ref);
- // TODO(erikcorry): A back reference has to have two successors (by
default
- // the same node). The first is used if the back reference matches a
non-
- // empty back reference, the second if it matches an empty one. This
doesn't
- // matter for at_end, which is the only one implemented right now, but
it will
- // matter for other pieces of info.
- back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
+ bool cloned = false;
+ BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned);
+ if (cloned) {
+ // TODO(erikcorry): A back reference has to have two successors (by
default
+ // the same node). The first is used if the back reference matches a
non-
+ // empty back reference, the second if it matches an empty one. This
+ // doesn't matter for at_end, which is the only one implemented right
now,
+ // but it will matter for other pieces of info.
+
back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
+ }
return back_ref;
}
@@ -2560,10 +2664,6 @@
// this node also, so it can pass it on.
info->AddFromFollowing(node->info());
}
- if (!that->table_calculated()) {
- DispatchTableConstructor cons(that->table());
- cons.BuildTable(that);
- }
EnsureAnalyzed(that->on_failure());
}
@@ -2575,6 +2675,208 @@
// -------------------------------------------------------------------
+// Assumption expansion
+
+
+RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
+ siblings_.Ensure(this);
+ NodeInfo new_info = *this->info();
+ if (new_info.follows_word_interest)
+ new_info.follows_word = info->follows_word;
+ if (new_info.follows_newline_interest)
+ new_info.follows_newline = info->follows_newline;
+ // If the following node should determine something we need to get
+ // a sibling that determines it.
+ new_info.does_determine_newline = new_info.determine_newline;
+ new_info.does_determine_word = new_info.determine_word;
+ new_info.does_determine_start = new_info.determine_start;
+ RegExpNode* sibling = TryGetSibling(&new_info);
+ if (sibling == NULL) {
+ sibling = ExpandLocal(&new_info);
+ siblings_.Add(sibling);
+ sibling->info()->being_expanded = true;
+ sibling->ExpandChildren();
+ sibling->info()->being_expanded = false;
+ sibling->info()->been_expanded = true;
+ } else {
+ NodeInfo* sib_info = sibling->info();
+ if (!sib_info->been_expanded && !sib_info->being_expanded) {
+ sibling->info()->being_expanded = true;
+ sibling->ExpandChildren();
+ sibling->info()->being_expanded = false;
+ sibling->info()->been_expanded = true;
+ }
+ }
+ return sibling;
+}
+
+
+RegExpNode* ChoiceNode::ExpandLocal(NodeInfo* info) {
+ ChoiceNode* clone = this->Clone();
+ clone->info()->ResetCompilationState();
+ clone->info()->AddAssumptions(info);
+ return clone;
+}
+
+
+void ChoiceNode::ExpandChildren() {
+ ZoneList<GuardedAlternative>* alts = alternatives();
+ ZoneList<GuardedAlternative>* new_alts
+ = new ZoneList<GuardedAlternative>(alts->length());
+ for (int i = 0; i < alts->length(); i++) {
+ GuardedAlternative next = alts->at(i);
+ next.set_node(next.node()->EnsureExpanded(info()));
+ new_alts->Add(next);
+ }
+ alternatives_ = new_alts;
+}
+
+
+RegExpNode* TextNode::ExpandLocal(NodeInfo* info) {
+ TextElement last = elements()->last();
+ if (last.type == TextElement::CHAR_CLASS) {
+ RegExpCharacterClass* char_class = last.data.u_char_class;
+ if (info->does_determine_word) {
+ ZoneList<CharacterRange>* word = NULL;
+ ZoneList<CharacterRange>* non_word = NULL;
+ CharacterRange::Split(char_class->ranges(),
+ CharacterRange::GetWordBounds(),
+ &word,
+ &non_word);
+ if (non_word == NULL) {
+ // This node contains no non-word characters so it must be
+ // all word.
+ this->info()->is_word = NodeInfo::TRUE;
+ } else if (word == NULL) {
+ // Vice versa.
+ this->info()->is_word = NodeInfo::FALSE;
+ } else {
+ // If this character class contains both word and non-word
+ // characters we need to split it into two.
+ ChoiceNode* result = new ChoiceNode(2, on_failure());
+ // Welcome to the family, son!
+ result->set_siblings(this->siblings());
+ *result->info() = *this->info();
+ result->info()->ResetCompilationState();
+ result->info()->AddAssumptions(info);
+ RegExpNode* word_node
+ = new TextNode(new RegExpCharacterClass(word, false),
+ on_success(),
+ on_failure());
+ word_node->info()->determine_word = true;
+ word_node->info()->does_determine_word = true;
+ word_node->info()->is_word = NodeInfo::TRUE;
+ result->alternatives()->Add(GuardedAlternative(word_node));
+ RegExpNode* non_word_node
+ = new TextNode(new RegExpCharacterClass(non_word, false),
+ on_success(),
+ on_failure());
+ non_word_node->info()->determine_word = true;
+ non_word_node->info()->does_determine_word = true;
+ non_word_node->info()->is_word = NodeInfo::FALSE;
+ result->alternatives()->Add(GuardedAlternative(non_word_node));
+ return result;
+ }
+ }
+ }
+ TextNode* clone = this->Clone();
+ clone->info()->ResetCompilationState();
+ clone->info()->AddAssumptions(info);
+ return clone;
+}
+
+
+void TextNode::ExpandAtomChildren(RegExpAtom* that) {
+ NodeInfo new_info = *info();
+ uc16 last = that->data()[that->data().length() - 1];
+ if (info()->determine_word) {
+ new_info.follows_word = IsRegExpWord(last)
+ ? NodeInfo::TRUE : NodeInfo::FALSE;
+ } else {
+ new_info.follows_word = NodeInfo::UNKNOWN;
+ }
+ if (info()->determine_newline) {
+ new_info.follows_newline = IsRegExpNewline(last)
+ ? NodeInfo::TRUE : NodeInfo::FALSE;
+ } else {
+ new_info.follows_newline = NodeInfo::UNKNOWN;
+ }
+ if (info()->determine_start) {
+ new_info.follows_start = NodeInfo::FALSE;
+ } else {
+ new_info.follows_start = NodeInfo::UNKNOWN;
+ }
+ set_on_success(on_success()->EnsureExpanded(&new_info));
+}
+
+
+void TextNode::ExpandCharClassChildren(RegExpCharacterClass* that) {
+ if (info()->does_determine_word) {
+ // ASSERT(info()->is_word != NodeInfo::UNKNOWN);
+ NodeInfo next_info = *on_success()->info();
+ next_info.follows_word = info()->is_word;
+ set_on_success(on_success()->EnsureExpanded(&next_info));
+ } else {
+ set_on_success(on_success()->EnsureExpanded(info()));
+ }
+}
+
+
+void TextNode::ExpandChildren() {
+ TextElement last = elements()->last();
+ switch (last.type) {
+ case TextElement::ATOM:
+ ExpandAtomChildren(last.data.u_atom);
+ break;
+ case TextElement::CHAR_CLASS:
+ ExpandCharClassChildren(last.data.u_char_class);
+ break;
+ default:
+ UNREACHABLE();
+ }
+}
+
+
+RegExpNode* ActionNode::ExpandLocal(NodeInfo* info) {
+ ActionNode* clone = this->Clone();
+ clone->info()->ResetCompilationState();
+ clone->info()->AddAssumptions(info);
+ return clone;
+}
+
+
+void ActionNode::ExpandChildren() {
+ set_on_success(on_success()->EnsureExpanded(info()));
+}
+
+
+RegExpNode* BackReferenceNode::ExpandLocal(NodeInfo* info) {
+ BackReferenceNode* clone = this->Clone();
+ clone->info()->ResetCompilationState();
+ clone->info()->AddAssumptions(info);
+ return clone;
+}
+
+
+void BackReferenceNode::ExpandChildren() {
+ set_on_success(on_success()->EnsureExpanded(info()));
+}
+
+
+RegExpNode* EndNode::ExpandLocal(NodeInfo* info) {
+ EndNode* clone = this->Clone();
+ clone->info()->ResetCompilationState();
+ clone->info()->AddAssumptions(info);
+ return clone;
+}
+
+
+void EndNode::ExpandChildren() {
+ // nothing to do
+}
+
+
+// -------------------------------------------------------------------
// Dispatch table construction
@@ -2584,7 +2886,6 @@
void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
- ASSERT(!node->table_calculated());
node->set_being_calculated(true);
ZoneList<GuardedAlternative>* alternatives = node->alternatives();
for (int i = 0; i < alternatives->length(); i++) {
@@ -2592,7 +2893,6 @@
alternatives->at(i).node()->Accept(this);
}
node->set_being_calculated(false);
- node->set_table_calculated(true);
}
@@ -2615,13 +2915,9 @@
void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
if (node->being_calculated())
return;
- if (!node->table_calculated()) {
- DispatchTableConstructor constructor(node->table());
- constructor.BuildTable(node);
- }
- ASSERT(node->table_calculated());
+ DispatchTable* table = node->GetTable(ignore_case_);
AddDispatchRange adder(this);
- node->table()->ForEach(&adder);
+ table->ForEach(&adder);
}
@@ -2715,6 +3011,9 @@
if (node_return != NULL) *node_return = node;
Analysis analysis(ignore_case);
analysis.EnsureAnalyzed(node);
+
+ NodeInfo info = *node->info();
+ node = node->EnsureExpanded(&info);
if (!FLAG_irregexp) {
return Handle<FixedArray>::null();
Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h (original)
+++ branches/bleeding_edge/src/jsregexp.h Mon Dec 1 07:42:35 2008
@@ -185,6 +185,7 @@
CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
+ static Vector<const uc16> GetWordBounds();
static inline CharacterRange Singleton(uc16 value) {
return CharacterRange(value, value);
}
@@ -203,9 +204,15 @@
bool is_valid() { return from_ <= to_; }
bool IsSingleton() { return (from_ == to_); }
void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
+ static void Split(ZoneList<CharacterRange>* base,
+ Vector<const uc16> overlay,
+ ZoneList<CharacterRange>** included,
+ ZoneList<CharacterRange>** excluded);
+
static const int kRangeCanonicalizeMax = 0x346;
static const int kStartMarker = (1 << 24);
static const int kPayloadMask = (1 << 24) - 1;
+
private:
uc16 from_;
uc16 to_;
@@ -344,7 +351,7 @@
// A mapping from integers, specified as ranges, to a set of integers.
// Used for mapping character ranges to choices.
-class DispatchTable {
+class DispatchTable : public ZoneObject {
public:
class Entry {
public:
@@ -437,29 +444,50 @@
struct NodeInfo {
- enum PrecedingInfo {
+ enum TriBool {
UNKNOWN = -1, FALSE = 0, TRUE = 1
};
NodeInfo()
: being_analyzed(false),
been_analyzed(false),
+ being_expanded(false),
+ been_expanded(false),
determine_word(false),
determine_newline(false),
determine_start(false),
+ does_determine_word(false),
+ does_determine_newline(false),
+ does_determine_start(false),
follows_word_interest(false),
follows_newline_interest(false),
follows_start_interest(false),
+ is_word(UNKNOWN),
+ is_newline(UNKNOWN),
at_end(false),
follows_word(UNKNOWN),
follows_newline(UNKNOWN),
follows_start(UNKNOWN) { }
- bool HasSameForwardInterests(NodeInfo* that) {
- return (at_end == that->at_end)
- && (follows_word_interest == that->follows_word_interest)
- && (follows_newline_interest == that->follows_newline_interest)
- && (follows_start_interest == that->follows_start_interest);
+ // Returns true if the interests and assumptions of this node
+ // matches the given one.
+ bool Matches(NodeInfo* that) {
+ return (at_end == that->at_end) &&
+ (follows_word_interest == that->follows_word_interest) &&
+ (follows_newline_interest == that->follows_newline_interest) &&
+ (follows_start_interest == that->follows_start_interest) &&
+ (follows_word == that->follows_word) &&
+ (follows_newline == that->follows_newline) &&
+ (follows_start == that->follows_start) &&
+ (does_determine_word == that->does_determine_word) &&
+ (does_determine_newline == that->does_determine_newline) &&
+ (does_determine_start == that->does_determine_start);
+ }
+
+ bool HasAssertions() {
+ return (follows_word != UNKNOWN) ||
+ (follows_newline != UNKNOWN) ||
+ (follows_start != UNKNOWN);
}
// Updates the interests of this node given the interests of the
@@ -471,6 +499,26 @@
follows_start_interest |= that->follows_start_interest;
}
+ void AddAssumptions(NodeInfo* that) {
+ if (that->follows_word != UNKNOWN) {
+ ASSERT(follows_word == UNKNOWN || follows_word ==
that->follows_word);
+ follows_word = that->follows_word;
+ }
+ if (that->follows_newline != UNKNOWN) {
+ ASSERT(follows_newline == UNKNOWN ||
+ follows_newline == that->follows_newline);
+ follows_newline = that->follows_newline;
+ }
+ if (that->follows_start != UNKNOWN) {
+ ASSERT(follows_start == UNKNOWN ||
+ follows_start == that->follows_start);
+ follows_start = that->follows_start;
+ }
+ does_determine_word = that->does_determine_word;
+ does_determine_newline = that->does_determine_newline;
+ does_determine_start = that->does_determine_start;
+ }
+
// Sets the interests of this node to include the interests of the
// following node.
void AddFromFollowing(NodeInfo* that) {
@@ -479,8 +527,17 @@
follows_start_interest |= that->follows_start_interest;
}
+ void ResetCompilationState() {
+ being_analyzed = false;
+ been_analyzed = false;
+ being_expanded = false;
+ been_expanded = false;
+ }
+
bool being_analyzed: 1;
bool been_analyzed: 1;
+ bool being_expanded: 1;
+ bool been_expanded: 1;
// These bits are set if this node must propagate forward information
// about the last character it consumed (or, in the case of 'start',
@@ -489,19 +546,40 @@
bool determine_newline: 1;
bool determine_start: 1;
+ bool does_determine_word: 1;
+ bool does_determine_newline: 1;
+ bool does_determine_start: 1;
+
// These bits are set of this node has to know what the preceding
// character was.
bool follows_word_interest: 1;
bool follows_newline_interest: 1;
bool follows_start_interest: 1;
+ TriBool is_word: 2;
+ TriBool is_newline: 2;
+
bool at_end: 1;
// These bits are set if the node can make assumptions about what
// the previous character was.
- PrecedingInfo follows_word: 2;
- PrecedingInfo follows_newline: 2;
- PrecedingInfo follows_start: 2;
+ TriBool follows_word: 2;
+ TriBool follows_newline: 2;
+ TriBool follows_start: 2;
+};
+
+
+class ExpansionGuard {
+ public:
+ explicit inline ExpansionGuard(NodeInfo* info) : info_(info) {
+ ASSERT(!info->being_expanded);
+ info->being_expanded = true;
+ }
+ inline ~ExpansionGuard() {
+ info_->being_expanded = false;
+ }
+ private:
+ NodeInfo* info_;
};
@@ -538,6 +616,10 @@
// false for failure.
virtual bool Emit(RegExpCompiler* compiler) = 0;
+ RegExpNode* EnsureExpanded(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0;
+ virtual void ExpandChildren() = 0;
+
// Propagates the given interest information forward. When seeing
// \bfoo for instance, the \b is implemented by propagating forward
// to the 'foo' string that it should only succeed if its first
@@ -546,11 +628,40 @@
NodeInfo* info() { return &info_; }
virtual bool IsBacktrack() { return false; }
- RegExpNode* GetSibling(NodeInfo* info);
- void EnsureSiblings() { siblings_.Ensure(this); }
+
void AddSibling(RegExpNode* node) { siblings_.Add(node); }
+
+ // Static version of EnsureSibling that expresses the fact that the
+ // result has the same type as the input.
+ template <class C>
+ static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) {
+ return static_cast<C*>(node->EnsureSibling(info, cloned));
+ }
+
+ SiblingList* siblings() { return &siblings_; }
+ void set_siblings(SiblingList* other) { siblings_ = *other; }
+
protected:
+
+ // Returns a sibling of this node whose interests and assumptions
+ // match the ones in the given node info. If no sibling exists NULL
+ // is returned.
+ RegExpNode* TryGetSibling(NodeInfo* info);
+
+ // Returns a sibling of this node whose interests match the ones in
+ // the given node info. The info must not contain any assertions.
+ // If no node exists a new one will be created by cloning the current
+ // node. The result will always be an instance of the same concrete
+ // class as this node.
+ RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned);
+
+ // Returns a clone of this node initialized using the copy constructor
+ // of its concrete class. Note that the node may have to be pre-
+ // processed before it is on a useable state.
+ virtual RegExpNode* Clone() = 0;
+
inline void Bind(RegExpMacroAssembler* macro);
+
private:
Label label_;
NodeInfo info_;
@@ -593,7 +704,11 @@
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual ActionNode* Clone() { return new ActionNode(*this); }
+
private:
union {
struct {
@@ -627,13 +742,28 @@
: SeqRegExpNode(on_success),
on_failure_(on_failure),
elms_(elms) { }
+ TextNode(RegExpCharacterClass* that,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
+ elms_(new ZoneList<TextElement>(1)) {
+ elms_->Add(TextElement::CharClass(that));
+ }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent();
+ virtual TextNode* Clone() { return new TextNode(*this); }
+
private:
+ void ExpandAtomChildren(RegExpAtom* that);
+ void ExpandCharClassChildren(RegExpCharacterClass* that);
+
RegExpNode* on_failure_;
ZoneList<TextElement>* elms_;
};
@@ -655,6 +785,10 @@
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
+ virtual BackReferenceNode* Clone() { return new
BackReferenceNode(*this); }
+
private:
RegExpNode* on_failure_;
int start_reg_;
@@ -669,8 +803,12 @@
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
virtual bool GoTo(RegExpCompiler* compiler);
+ virtual EndNode* Clone() { return new EndNode(*this); }
+
private:
Action action_;
};
@@ -686,6 +824,7 @@
int reg() { return reg_; }
Relation op() { return op_; }
int value() { return value_; }
+
private:
int reg_;
Relation op_;
@@ -700,6 +839,7 @@
RegExpNode* node() { return node_; }
void set_node(RegExpNode* node) { node_ = node; }
ZoneList<Guard*>* guards() { return guards_; }
+
private:
RegExpNode* node_;
ZoneList<Guard*>* guards_;
@@ -711,27 +851,31 @@
explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
: on_failure_(on_failure),
alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
- table_calculated_(false),
+ table_(NULL),
being_calculated_(false) { }
virtual void Accept(NodeVisitor* visitor);
void AddAlternative(GuardedAlternative node) {
alternatives()->Add(node); }
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
- DispatchTable* table() { return &table_; }
+ DispatchTable* GetTable(bool ignore_case);
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
- bool table_calculated() { return table_calculated_; }
- void set_table_calculated(bool b) { table_calculated_ = b; }
+ virtual RegExpNode* ExpandLocal(NodeInfo* info);
+ virtual void ExpandChildren();
+ virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
+
bool being_calculated() { return being_calculated_; }
void set_being_calculated(bool b) { being_calculated_ = b; }
+
private:
+ friend class DispatchTableConstructor;
+ friend class Analysis;
void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard,
Label* on_failure);
RegExpNode* on_failure_;
ZoneList<GuardedAlternative>* alternatives_;
- DispatchTable table_;
- bool table_calculated_;
+ DispatchTable* table_;
bool being_calculated_;
};
@@ -750,9 +894,10 @@
// dispatch table of a choice node.
class DispatchTableConstructor: public NodeVisitor {
public:
- explicit DispatchTableConstructor(DispatchTable* table)
+ DispatchTableConstructor(DispatchTable* table, bool ignore_case)
: table_(table),
- choice_index_(-1) { }
+ choice_index_(-1),
+ ignore_case_(ignore_case) { }
void BuildTable(ChoiceNode* node);
@@ -773,6 +918,7 @@
protected:
DispatchTable *table_;
int choice_index_;
+ bool ignore_case_;
};
@@ -808,7 +954,7 @@
RegExpNode** node_return,
bool ignore_case,
bool multiline);
- static void DotPrint(const char* label, RegExpNode* node);
+ static void DotPrint(const char* label, RegExpNode* node, bool
ignore_case);
};
Modified: branches/bleeding_edge/src/parser.cc
==============================================================================
--- branches/bleeding_edge/src/parser.cc (original)
+++ branches/bleeding_edge/src/parser.cc Mon Dec 1 07:42:35 2008
@@ -1028,7 +1028,7 @@
#define DUMMY ) // to make indentation work
#undef DUMMY
-#define CHECK_FAILED ); \
+#define CHECK_FAILED /**/); \
if (failed_) return NULL; \
((void)0
#define DUMMY ) // to make indentation work
Modified: branches/bleeding_edge/test/cctest/test-regexp.cc
==============================================================================
--- branches/bleeding_edge/test/cctest/test-regexp.cc (original)
+++ branches/bleeding_edge/test/cctest/test-regexp.cc Mon Dec 1 07:42:35
2008
@@ -324,27 +324,8 @@
}
-static bool IsWord(uc16 c) {
- return ('a' <= c && c <= 'z')
- || ('A' <= c && c <= 'Z')
- || ('0' <= c && c <= '9')
- || (c == '_');
-}
-
-
static bool NotWord(uc16 c) {
- return !IsWord(c);
-}
-
-
-static bool Dot(uc16 c) {
- switch (c) {
- // CR LF LS PS
- case 0x000A: case 0x000D: case 0x2028: case 0x2029:
- return false;
- default:
- return true;
- }
+ return !IsRegExpWord(c);
}
@@ -364,12 +345,12 @@
TEST(CharacterClassEscapes) {
- TestCharacterClassEscapes('.', Dot);
+ TestCharacterClassEscapes('.', IsRegExpNewline);
TestCharacterClassEscapes('d', IsDigit);
TestCharacterClassEscapes('D', NotDigit);
TestCharacterClassEscapes('s', IsWhiteSpace);
TestCharacterClassEscapes('S', NotWhiteSpace);
- TestCharacterClassEscapes('w', IsWord);
+ TestCharacterClassEscapes('w', IsRegExpWord);
TestCharacterClassEscapes('W', NotWord);
}
@@ -395,7 +376,7 @@
USE(node);
#ifdef DEBUG
if (dot_output) {
- RegExpEngine::DotPrint(input, node);
+ RegExpEngine::DotPrint(input, node, false);
exit(0);
}
#endif // DEBUG
@@ -964,7 +945,7 @@
ranges->Add(CharacterRange(from, to));
}
DispatchTable table;
- DispatchTableConstructor cons(&table);
+ DispatchTableConstructor cons(&table, false);
cons.set_choice_index(0);
cons.AddInverse(ranges);
for (int i = 0; i < kLimit; i++) {
@@ -980,7 +961,7 @@
new ZoneList<CharacterRange>(1);
ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
DispatchTable table;
- DispatchTableConstructor cons(&table);
+ DispatchTableConstructor cons(&table, false);
cons.set_choice_index(0);
cons.AddInverse(ranges);
CHECK(!table.Get(0xFFFE)->Get(0));
@@ -1162,7 +1143,45 @@
}
+static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
+ if (ranges == NULL)
+ return false;
+ for (int i = 0; i < ranges->length(); i++) {
+ CharacterRange range = ranges->at(i);
+ if (range.from() <= c && c <= range.to())
+ return true;
+ }
+ return false;
+}
+
+
+TEST(CharClassDifference) {
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ ZoneList<CharacterRange>* base = new ZoneList<CharacterRange>(1);
+ base->Add(CharacterRange::Everything());
+ Vector<const uc16> overlay = CharacterRange::GetWordBounds();
+ ZoneList<CharacterRange>* included = NULL;
+ ZoneList<CharacterRange>* excluded = NULL;
+ CharacterRange::Split(base, overlay, &included, &excluded);
+ for (int i = 0; i < (1 << 16); i++) {
+ bool in_base = InClass(i, base);
+ if (in_base) {
+ bool in_overlay = false;
+ for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
+ if (overlay[j] <= i && i <= overlay[j+1])
+ in_overlay = true;
+ }
+ CHECK_EQ(in_overlay, InClass(i, included));
+ CHECK_EQ(!in_overlay, InClass(i, excluded));
+ } else {
+ CHECK(!InClass(i, included));
+ CHECK(!InClass(i, excluded));
+ }
+ }
+}
+
+
TEST(Graph) {
V8::Initialize(NULL);
- Execute("foo$(?!bar)", false, true);
+ Execute("\\b\\w", false, true);
}
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---