Author: [EMAIL PROTECTED]
Date: Thu Nov 13 03:54:50 2008
New Revision: 746
Modified:
branches/experimental/regexp2000/src/globals.h
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/jsregexp.h
branches/experimental/regexp2000/src/list-inl.h
branches/experimental/regexp2000/src/list.h
branches/experimental/regexp2000/src/utils.h
branches/experimental/regexp2000/test/cctest/test-regexp.cc
Log:
- Moved node class declarations into .h file where they belong.
- Implemented adding of inverted character classes to dispatch tables.
- Cleaned sorting up a bit and fixed a windows build problem.
Modified: branches/experimental/regexp2000/src/globals.h
==============================================================================
--- branches/experimental/regexp2000/src/globals.h (original)
+++ branches/experimental/regexp2000/src/globals.h Thu Nov 13 03:54:50 2008
@@ -184,7 +184,7 @@
class Property;
class Proxy;
class RegExpNode;
-class RegExpParseResult;
+struct RegExpParseResult;
class RegExpTree;
class RegExpCompiler;
class RegExpVisitor;
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Thu Nov 13 03:54:50
2008
@@ -691,15 +691,6 @@
}
-#define FOR_EACH_NODE_TYPE(VISIT) \
- VISIT(End) \
- VISIT(Atom) \
- VISIT(Action) \
- VISIT(Choice) \
- VISIT(Backreference) \
- VISIT(CharacterClass)
-
-
void RegExpNode::GoTo(RegExpCompiler* compiler) {
if (label.is_bound()) {
compiler->macro_assembler()->GoTo(&label);
@@ -714,130 +705,10 @@
}
-class SeqRegExpNode: public RegExpNode {
- public:
- explicit SeqRegExpNode(RegExpNode* on_success)
- : on_success_(on_success) { }
- RegExpNode* on_success() { return on_success_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
- private:
- RegExpNode* on_success_;
-};
-
-
-class EndNode: public RegExpNode {
- public:
- enum Action { ACCEPT, BACKTRACK };
- virtual void Accept(NodeVisitor* visitor);
- static EndNode* GetAccept() { return &kAccept; }
- static EndNode* GetBacktrack() { return &kBacktrack; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
- private:
- explicit EndNode(Action action) : action_(action) { }
- Action action_;
- static EndNode kAccept;
- static EndNode kBacktrack;
-};
-
-
EndNode EndNode::kAccept(ACCEPT);
EndNode EndNode::kBacktrack(BACKTRACK);
-class AtomNode: public SeqRegExpNode {
- public:
- AtomNode(Vector<const uc16> data,
- RegExpNode* on_success,
- RegExpNode* on_failure)
- : SeqRegExpNode(on_success),
- on_failure_(on_failure),
- data_(data) { }
- virtual void Accept(NodeVisitor* visitor);
- Vector<const uc16> data() { return data_; }
- RegExpNode* on_failure() { return on_failure_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
- private:
- RegExpNode* on_failure_;
- Vector<const uc16> data_;
-};
-
-
-class BackreferenceNode: public SeqRegExpNode {
- public:
- BackreferenceNode(int start_reg,
- int end_reg,
- RegExpNode* on_success,
- RegExpNode* on_failure)
- : SeqRegExpNode(on_success),
- on_failure_(on_failure),
- start_reg_(start_reg),
- end_reg_(end_reg) { }
- virtual void Accept(NodeVisitor* visitor);
- RegExpNode* on_failure() { return on_failure_; }
- int start_register() { return start_reg_; }
- int end_register() { return end_reg_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
- private:
- RegExpNode* on_failure_;
- int start_reg_;
- int end_reg_;
-};
-
-
-class CharacterClassNode: public SeqRegExpNode {
- public:
- CharacterClassNode(ZoneList<CharacterRange>* ranges,
- bool is_negated,
- RegExpNode* on_success,
- RegExpNode* on_failure)
- : SeqRegExpNode(on_success),
- on_failure_(on_failure),
- ranges_(ranges),
- is_negated_(is_negated ) { }
- virtual void Accept(NodeVisitor* visitor);
- ZoneList<CharacterRange>* ranges() { return ranges_; }
- bool is_negated() { return is_negated_; }
- RegExpNode* on_failure() { return on_failure_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
- static void AddInverseToTable(List<CharacterRange> ranges,
- DispatchTable* table,
- int index);
- private:
- RegExpNode* on_failure_;
- ZoneList<CharacterRange>* ranges_;
- bool is_negated_;
-};
-
-
-class Guard: public ZoneObject {
- public:
- enum Relation { LT, GEQ };
- Guard(int reg, Relation op, int value)
- : reg_(reg),
- op_(op),
- value_(value) { }
- int reg() { return reg_; }
- Relation op() { return op_; }
- int value() { return value_; }
- private:
- int reg_;
- Relation op_;
- int value_;
-};
-
-
-class GuardedAlternative {
- public:
- explicit GuardedAlternative(RegExpNode* node) : node_(node),
guards_(NULL) { }
- void AddGuard(Guard* guard);
- RegExpNode* node() { return node_; }
- ZoneList<Guard*>* guards() { return guards_; }
- private:
- RegExpNode* node_;
- ZoneList<Guard*>* guards_;
-};
-
-
void GuardedAlternative::AddGuard(Guard* guard) {
if (guards_ == NULL)
guards_ = new ZoneList<Guard*>(1);
@@ -845,69 +716,6 @@
}
-class ChoiceNode: public RegExpNode {
- public:
- explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
- : on_failure_(on_failure),
- choices_(new ZoneList<GuardedAlternative>(expected_size)),
- visited_(false) { }
- virtual void Accept(NodeVisitor* visitor);
- void AddChild(GuardedAlternative node) { choices()->Add(node); }
- ZoneList<GuardedAlternative>* choices() { return choices_; }
- DispatchTable* table() { return &table_; }
- RegExpNode* on_failure() { return on_failure_; }
- virtual void Emit(RegExpCompiler* compiler);
- bool visited() { return visited_; }
- void set_visited(bool value) { visited_ = value; }
- private:
- RegExpNode* on_failure_;
- ZoneList<GuardedAlternative>* choices_;
- DispatchTable table_;
- bool visited_;
-};
-
-
-class ActionNode: public SeqRegExpNode {
- public:
- enum Type {
- STORE_REGISTER,
- INCREMENT_REGISTER,
- STORE_POSITION,
- RESTORE_POSITION,
- BEGIN_SUBMATCH,
- ESCAPE_SUBMATCH,
- END_SUBMATCH
- };
- static ActionNode* StoreRegister(int reg, int val, RegExpNode*
on_success);
- static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
- static ActionNode* StorePosition(int reg, RegExpNode* on_success);
- static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
- static ActionNode* BeginSubmatch(RegExpNode* on_success);
- static ActionNode* EscapeSubmatch(RegExpNode* on_success);
- static ActionNode* EndSubmatch(RegExpNode* on_success);
- virtual void Accept(NodeVisitor* visitor);
- virtual void Emit(RegExpCompiler* compiler);
- private:
- union {
- struct {
- int reg;
- int value;
- } u_store_register;
- struct {
- int reg;
- } u_increment_register;
- struct {
- int reg;
- } u_position_register;
- } data_;
- ActionNode(Type type, RegExpNode* on_success)
- : SeqRegExpNode(on_success),
- type_(type) { }
- Type type_;
- friend class DotPrinter;
-};
-
-
ActionNode* ActionNode::StoreRegister(int reg,
int val,
RegExpNode* on_success) {
@@ -1708,13 +1516,45 @@
}
+
+static int CompareRangeByFrom(const CharacterRange* a,
+ const CharacterRange* b) {
+ return Spaceship<uc16>(a->from(), b->from());
+}
+
+
+void CharacterClassNode::AddInverseToTable(ZoneList<CharacterRange>*
ranges,
+ DispatchTable* table,
+ int index) {
+ ranges->Sort(CompareRangeByFrom);
+ uc16 last = 0;
+ for (int i = 0; i < ranges->length(); i++) {
+ CharacterRange range = ranges->at(i);
+ if (last < range.from())
+ table->AddRange(CharacterRange(last, range.from() - 1), index);
+ if (range.to() >= last) {
+ if (range.to() == 0xFFFF) {
+ return;
+ } else {
+ last = range.to() + 1;
+ }
+ }
+ }
+ table->AddRange(CharacterRange(last, 0xFFFF), index);
+}
+
+
void Analysis::VisitCharacterClass(CharacterClassNode* that) {
if (table() != NULL) {
int index = choice_index();
ZoneList<CharacterRange>* ranges = that->ranges();
- for (int i = 0; i < ranges->length(); i++) {
- CharacterRange range = ranges->at(i);
- table()->AddRange(range, index);
+ if (that->is_negated()) {
+ CharacterClassNode::AddInverseToTable(ranges, table(), index);
+ } else {
+ for (int i = 0; i < ranges->length(); i++) {
+ CharacterRange range = ranges->at(i);
+ table()->AddRange(range, index);
+ }
}
}
AnalysisBuilder outgoing(this);
Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h (original)
+++ branches/experimental/regexp2000/src/jsregexp.h Thu Nov 13 03:54:50 2008
@@ -152,9 +152,10 @@
ASSERT(from <= to);
return CharacterRange(from, to);
}
- uc16 from() { return from_; }
+ bool Contains(uc16 i) { return from_ <= i && i <= to_; }
+ uc16 from() const { return from_; }
void set_from(uc16 value) { from_ = value; }
- uc16 to() { return to_; }
+ uc16 to() const { return to_; }
void set_to(uc16 value) { to_ = value; }
bool is_valid() { return from_ <= to_; }
bool IsSingleton() { return (from_ == to_); }
@@ -347,6 +348,211 @@
};
+#define FOR_EACH_NODE_TYPE(VISIT) \
+ VISIT(End) \
+ VISIT(Atom) \
+ VISIT(Action) \
+ VISIT(Choice) \
+ VISIT(Backreference) \
+ VISIT(CharacterClass)
+
+
+class RegExpNode: public ZoneObject {
+ public:
+ virtual ~RegExpNode() { }
+ virtual void Accept(NodeVisitor* visitor) = 0;
+ // Generates a goto to this node or actually generates the code at this
point.
+ void GoTo(RegExpCompiler* compiler);
+ void EmitAddress(RegExpCompiler* compiler);
+ virtual void Emit(RegExpCompiler* compiler) = 0;
+ private:
+ Label label;
+};
+
+
+class SeqRegExpNode: public RegExpNode {
+ public:
+ explicit SeqRegExpNode(RegExpNode* on_success)
+ : on_success_(on_success) { }
+ RegExpNode* on_success() { return on_success_; }
+ virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ private:
+ RegExpNode* on_success_;
+};
+
+
+class ActionNode: public SeqRegExpNode {
+ public:
+ enum Type {
+ STORE_REGISTER,
+ INCREMENT_REGISTER,
+ STORE_POSITION,
+ RESTORE_POSITION,
+ BEGIN_SUBMATCH,
+ ESCAPE_SUBMATCH,
+ END_SUBMATCH
+ };
+ static ActionNode* StoreRegister(int reg, int val, RegExpNode*
on_success);
+ static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
+ static ActionNode* StorePosition(int reg, RegExpNode* on_success);
+ static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
+ static ActionNode* BeginSubmatch(RegExpNode* on_success);
+ static ActionNode* EscapeSubmatch(RegExpNode* on_success);
+ static ActionNode* EndSubmatch(RegExpNode* on_success);
+ virtual void Accept(NodeVisitor* visitor);
+ virtual void Emit(RegExpCompiler* compiler);
+ private:
+ union {
+ struct {
+ int reg;
+ int value;
+ } u_store_register;
+ struct {
+ int reg;
+ } u_increment_register;
+ struct {
+ int reg;
+ } u_position_register;
+ } data_;
+ ActionNode(Type type, RegExpNode* on_success)
+ : SeqRegExpNode(on_success),
+ type_(type) { }
+ Type type_;
+ friend class DotPrinter;
+};
+
+
+class AtomNode: public SeqRegExpNode {
+ public:
+ AtomNode(Vector<const uc16> data,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
+ data_(data) { }
+ virtual void Accept(NodeVisitor* visitor);
+ Vector<const uc16> data() { return data_; }
+ RegExpNode* on_failure() { return on_failure_; }
+ virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ private:
+ RegExpNode* on_failure_;
+ Vector<const uc16> data_;
+};
+
+
+class BackreferenceNode: public SeqRegExpNode {
+ public:
+ BackreferenceNode(int start_reg,
+ int end_reg,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
+ start_reg_(start_reg),
+ end_reg_(end_reg) { }
+ virtual void Accept(NodeVisitor* visitor);
+ RegExpNode* on_failure() { return on_failure_; }
+ int start_register() { return start_reg_; }
+ int end_register() { return end_reg_; }
+ virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ private:
+ RegExpNode* on_failure_;
+ int start_reg_;
+ int end_reg_;
+};
+
+
+class CharacterClassNode: public SeqRegExpNode {
+ public:
+ CharacterClassNode(ZoneList<CharacterRange>* ranges,
+ bool is_negated,
+ RegExpNode* on_success,
+ RegExpNode* on_failure)
+ : SeqRegExpNode(on_success),
+ on_failure_(on_failure),
+ ranges_(ranges),
+ is_negated_(is_negated ) { }
+ virtual void Accept(NodeVisitor* visitor);
+ ZoneList<CharacterRange>* ranges() { return ranges_; }
+ bool is_negated() { return is_negated_; }
+ RegExpNode* on_failure() { return on_failure_; }
+ virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ static void AddInverseToTable(ZoneList<CharacterRange>* ranges,
+ DispatchTable* table,
+ int index);
+ private:
+ RegExpNode* on_failure_;
+ ZoneList<CharacterRange>* ranges_;
+ bool is_negated_;
+};
+
+
+class EndNode: public RegExpNode {
+ public:
+ enum Action { ACCEPT, BACKTRACK };
+ virtual void Accept(NodeVisitor* visitor);
+ static EndNode* GetAccept() { return &kAccept; }
+ static EndNode* GetBacktrack() { return &kBacktrack; }
+ virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ private:
+ explicit EndNode(Action action) : action_(action) { }
+ Action action_;
+ static EndNode kAccept;
+ static EndNode kBacktrack;
+};
+
+
+class Guard: public ZoneObject {
+ public:
+ enum Relation { LT, GEQ };
+ Guard(int reg, Relation op, int value)
+ : reg_(reg),
+ op_(op),
+ value_(value) { }
+ int reg() { return reg_; }
+ Relation op() { return op_; }
+ int value() { return value_; }
+ private:
+ int reg_;
+ Relation op_;
+ int value_;
+};
+
+
+class GuardedAlternative {
+ public:
+ explicit GuardedAlternative(RegExpNode* node) : node_(node),
guards_(NULL) { }
+ void AddGuard(Guard* guard);
+ RegExpNode* node() { return node_; }
+ ZoneList<Guard*>* guards() { return guards_; }
+ private:
+ RegExpNode* node_;
+ ZoneList<Guard*>* guards_;
+};
+
+
+class ChoiceNode: public RegExpNode {
+ public:
+ explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
+ : on_failure_(on_failure),
+ choices_(new ZoneList<GuardedAlternative>(expected_size)),
+ visited_(false) { }
+ virtual void Accept(NodeVisitor* visitor);
+ void AddChild(GuardedAlternative node) { choices()->Add(node); }
+ ZoneList<GuardedAlternative>* choices() { return choices_; }
+ DispatchTable* table() { return &table_; }
+ RegExpNode* on_failure() { return on_failure_; }
+ virtual void Emit(RegExpCompiler* compiler);
+ bool visited() { return visited_; }
+ void set_visited(bool value) { visited_ = value; }
+ private:
+ RegExpNode* on_failure_;
+ ZoneList<GuardedAlternative>* choices_;
+ DispatchTable table_;
+ bool visited_;
+};
+
+
struct RegExpParseResult {
RegExpTree* tree;
bool has_character_escapes;
@@ -363,19 +569,6 @@
class RegExpCompiler;
-
-
-class RegExpNode: public ZoneObject {
- public:
- virtual ~RegExpNode() { }
- virtual void Accept(NodeVisitor* visitor) = 0;
- // Generates a goto to this node or actually generates the code at this
point.
- void GoTo(RegExpCompiler* compiler);
- void EmitAddress(RegExpCompiler* compiler);
- virtual void Emit(RegExpCompiler* compiler) = 0;
- private:
- Label label;
-};
} } // namespace v8::internal
Modified: branches/experimental/regexp2000/src/list-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/list-inl.h (original)
+++ branches/experimental/regexp2000/src/list-inl.h Thu Nov 13 03:54:50 2008
@@ -101,14 +101,17 @@
template<typename T, class P>
void List<T, P>::Sort(int (*cmp)(const T* x, const T* y)) {
- qsort(data_,
- length_,
- sizeof(T),
- reinterpret_cast<int (*)(const void*, const void*)>(cmp));
+ ToVector().Sort(cmp);
#ifdef DEBUG
for (int i = 1; i < length_; i++)
ASSERT(cmp(&data_[i - 1], &data_[i]) <= 0);
#endif
+}
+
+
+template<typename T, class P>
+void List<T, P>::Sort() {
+ Sort(PointerSpaceship<T>);
}
Modified: branches/experimental/regexp2000/src/list.h
==============================================================================
--- branches/experimental/regexp2000/src/list.h (original)
+++ branches/experimental/regexp2000/src/list.h Thu Nov 13 03:54:50 2008
@@ -102,6 +102,7 @@
// Sort all list entries (using QuickSort)
void Sort(int (*cmp)(const T* x, const T* y));
+ void Sort();
INLINE(void Initialize(int capacity));
Modified: branches/experimental/regexp2000/src/utils.h
==============================================================================
--- branches/experimental/regexp2000/src/utils.h (original)
+++ branches/experimental/regexp2000/src/utils.h Thu Nov 13 03:54:50 2008
@@ -83,6 +83,23 @@
}
+template <typename T>
+static int Spaceship(const T& a, const T& b) {
+ if (a == b)
+ return 0;
+ else if (a < b)
+ return -1;
+ else
+ return 1;
+}
+
+
+template <typename T>
+static int PointerSpaceship(const T* a, const T* b) {
+ return Spaceship<T>(*a, *b);
+}
+
+
// Returns the smallest power of two which is >= x. If you pass in a
// number that is already a power of two, it is returned as is.
uint32_t RoundUpToPowerOf2(uint32_t x);
@@ -316,6 +333,18 @@
T* result = NewArray<T>(length_);
for (int i = 0; i < length_; i++) result[i] = start_[i];
return Vector<T>(result, length_);
+ }
+
+ void Sort(int (*cmp)(const T*, const T*)) {
+ typedef int (*RawComparer)(const void*, const void*);
+ qsort(start(),
+ length(),
+ sizeof(T),
+ reinterpret_cast<RawComparer>(cmp));
+ }
+
+ void Sort() {
+ Sort(PointerSpaceship<T>);
}
// Releases the array underlying this vector. Once disposed the
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Thu Nov 13
03:54:50 2008
@@ -438,18 +438,6 @@
}
-static int CompareChars(const void* ap, const void* bp) {
- uc16 a = *static_cast<const uc16*>(ap);
- uc16 b = *static_cast<const uc16*>(bp);
- if (a < b)
- return -1;
- else if (a > b)
- return 1;
- else
- return 0;
-}
-
-
TEST(DispatchTableConstruction) {
// Initialize test data.
static const int kLimit = 1000;
@@ -457,11 +445,14 @@
static const int kRangeSize = 16;
uc16 ranges[kRangeCount][2 * kRangeSize];
for (int i = 0; i < kRangeCount; i++) {
- uc16* range = ranges[i];
+ Vector<uc16> range(ranges[i], 2 * kRangeSize);
for (int j = 0; j < 2 * kRangeSize; j++) {
range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
}
- qsort(range, 2 * kRangeSize, sizeof(uc16), CompareChars);
+ range.Sort();
+ for (int j = 1; j < 2 * kRangeSize; j++) {
+ CHECK(range[j-1] <= range[j]);
+ }
}
// Enter test data into dispatch table.
ZoneScope zone_scope(DELETE_ON_EXIT);
@@ -676,19 +667,37 @@
TEST(AddInverseToTable) {
static const int kLimit = 1000;
static const int kRangeCount = 16;
- ZoneScope zone_scope(DELETE_ON_EXIT);
- ZoneList<CharacterRange>* range = new
ZoneList<CharacterRange>(kRangeCount);
- for (int i = 0; i < kRangeCount; i++) {
- int from = PseudoRandom(87, i + 25) % kLimit;
- int to = PseudoRandom(i + 87, 25) % (kLimit / 20);
- if (to > kLimit) to = kLimit;
- range->Add(CharacterRange(from, to));
+ for (int t = 0; t < 10; t++) {
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ ZoneList<CharacterRange>* ranges =
+ new ZoneList<CharacterRange>(kRangeCount);
+ for (int i = 0; i < kRangeCount; i++) {
+ int from = PseudoRandom(t + 87, i + 25) % kLimit;
+ int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
+ if (to > kLimit) to = kLimit;
+ ranges->Add(CharacterRange(from, to));
+ }
+ DispatchTable table;
+ CharacterClassNode::AddInverseToTable(ranges, &table, 0);
+ for (int i = 0; i < kLimit; i++) {
+ bool is_on = false;
+ for (int j = 0; !is_on && j < kRangeCount; j++)
+ is_on = ranges->at(j).Contains(i);
+ OutSet* set = table.Get(i);
+ CHECK_EQ(is_on, set->Get(0) == false);
+ }
}
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ ZoneList<CharacterRange>* ranges =
+ new ZoneList<CharacterRange>(1);
+ ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
DispatchTable table;
- // CharacterClassNode::AddInverseToTable(range, &table, 0);
+ CharacterClassNode::AddInverseToTable(ranges, &table, 0);
+ CHECK(!table.Get(0xFFFE)->Get(0));
+ CHECK(table.Get(0xFFFF)->Get(0));
}
TEST(Graph) {
- Execute(".*?a", "", true);
+ Execute(".*?[^a]|b", "", true);
}
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---