Author: [EMAIL PROTECTED]
Date: Mon Nov 17 03:28:48 2008
New Revision: 770
Modified:
branches/experimental/regexp2000/src/ast.cc
branches/experimental/regexp2000/src/ast.h
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/jsregexp.h
branches/experimental/regexp2000/src/parser.cc
branches/experimental/regexp2000/test/cctest/test-regexp.cc
Log:
Characters in the range 0..31 and 128..65535 are now printed as \x or \u
escapes.
This allows tests written in C++ in a simple codepage to represent the
output.
This also avoids output containing \0, which would end the string comparison
prematurely.
Also adds a small optimization for quantifiers applied to empty atoms.
Modified: branches/experimental/regexp2000/src/ast.cc
==============================================================================
--- branches/experimental/regexp2000/src/ast.cc (original)
+++ branches/experimental/regexp2000/src/ast.cc Mon Nov 17 03:28:48 2008
@@ -190,19 +190,21 @@
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
#undef MAKE_ACCEPT
-#define MAKE_CONVERSION(Name) \
+#define MAKE_TYPE_CASE(Name) \
RegExp##Name* RegExpTree::As##Name() { \
return NULL; \
- }
- FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CONVERSION)
-#undef MAKE_CONVERSION
+ } \
+ bool RegExpTree::Is##Name() { return false; }
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
+#undef MAKE_TYPE_CASE
-#define MAKE_CONVERSION(Name) \
+#define MAKE_TYPE_CASE(Name) \
RegExp##Name* RegExp##Name::As##Name() { \
return this; \
- }
-FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CONVERSION)
-#undef MAKE_CONVERSION
+ } \
+ bool RegExp##Name::Is##Name() { return true; }
+FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
+#undef MAKE_TYPE_CASE
RegExpEmpty RegExpEmpty::kInstance;
@@ -252,12 +254,21 @@
return NULL;
}
+static void AddChar(StringStream* stream, uc16 character) {
+ if (character < 32 || (character >= 128 && character < 256)) {
+ stream->Add("\\x%02x", character);
+ } else if (character >= 256) {
+ stream->Add("\\u%04x", character);
+ } else {
+ stream->Add("%c", character);
+ }
+}
void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
- if (that.IsSingleton()) {
- stream()->Add("%c", that.from());
- } else {
- stream()->Add("%c-%c", that.from(), that.to());
+ AddChar(stream(), that.from());
+ if (!that.IsSingleton()) {
+ stream()->Add("-");
+ AddChar(stream(), that.to());
}
}
@@ -303,7 +314,12 @@
void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
- stream()->Add("'%w'", that->data());
+ stream()->Add("'");
+ Vector<const uc16> chardata = that->data();
+ for (int i = 0; i < chardata.length(); i++) {
+ AddChar(stream(), chardata[i]);
+ }
+ stream()->Add("'");
return NULL;
}
Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h (original)
+++ branches/experimental/regexp2000/src/ast.h Mon Nov 17 03:28:48 2008
@@ -1201,7 +1201,9 @@
virtual bool IsTextElement() { return false; }
virtual void AppendToText(RegExpText* text);
SmartPointer<const char> ToString();
-#define MAKE_ASTYPE(Name) virtual RegExp##Name* As##Name();
+#define MAKE_ASTYPE(Name) \
+ virtual RegExp##Name* As##Name(); \
+ virtual bool Is##Name();
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
#undef MAKE_ASTYPE
};
@@ -1216,6 +1218,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpDisjunction* AsDisjunction();
+ virtual bool IsDisjunction();
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
private:
ZoneList<RegExpTree*>* alternatives_;
@@ -1230,6 +1233,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpAlternative* AsAlternative();
+ virtual bool IsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
ZoneList<RegExpTree*>* nodes_;
@@ -1244,6 +1248,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpText* AsText();
+ virtual bool IsText();
virtual bool IsTextElement() { return true; }
virtual void AppendToText(RegExpText* text);
void AddElement(TextElement elm) { elements_.Add(elm); }
@@ -1265,6 +1270,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpAssertion* AsAssertion();
+ virtual bool IsAssertion();
Type type() { return type_; }
private:
Type type_;
@@ -1286,6 +1292,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpCharacterClass* AsCharacterClass();
+ virtual bool IsCharacterClass();
virtual bool IsTextElement() { return true; }
virtual void AppendToText(RegExpText* text);
ZoneList<CharacterRange>* ranges() { return ranges_; }
@@ -1304,6 +1311,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpAtom* AsAtom();
+ virtual bool IsAtom();
virtual bool IsTextElement() { return true; }
virtual void AppendToText(RegExpText* text);
Vector<const uc16> data() { return data_; }
@@ -1331,6 +1339,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpQuantifier* AsQuantifier();
+ virtual bool IsQuantifier();
int min() { return min_; }
int max() { return max_; }
bool is_greedy() { return is_greedy_; }
@@ -1363,6 +1372,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpCapture* AsCapture();
+ virtual bool IsCapture();
RegExpTree* body() { return body_; }
int index() { return index_; }
inline CaptureAvailability available() { return available_; }
@@ -1388,6 +1398,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpLookahead* AsLookahead();
+ virtual bool IsLookahead();
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
private:
@@ -1405,6 +1416,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpBackreference* AsBackreference();
+ virtual bool IsBackreference();
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
@@ -1420,6 +1432,7 @@
RegExpNode* on_success,
RegExpNode* on_failure);
virtual RegExpEmpty* AsEmpty();
+ virtual bool IsEmpty();
static RegExpEmpty* GetInstance() { return &kInstance; }
private:
static RegExpEmpty kInstance;
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Mon Nov 17 03:28:48 2008
@@ -998,7 +998,6 @@
void ChoiceNode::GenerateGuard(RegExpCompiler* compiler,
Guard *guard,
Label* on_failure) {
-
}
Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h (original)
+++ branches/experimental/regexp2000/src/jsregexp.h Mon Nov 17 03:28:48 2008
@@ -421,7 +421,7 @@
public:
enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
TextElement() : type(UNINITIALIZED) { }
- TextElement(Type t) : type(t) { }
+ explicit TextElement(Type t) : type(t) { }
static TextElement Atom(RegExpAtom* atom);
static TextElement CharClass(RegExpCharacterClass* char_class);
Type type;
Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc (original)
+++ branches/experimental/regexp2000/src/parser.cc Mon Nov 17 03:28:48 2008
@@ -387,6 +387,10 @@
void RegExpBuilder::AddAtom(RegExpTree* term) {
+ if (term->IsEmpty()) {
+ AddEmpty();
+ return;
+ }
if (term->IsTextElement()) {
FlushCharacters();
text_.Add(term);
@@ -470,6 +474,16 @@
} else if (terms_.length() > 0) {
ASSERT(last_added_ == ADD_ATOM);
atom = terms_.RemoveLast();
+ if (atom->IsLookahead() || atom->IsAssertion()) {
+ // Guaranteed not to match a non-empty string.
+ // Assertion as an atom can happen as, e.g., (?:\b)
+ LAST(ADD_TERM);
+ if (min == 0) {
+ return;
+ }
+ terms_.Add(atom);
+ return;
+ }
} else {
// Only call immediately after adding an atom or character!
UNREACHABLE();
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Mon Nov 17 03:28:48 2008
@@ -96,8 +96,8 @@
CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
- CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'");
- CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\nb' @b 'c')");
+ CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
+ CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
CHECK_PARSE_EQ("(?:foo)", "'foo'");
CHECK_PARSE_EQ("(?: foo )", "' foo '");
CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
@@ -106,8 +106,8 @@
CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
CHECK_PARSE_EQ("()", "(^ %)");
CHECK_PARSE_EQ("(?=)", "(-> + %)");
- CHECK_PARSE_EQ("[]", "^[\x00-\uffff]");
- CHECK_PARSE_EQ("[^]", "[\x00-\uffff]");
+ CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
+ CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
CHECK_PARSE_EQ("[x]", "[x]");
CHECK_PARSE_EQ("[xyz]", "[x y z]");
CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
@@ -120,27 +120,28 @@
CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
- CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'");
+ CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
+ "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
CHECK_PARSE_EQ("\\c!", "'c!'");
CHECK_PARSE_EQ("\\c_", "'c_'");
CHECK_PARSE_EQ("\\c~", "'c~'");
CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
- CHECK_PARSE_EQ("\\0", "'\0'");
+ CHECK_PARSE_EQ("\\0", "'\\x00'");
CHECK_PARSE_EQ("\\8", "'8'");
CHECK_PARSE_EQ("\\9", "'9'");
- CHECK_PARSE_EQ("\\11", "'\t'");
- CHECK_PARSE_EQ("\\11a", "'\ta'");
- CHECK_PARSE_EQ("\\011", "'\t'");
- CHECK_PARSE_EQ("\\00011", "'\00011'");
- CHECK_PARSE_EQ("\\118", "'\t8'");
+ CHECK_PARSE_EQ("\\11", "'\\x09'");
+ CHECK_PARSE_EQ("\\11a", "'\\x09a'");
+ CHECK_PARSE_EQ("\\011", "'\\x09'");
+ CHECK_PARSE_EQ("\\00011", "'\\x0011'");
+ CHECK_PARSE_EQ("\\118", "'\\x098'");
CHECK_PARSE_EQ("\\111", "'I'");
CHECK_PARSE_EQ("\\1111", "'I1'");
CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
- CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\x04')");
+ CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
" (# 0 - g (<- 1)))");
CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
@@ -148,25 +149,25 @@
CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
" (# 0 - g (<- 3)))");
CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
- " (# 0 - g '\x04'))");
+ " (# 0 - g '\\x04'))");
CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
"(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
" (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
"(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
- " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\x09')");
+ " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
- CHECK_PARSE_EQ("\\1(a)", "(: '\x01' (^ 'a'))");
+ CHECK_PARSE_EQ("\\1(a)", "(: '\\x01' (^ 'a'))");
CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");
- CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: '\x01' (^ 'a') (<- 1)))");
- CHECK_PARSE_EQ("[\\0]", "[\0]");
- CHECK_PARSE_EQ("[\\11]", "[\t]");
- CHECK_PARSE_EQ("[\\11a]", "[\t a]");
- CHECK_PARSE_EQ("[\\011]", "[\t]");
- CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]");
- CHECK_PARSE_EQ("[\\118]", "[\t 8]");
+ CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: '\\x01' (^ 'a') (<- 1)))");
+ CHECK_PARSE_EQ("[\\0]", "[\\x00]");
+ CHECK_PARSE_EQ("[\\11]", "[\\x09]");
+ CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
+ CHECK_PARSE_EQ("[\\011]", "[\\x09]");
+ CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
+ CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
CHECK_PARSE_EQ("[\\111]", "[I]");
CHECK_PARSE_EQ("[\\1111]", "[I 1]");
CHECK_PARSE_EQ("\\x34", "'\x34'");
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---