Author: [EMAIL PROTECTED]
Date: Wed Oct 29 06:28:00 2008
New Revision: 640
Modified:
branches/experimental/regexp2000/src/parser.cc
branches/experimental/regexp2000/test/cctest/test-regexp.cc
Log:
* Modified the RegExp parser to handle bad \c, \x, \u and decimal escapes
gracefully.
If the escape sequence is not valid, \c, \x and \u are treated as
identity escapes (i.e., "c", "x", or "u").
Decimal escapes that are larger than the *current* number of left capture
parentheses are treated as 1..3 digit octal numbers, and \8 and \9 are
treated as identity escapes.
* Added a multiline_mode flag to the RegExp parser.
Please ignore the first two patch-sets. Third time is the charm.
Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc (original)
+++ branches/experimental/regexp2000/src/parser.cc Wed Oct 29 06:28:00 2008
@@ -230,7 +230,9 @@
class RegExpParser {
public:
- RegExpParser(unibrow::CharacterStream* in, Handle<String>* error);
+ RegExpParser(unibrow::CharacterStream* in,
+ Handle<String>* error,
+ bool multiline_mode);
RegExpTree* ParsePattern(bool* ok);
RegExpTree* ParseDisjunction(bool* ok);
RegExpTree* ParseAlternative(bool* ok);
@@ -247,7 +249,9 @@
// must not be 'b' or 'B' since they are usually handle specially.
uc32 ParseCharacterEscape(bool* ok);
- uc32 ParseHexEscape(int length);
+ // Checks whether the following is a length-digit hexadecimal number,
+ // and sets the value if it is.
+ bool ParseHexEscape(int length, uc32* value);
uc32 ParseControlEscape(bool* ok);
uc32 ParseOctalLiteral(bool* ok);
@@ -262,6 +266,14 @@
RegExpTree* ReportError(Vector<const char> message, bool* ok);
void Advance();
void Advance(int dist);
+ // Pushes a read character (or potentially some other character) back
+ // on the input stream. After pushing it back, it becomes the character
+ // returned by current(). There is a limited amount of push-back buffer.
+ // A function using PushBack should check that it doesn't push back more
+ // than kMaxPushback characters, and it should not push back more characters
+ // than it has read, or that it knows had been read prior to calling it.
+ void PushBack(uc32 character);
+ bool CanPushBack();
static const uc32 kEndMarker = unibrow::Utf8::kBadChar;
private:
uc32 current() { return current_; }
@@ -273,6 +285,7 @@
uc32 next_;
bool has_more_;
bool has_next_;
+ bool multiline_mode_;
int captures_seen_;
unibrow::CharacterStream* in_;
Handle<String>* error_;
@@ -3220,11 +3233,14 @@
// Regular expressions
-RegExpParser::RegExpParser(unibrow::CharacterStream* in, Handle<String>*
error)
+RegExpParser::RegExpParser(unibrow::CharacterStream* in,
+ Handle<String>* error,
+ bool multiline_mode)
: current_(kEndMarker),
next_(kEndMarker),
has_more_(true),
has_next_(true),
+ multiline_mode_(multiline_mode),
captures_seen_(0),
in_(in),
error_(error),
@@ -3255,6 +3271,26 @@
}
+void RegExpParser::PushBack(uc32 character) {
+ if (has_next_) {
+ ASSERT(pushback_count_ < kMaxPushback);
+ pushback_buffer_[pushback_count_] = next_;
+ pushback_count_++;
+ }
+ if (has_more_) {
+ next_ = current_;
+ has_next_ = true;
+ }
+ current_ = character;
+ has_more_ = true;
+}
+
+
+bool RegExpParser::CanPushBack() {
+ return (pushback_count_ < kMaxPushback);
+}
+
+
RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool*
ok) {
*ok = false;
*error_ = Factory::NewStringFromAscii(message, NOT_TENURED);
@@ -3358,6 +3394,11 @@
ASSERT_EQ('\\', current());
ASSERT('1' <= next() && next() <= '9');
ASSERT_EQ(0, pushback_count_);
+ // Try to parse a decimal literal that is no greater than the number
+ // of previously encountered left capturing parentheses.
+ // This is not according to the ECMAScript specification. According to
+ // that, one must accept values up to the total number of left capturing
+ // parentheses in the entire input, even if they are meaningless.
if (captures_seen_ == 0)
return false;
int value = next() - '0';
@@ -3377,21 +3418,15 @@
if (next_value > captures_seen_ || char_count > kMaxChars) {
// If we give up we have to push the characters we read back
// onto the pushback buffer in the reverse order.
- pushback_buffer_[0] = current();
- for (int i = 0; i < char_count; i++)
- pushback_buffer_[i + 1] = chars_seen[char_count - i - 1];
- pushback_buffer_[char_count + 1] = '\\';
- pushback_count_ = char_count + 2;
- // Then, once we've filled up the buffer, we read the two
- // first characters into the lookahead. This is a roundabout
- // way of doing it but makes the code simpler.
- Advance(2);
+ for (int i = 0; i < char_count; i++) {
+ PushBack(chars_seen[char_count - i - 1]);
+ }
+ PushBack('\\');
return false;
- } else {
- value = next_value;
- chars_seen[char_count++] = current();
- Advance();
}
+ value = next_value;
+ chars_seen[char_count++] = current();
+ Advance();
} else {
*index_out = value;
return true;
@@ -3414,12 +3449,14 @@
// \ B
case '^':
Advance();
- // Make the type of assertion dependent on multi/nonmultiline.
- return new RegExpAssertion(RegExpAssertion::START_OF_INPUT);
+ return new RegExpAssertion(
+ multiline_mode_ ? RegExpAssertion::START_OF_LINE
+ : RegExpAssertion::START_OF_INPUT);
case '$':
Advance();
- // Make the type of assertion dependent on multi/nonmultiline.
- return new RegExpAssertion(RegExpAssertion::END_OF_INPUT);
+ return new RegExpAssertion(
+ multiline_mode_ ? RegExpAssertion::END_OF_LINE
+ : RegExpAssertion::END_OF_INPUT);
case '.':
Advance();
atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.'));
@@ -3460,7 +3497,7 @@
goto has_read_atom;
} else {
// If this is not a backreference we go to the atom parser
- // which will read it as an octal escape.
+ // which will read it as an octal escape or identity escape.
goto parse_atom;
}
}
@@ -3591,6 +3628,8 @@
return new RegExpAtom(buf->ToConstVector());
}
+// Upper and lower case letters differ by one bit.
+STATIC_CHECK('a'^'A' == 0x20);
uc32 RegExpParser::ParseControlEscape(bool* ok) {
ASSERT(current() == 'c');
@@ -3598,51 +3637,61 @@
if (!has_more()) {
ReportError(CStrVector("\\c at end of pattern"), ok);
return '\0';
- } else {
- uc32 letter = current();
- if (!('a' <= letter && letter <= 'z') &&
- !('A' <= letter && letter <= 'Z')) {
- ReportError(CStrVector("Illegal control letter"), ok);
- return '\0';
- }
- Advance();
- return letter & ((1 << 5) - 1);
}
+ uc32 letter = current() & ~(0x20); // Collapse upper and lower case
letters.
+ if (letter < 'A' || 'Z' < letter) {
+ // Non-spec error-correction: "\c" followed by non-control letter is
+ // interpreted as an IdentityEscape.
+ return 'c';
+ }
+ Advance();
+ return letter & 0x1f; // Remainder modulo 32, per specification.
}
uc32 RegExpParser::ParseOctalLiteral(bool* ok) {
ASSERT('0' <= current() && current() <= '7');
- // Here we're really supposed to break out after the first digit
- // if it is '0' but the other implementations don't do that so
- // neither do we. Is this deviation from the spec error prone?
- // Yes, it's probably as error prone as it's possible to get. Isn't
- // JavaScript wonderful?
- uc32 value = 0;
- while ('0' <= current() && current() <= '7') {
- int next = (8 * value) + (current() - '0');
- if (next >= 256) {
- break;
- } else {
- value = next;
+ // For compatibility with some other browsers (not all), we parse
+ // up to three octal digits with a value below 256.
+ uc32 value = current() - '0';
+ Advance();
+ if ('0' <= current() && current() <= '7') {
+ value = value * 8 + current() - '0';
+ Advance();
+ if (value < 32 && '0' <= current() && current() <= '7') {
+ value = value * 8 + current() - '0';
Advance();
}
}
return value;
}
-
-uc32 RegExpParser::ParseHexEscape(int length) {
- uc32 value = 0;
- for (int i = 0; i < length; i++) {
- int d = HexValue(current());
- if (d < 0)
- return value;
- value = value * 16 + d;
+bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
+ static const int kMaxChars = kMaxPushback;
+ EmbeddedVector<uc32, kMaxChars> chars_seen;
+ ASSERT(length <= kMaxChars);
+ uc32 val = 0;
+ bool done = false;
+ for (int i = 0; !done; i++) {
+ uc32 c = current();
+ int d = HexValue(c);
+ if (d < 0) {
+ while (i > 0) {
+ i--;
+ PushBack(chars_seen[i]);
+ }
+ return false;
+ }
+ val = val * 16 + d;
Advance();
+ if (i < length - 1) {
+ chars_seen[i] = c;
+ } else {
+ done = true;
+ }
}
-
- return value;
+ *value = val;
+ return true;
}
@@ -3670,25 +3719,39 @@
Advance();
return '\v';
case 'c':
+ // Spec mandates that next character is ASCII letter.
+ // If not, we error-correct by interpreting "\c" as "c".
return ParseControlEscape(ok);
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7':
- // We're really supposed to read this as a decimal integer
- // literal which is base 10 but for whatever reason the other
- // implementations read base 8. It's hard to believe that the
- // spec was written by some ofthe same people that wrote the
- // other implementations...
+ // For compatibility, we interpret a decimal escape that isn't
+ // a back reference (and therefore either \0 or not valid according
+ // to the specification) as a 1..3 digit octal character code.
return ParseOctalLiteral(ok);
- case 'x':
+ case 'x': {
Advance();
- return ParseHexEscape(2);
- case 'A': case 'Z': {
- uc32 result = current();
+ uc32 value;
+ if (ParseHexEscape(2, &value)) {
+ return value;
+ }
+ // If \x is not followed by a two-digit hexadecimal, treat it
+ // as an identity escape.
+ return 'x';
+ }
+ case 'u': {
Advance();
- return result;
+ uc32 value;
+ if (ParseHexEscape(4, &value)) {
+ return value;
+ }
+ // If \u is not followed by a four-digit hexadecimal, treat it
+ // as an identity escape.
+ return 'u';
}
default: {
- ASSERT(!Scanner::kIsIdentifierPart.get(current()));
+ // Extended identity escape. We accept any character that hasn't
+ // been matched by a more specific case, not just the subset required
+ // by the ECMAScript specification.
uc32 result = current();
Advance();
return result;
@@ -3847,7 +3910,7 @@
RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,
Handle<String>* error) {
ASSERT(error->is_null());
- RegExpParser parser(stream, error);
+ RegExpParser parser(stream, error, false); // Get multiline flag somehow
bool ok = true;
RegExpTree* result = parser.ParsePattern(&ok);
if (!ok) {
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Wed Oct 29
06:28:00 2008
@@ -71,8 +71,7 @@
#endif
-static void ExpectParse(const char* input,
- const char* expected) {
+static SmartPointer<char> Parse(const char* input) {
v8::HandleScope scope;
unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
ZoneScope zone_scope(DELETE_ON_EXIT);
@@ -81,97 +80,107 @@
CHECK(node != NULL);
CHECK(error.is_null());
SmartPointer<char> output = node->ToString();
- CHECK_EQ(expected, *output);
+ return output;
}
+#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
+
+
TEST(Parser) {
V8::Initialize(NULL);
- ExpectParse("abc", "'abc'");
- ExpectParse("", "%");
- ExpectParse("abc|def", "(| 'abc' 'def')");
- ExpectParse("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
- ExpectParse("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d] [&D])");
- ExpectParse("^xxx$", "(: @^i 'xxx' @$i)");
- ExpectParse("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')");
- ExpectParse("\\w|\\s|.", "(| [&w] [&s] [&.])");
- ExpectParse("a*", "(# 0 - g 'a')");
- ExpectParse("a*?", "(# 0 - n 'a')");
- ExpectParse("abc+", "(# 1 - g 'abc')");
- ExpectParse("abc+?", "(# 1 - n 'abc')");
- ExpectParse("xyz?", "(# 0 1 g 'xyz')");
- ExpectParse("xyz??", "(# 0 1 n 'xyz')");
- ExpectParse("xyz{0,1}", "(# 0 1 g 'xyz')");
- ExpectParse("xyz{0,1}?", "(# 0 1 n 'xyz')");
- ExpectParse("xyz{93}", "(# 93 93 g 'xyz')");
- ExpectParse("xyz{93}?", "(# 93 93 n 'xyz')");
- ExpectParse("xyz{1,32}", "(# 1 32 g 'xyz')");
- ExpectParse("xyz{1,32}?", "(# 1 32 n 'xyz')");
- ExpectParse("xyz{1,}", "(# 1 - g 'xyz')");
- ExpectParse("xyz{1,}?", "(# 1 - n 'xyz')");
- ExpectParse("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'");
- ExpectParse("a\\nb\\bc", "(: 'a\nb' @b 'c')");
- ExpectParse("(?:foo)", "'foo'");
- ExpectParse("(?: foo )", "' foo '");
- ExpectParse("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
- ExpectParse("foo|(bar|baz)|quux", "(| 'foo' (^
(| 'bar' 'baz')) 'quux')");
- ExpectParse("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
- ExpectParse("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
- ExpectParse("()", "(^ %)");
- ExpectParse("(?=)", "(-> + %)");
- ExpectParse("[]", "%");
- ExpectParse("[x]", "[x]");
- ExpectParse("[xyz]", "[x y z]");
- ExpectParse("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
- ExpectParse("[-123]", "[- 1 2 3]");
- ExpectParse("[^123]", "^[1 2 3]");
- ExpectParse("]", "']'");
- ExpectParse("}", "'}'");
- ExpectParse("[a-b-c]", "[a-b - c]");
- ExpectParse("[\\w]", "[&w]");
- ExpectParse("[x\\wz]", "[x &w z]");
- ExpectParse("[\\w-z]", "[&w - z]");
- ExpectParse("[\\w-\\d]", "[&w - &d]");
- ExpectParse("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'");
- ExpectParse("[a\\]c]", "[a ] c]");
- ExpectParse("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
- ExpectParse("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
- ExpectParse("\\0", "'\0'");
- ExpectParse("\\11", "'\t'");
- ExpectParse("\\11a", "'\ta'");
- ExpectParse("\\011", "'\t'");
- ExpectParse("\\00011", "'\t'");
- ExpectParse("\\118", "'\t8'");
- ExpectParse("\\111", "'I'");
- ExpectParse("\\1111", "'I1'");
- ExpectParse("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))");
- ExpectParse("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))");
- ExpectParse("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))");
- ExpectParse("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')");
- ExpectParse("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
+ CHECK_PARSE_EQ("abc", "'abc'");
+ CHECK_PARSE_EQ("", "%");
+ CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
+ CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
+ CHECK_PARSE_EQ("\\w\\W\\s\\S\\d\\D", "(: [&w] [&W] [&s] [&S] [&d]
[&D])");
+ CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
+ CHECK_PARSE_EQ("ab\\b\\w\\bcd", "(: 'ab' @b [&w] @b 'cd')");
+ CHECK_PARSE_EQ("\\w|\\s|.", "(| [&w] [&s] [&.])");
+ CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
+ CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
+ CHECK_PARSE_EQ("abc+", "(# 1 - g 'abc')");
+ CHECK_PARSE_EQ("abc+?", "(# 1 - n 'abc')");
+ CHECK_PARSE_EQ("xyz?", "(# 0 1 g 'xyz')");
+ CHECK_PARSE_EQ("xyz??", "(# 0 1 n 'xyz')");
+ CHECK_PARSE_EQ("xyz{0,1}", "(# 0 1 g 'xyz')");
+ CHECK_PARSE_EQ("xyz{0,1}?", "(# 0 1 n 'xyz')");
+ CHECK_PARSE_EQ("xyz{93}", "(# 93 93 g 'xyz')");
+ CHECK_PARSE_EQ("xyz{93}?", "(# 93 93 n 'xyz')");
+ CHECK_PARSE_EQ("xyz{1,32}", "(# 1 32 g 'xyz')");
+ CHECK_PARSE_EQ("xyz{1,32}?", "(# 1 32 n 'xyz')");
+ CHECK_PARSE_EQ("xyz{1,}", "(# 1 - g 'xyz')");
+ CHECK_PARSE_EQ("xyz{1,}?", "(# 1 - n 'xyz')");
+ CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\fb\nc\rd\te\vf'");
+ CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\nb' @b 'c')");
+ CHECK_PARSE_EQ("(?:foo)", "'foo'");
+ CHECK_PARSE_EQ("(?: foo )", "' foo '");
+ CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
+ CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^
(| 'bar' 'baz')) 'quux')");
+ CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
+ CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
+ CHECK_PARSE_EQ("()", "(^ %)");
+ CHECK_PARSE_EQ("(?=)", "(-> + %)");
+ CHECK_PARSE_EQ("[]", "%");
+ CHECK_PARSE_EQ("[x]", "[x]");
+ CHECK_PARSE_EQ("[xyz]", "[x y z]");
+ CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
+ CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
+ CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
+ CHECK_PARSE_EQ("]", "']'");
+ CHECK_PARSE_EQ("}", "'}'");
+ CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
+ CHECK_PARSE_EQ("[\\w]", "[&w]");
+ CHECK_PARSE_EQ("[x\\wz]", "[x &w z]");
+ CHECK_PARSE_EQ("[\\w-z]", "[&w - z]");
+ CHECK_PARSE_EQ("[\\w-\\d]", "[&w - &d]");
+ CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\n\n\t\t\v\v'");
+ CHECK_PARSE_EQ("\\c!", "'c!'");
+ CHECK_PARSE_EQ("\\c_", "'c_'");
+ CHECK_PARSE_EQ("\\c~", "'c~'");
+ CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
+ CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
+ CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #
]");
+ CHECK_PARSE_EQ("\\0", "'\0'");
+ CHECK_PARSE_EQ("\\8", "'8'");
+ CHECK_PARSE_EQ("\\9", "'9'");
+ CHECK_PARSE_EQ("\\11", "'\t'");
+ CHECK_PARSE_EQ("\\11a", "'\ta'");
+ CHECK_PARSE_EQ("\\011", "'\t'");
+ CHECK_PARSE_EQ("\\00011", "'\00011'");
+ CHECK_PARSE_EQ("\\118", "'\t8'");
+ CHECK_PARSE_EQ("\\111", "'I'");
+ CHECK_PARSE_EQ("\\1111", "'I1'");
+ CHECK_PARSE_EQ("(.)(.)(.)\\1", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 1))");
+ CHECK_PARSE_EQ("(.)(.)(.)\\2", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 2))");
+ CHECK_PARSE_EQ("(.)(.)(.)\\3", "(: (^ [&.]) (^ [&.]) (^ [&.]) (<- 3))");
+ CHECK_PARSE_EQ("(.)(.)(.)\\4", "(: (^ [&.]) (^ [&.]) (^ [&.]) '\x04')");
+ CHECK_PARSE_EQ("(.)(.)(.)\\1*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
" (# 0 - g (<- 1)))");
- ExpectParse("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
+ CHECK_PARSE_EQ("(.)(.)(.)\\2*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
" (# 0 - g (<- 2)))");
- ExpectParse("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
+ CHECK_PARSE_EQ("(.)(.)(.)\\3*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
" (# 0 - g (<- 3)))");
- ExpectParse("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
+ CHECK_PARSE_EQ("(.)(.)(.)\\4*", "(: (^ [&.]) (^ [&.]) (^ [&.])"
" (# 0 - g '\x04'))");
- ExpectParse("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10",
+ CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\10",
"(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])"
" (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (<- 10))");
- ExpectParse("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11",
+ CHECK_PARSE_EQ("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\\11",
"(: (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.])"
" (^ [&.]) (^ [&.]) (^ [&.]) (^ [&.]) '\x09')");
- ExpectParse("[\\0]", "[\0]");
- ExpectParse("[\\11]", "[\t]");
- ExpectParse("[\\11a]", "[\t a]");
- ExpectParse("[\\011]", "[\t]");
- ExpectParse("[\\00011]", "[\t]");
- ExpectParse("[\\118]", "[\t 8]");
- ExpectParse("[\\111]", "[I]");
- ExpectParse("[\\1111]", "[I 1]");
- ExpectParse("\\x34", "'\x34'");
- ExpectParse("\\x3z", "'\x03z'");
+ CHECK_PARSE_EQ("[\\0]", "[\0]");
+ CHECK_PARSE_EQ("[\\11]", "[\t]");
+ CHECK_PARSE_EQ("[\\11a]", "[\t a]");
+ CHECK_PARSE_EQ("[\\011]", "[\t]");
+ CHECK_PARSE_EQ("[\\00011]", "[\000 1 1]");
+ CHECK_PARSE_EQ("[\\118]", "[\t 8]");
+ CHECK_PARSE_EQ("[\\111]", "[I]");
+ CHECK_PARSE_EQ("[\\1111]", "[I 1]");
+ CHECK_PARSE_EQ("\\x34", "'\x34'");
+ CHECK_PARSE_EQ("\\x3z", "'x3z'");
+ CHECK_PARSE_EQ("\\u0034", "'\x34'");
+ CHECK_PARSE_EQ("\\u003z", "'u003z'");
}
@@ -213,8 +222,6 @@
ExpectError("[a-\\w]", kIllegalCharacterClass);
const char* kEndControl = "\\c at end of pattern";
ExpectError("\\c", kEndControl);
- const char* kIllegalControl = "Illegal control letter";
- ExpectError("\\c!", kIllegalControl);
}
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"v8-dev" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at
http://groups.google.com/group/v8-dev?hl=en
-~----------~----~----~----~------~----~------~--~---