Author: [EMAIL PROTECTED]
Date: Wed Nov 19 01:37:50 2008
New Revision: 790
Modified:
branches/experimental/regexp2000/src/assembler-re2k-inl.h
branches/experimental/regexp2000/src/assembler-re2k.cc
branches/experimental/regexp2000/src/assembler-re2k.h
branches/experimental/regexp2000/src/bytecodes-re2k.h
branches/experimental/regexp2000/src/interpreter-re2k.cc
branches/experimental/regexp2000/src/interpreter-re2k.h
branches/experimental/regexp2000/src/jsregexp-inl.h
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/jsregexp.h
branches/experimental/regexp2000/src/parser.cc
branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
branches/experimental/regexp2000/src/regexp-macro-assembler.h
branches/experimental/regexp2000/test/cctest/test-regexp.cc
branches/experimental/regexp2000/test/mjsunit/regexp.js
branches/experimental/regexp2000/test/mjsunit/unicode-test.js
Log:
* No failures on our own tests.
* 26 failures on Mozilla tests.
* Remember to include linebreaks in \s
* Interpreter takes flat 16 bit strings as input.
* Remove dubious test from mjsunit/regexp.js
(http://code.google.com/p/v8/issues/detail?id=152)
* Add debugging help (off by default) to unicode-test.js
* The regexp-macro-assembler interface now has the concept
of a current_character register.
* Removed CheckCharacterClass from regexp-macro-assembler
(too high level an operation for this level).
* Introduce CheckCharacterLT and CheckCharacterGT to the
macro assembler interface.
* Make the re2k assembler use a growable instruction buffer
to eliminate an arbitrary size limit.
* Add --trace-regexp-bytecodes option to debug build.
* Make RegExpNode::GoTo virtual so the backtrack node can
just inline itself.
* Add protected RegExpNode::Bind() that subclasses use when
emitting their code.
* Limit max recursion in Emit stage to avoid stack overflow.
* Remember to reserve at least 2 registers for 0th capture.
* Bail out to JSCRE when encountering \b, ^, $.
* Fix code emission and implement guards on ChoiceNode.
(Still doesn't use dispatch table).
* Implement code emission for TextNode.
* Remember to set up backtrack when writing capture indices
to capture registers so they can be unwound if necessary.
* DispatchTableConstructor::VisitBackreference isn't yet
implemented, but we don't crash the VM. (Later we
discover the regexp has backreferences and defer to jscre).
* \b in a character class means backspace.
Review URL: http://codereview.chromium.org/11228
Modified: branches/experimental/regexp2000/src/assembler-re2k-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k-inl.h (original)
+++ branches/experimental/regexp2000/src/assembler-re2k-inl.h Wed Nov 19
01:37:50 2008
@@ -38,17 +38,29 @@
void Re2kAssembler::Emit(uint32_t byte) {
+ ASSERT(pc_ <= buffer_.length());
+ if (pc_ == buffer_.length()) {
+ Expand();
+ }
buffer_[pc_++] = byte;
}
void Re2kAssembler::Emit16(uint32_t word) {
+ ASSERT(pc_ <= buffer_.length());
+ if (pc_ + 1 >= buffer_.length()) {
+ Expand();
+ }
Store16(buffer_.start() + pc_, word);
pc_ += 2;
}
void Re2kAssembler::Emit32(uint32_t word) {
+ ASSERT(pc_ <= buffer_.length());
+ if (pc_ + 3 >= buffer_.length()) {
+ Expand();
+ }
Store32(buffer_.start() + pc_, word);
pc_ += 4;
}
Modified: branches/experimental/regexp2000/src/assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.cc (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.cc Wed Nov 19
01:37:50 2008
@@ -47,6 +47,9 @@
Re2kAssembler::~Re2kAssembler() {
+ if (own_buffer_) {
+ buffer_.Dispose();
+ }
}
@@ -173,22 +176,17 @@
}
-void Re2kAssembler::CheckRange(uc16 start, uc16 end, Label* on_mismatch) {
- if (start == end) {
- CheckChar(start, on_mismatch);
- }
- Emit(BC_CHECK_RANGE);
- Emit16(start);
- Emit16(end);
- EmitOrLink(on_mismatch);
+void Re2kAssembler::CheckCharacterLT(uc16 limit, Label* on_less) {
+ Emit(BC_CHECK_LT);
+ Emit16(limit);
+ EmitOrLink(on_less);
}
-void Re2kAssembler::CheckNotRange(uc16 start, uc16 end, Label* on_match) {
- Emit(BC_CHECK_NOT_RANGE);
- Emit16(start);
- Emit16(end);
- EmitOrLink(on_match);
+void Re2kAssembler::CheckCharacterGT(uc16 limit, Label* on_greater) {
+ Emit(BC_CHECK_GT);
+ Emit16(limit);
+ EmitOrLink(on_greater);
}
@@ -284,5 +282,18 @@
void Re2kAssembler::Copy(Address a) {
memcpy(a, buffer_.start(), length());
}
+
+
+void Re2kAssembler::Expand() {
+ bool old_buffer_was_our_own = own_buffer_;
+ Vector<byte> old_buffer = buffer_;
+ buffer_ = Vector<byte>::New(old_buffer.length() * 2);
+ own_buffer_ = true;
+ memcpy(buffer_.start(), old_buffer.start(), old_buffer.length());
+ if (old_buffer_was_our_own) {
+ old_buffer.Dispose();
+ }
+}
+
} } // namespace v8::internal
Modified: branches/experimental/regexp2000/src/assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.h (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.h Wed Nov 19
01:37:50 2008
@@ -60,11 +60,11 @@
void CheckChar(uc16 c, Label* on_mismatch);
void CheckNotChar(uc16 c, Label* on_match);
- // Checks current char register against a range.
- void CheckRange(uc16 start, uc16 end, Label* on_mismatch);
- void CheckNotRange(uc16 start, uc16 end, Label* on_match);
+ // Used to check current char register against a range.
+ void CheckCharacterLT(uc16 limit, Label* on_less);
+ void CheckCharacterGT(uc16 limit, Label* on_greater);
- // Checks current position (plus optional offset) for a match against a
+ // Checks current position for a match against a
// previous capture. Advances current position by the length of the
capture
// iff it matches. The capture is stored in a given register and the
// the register after. If a register contains -1 then the other register
@@ -122,6 +122,8 @@
// True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_;
+
+ void Expand();
};
Modified: branches/experimental/regexp2000/src/bytecodes-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/bytecodes-re2k.h (original)
+++ branches/experimental/regexp2000/src/bytecodes-re2k.h Wed Nov 19
01:37:50 2008
@@ -49,8 +49,8 @@
V(LOAD_CURRENT_CHAR, 14, 9) /* load offset32
addr32 */ \
V(CHECK_CHAR, 15, 7) /* check_char uc16
addr32 */ \
V(CHECK_NOT_CHAR, 16, 7) /* check_not_char uc16
addr32 */ \
-V(CHECK_RANGE, 17, 9) /* check_range uc16 uc16
addr32 */ \
-V(CHECK_NOT_RANGE, 18, 9) /* check_not_range uc16 uc16
addr32 */ \
+V(CHECK_LT, 17, 7) /* check_lt uc16
addr32 */ \
+V(CHECK_GT, 18, 7) /* check_gr uc16
addr32 */ \
V(CHECK_BACKREF, 19, 9) /* check_backref offset32 capture_idx
addr32 */ \
V(CHECK_NOT_BACKREF, 20, 9) /* check_not_backref offset32 capture_idx
addr32*/ \
V(LOOKUP_MAP1, 21, 11) /* l_map1 start16 bit_map_addr32
addr32 */ \
@@ -64,6 +64,11 @@
static const int BC_##name = code;
BYTECODE_ITERATOR(DECLARE_BYTECODES)
#undef DECLARE_BYTECODES
+
+#define DECLARE_BYTECODE_LENGTH(name, code, length) \
+ static const int BC_##name##_LENGTH = length;
+BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
+#undef DECLARE_BYTECODE_LENGTH
} }
#endif // V8_BYTECODES_IA32_H_
Modified: branches/experimental/regexp2000/src/interpreter-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/interpreter-re2k.cc (original)
+++ branches/experimental/regexp2000/src/interpreter-re2k.cc Wed Nov 19
01:37:50 2008
@@ -39,15 +39,35 @@
#ifdef DEBUG
-# define BYTECODE(name) break; \
- case BC_##name: \
- if (FLAG_trace_regexp_bytecodes) { \
- PrintF("pc = %d, current = %d, bc = " \
- #name "\n", pc - code_base, current); \
- }
+static void TraceInterpreter(const byte* code_base,
+ const byte* pc,
+ int stack_depth,
+ int current_position,
+ int bytecode_length,
+ const char* bytecode_name) {
+ if (FLAG_trace_regexp_bytecodes) {
+ PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
+ pc - code_base,
+ stack_depth,
+ current_position,
+ bytecode_name);
+ for (int i = 1; i < bytecode_length; i++) {
+ printf(", %02x", pc[i]);
+ }
+ printf("\n");
+ }
+}
+
+
+# define BYTECODE(name) case
BC_##name: \
+
TraceInterpreter(code_base, \
+
pc, \
+ backtrack_sp -
backtrack_stack, \
+
current, \
+
BC_##name##_LENGTH, \
+ #name);
#else
-# define BYTECODE(name) break; \
- case BC_##name:
+# define BYTECODE(name) case BC_##name:
#endif
@@ -57,8 +77,8 @@
int* registers,
int current) {
const byte* pc = code_base;
- int backtrack_stack[1000];
- int backtrack_stack_space = 1000;
+ int backtrack_stack[10000];
+ int backtrack_stack_space = 10000;
int* backtrack_sp = backtrack_stack;
int current_char = -1;
#ifdef DEBUG
@@ -76,106 +96,122 @@
return false; // No match on backtrack stack overflow.
}
*backtrack_sp++ = current + Load32(pc + 1);
- pc += 5;
+ pc += BC_PUSH_CP_LENGTH;
+ break;
BYTECODE(PUSH_BT)
if (--backtrack_stack_space < 0) {
return false; // No match on backtrack stack overflow.
}
*backtrack_sp++ = Load32(pc + 1);
- pc += 5;
+ pc += BC_PUSH_BT_LENGTH;
+ break;
BYTECODE(PUSH_REGISTER)
if (--backtrack_stack_space < 0) {
return false; // No match on backtrack stack overflow.
}
*backtrack_sp++ = registers[pc[1]];
- pc += 2;
+ pc += BC_PUSH_REGISTER_LENGTH;
+ break;
BYTECODE(SET_REGISTER)
registers[pc[1]] = Load32(pc + 2);
- pc += 6;
+ pc += BC_SET_REGISTER_LENGTH;
+ break;
BYTECODE(ADVANCE_REGISTER)
registers[pc[1]] += Load32(pc + 2);
- pc += 6;
+ pc += BC_ADVANCE_REGISTER_LENGTH;
+ break;
BYTECODE(SET_REGISTER_TO_CP)
registers[pc[1]] = current + Load32(pc + 2);
- pc += 6;
+ pc += BC_SET_REGISTER_TO_CP_LENGTH;
+ break;
BYTECODE(POP_CP)
backtrack_stack_space++;
--backtrack_sp;
current = *backtrack_sp;
- pc += 1;
+ pc += BC_POP_CP_LENGTH;
+ break;
BYTECODE(POP_BT)
backtrack_stack_space++;
--backtrack_sp;
pc = code_base + *backtrack_sp;
+ break;
BYTECODE(POP_REGISTER)
backtrack_stack_space++;
--backtrack_sp;
registers[pc[1]] = *backtrack_sp;
- pc += 2;
+ pc += BC_POP_REGISTER_LENGTH;
+ break;
BYTECODE(FAIL)
return false;
BYTECODE(SUCCEED)
return true;
BYTECODE(ADVANCE_CP)
current += Load32(pc + 1);
- pc += 5;
+ pc += BC_ADVANCE_CP_LENGTH;
+ break;
BYTECODE(GOTO)
pc = code_base + Load32(pc + 1);
+ break;
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + Load32(pc + 1);
if (pos >= subject.length()) {
pc = code_base + Load32(pc + 5);
} else {
current_char = subject[pos];
- pc += 9;
+ pc += BC_LOAD_CURRENT_CHAR_LENGTH;
}
+ break;
}
BYTECODE(CHECK_CHAR) {
int c = Load16(pc + 1);
if (c != current_char) {
pc = code_base + Load32(pc + 3);
} else {
- pc += 7;
+ pc += BC_CHECK_CHAR_LENGTH;
}
+ break;
}
BYTECODE(CHECK_NOT_CHAR) {
int c = Load16(pc + 1);
if (c == current_char) {
pc = code_base + Load32(pc + 3);
} else {
- pc += 7;
+ pc += BC_CHECK_NOT_CHAR_LENGTH;
}
+ break;
}
- BYTECODE(CHECK_RANGE) {
- int start = Load16(pc + 1);
- int end = Load16(pc + 3);
- if (current_char < start || current_char > end) {
- pc = code_base + Load32(pc + 5);
+ BYTECODE(CHECK_LT) {
+ int limit = Load16(pc + 1);
+ if (current_char < limit) {
+ pc = code_base + Load32(pc + 3);
} else {
- pc += 9;
+ pc += BC_CHECK_LT_LENGTH;
}
+ break;
}
- BYTECODE(CHECK_NOT_RANGE) {
- int start = Load16(pc + 1);
- int end = Load16(pc + 3);
- if (current_char >= start && current_char <= end) {
- pc = code_base + Load32(pc + 5);
+ BYTECODE(CHECK_GT) {
+ int limit = Load16(pc + 1);
+ if (current_char > limit) {
+ pc = code_base + Load32(pc + 3);
} else {
- pc += 9;
+ pc += BC_CHECK_GT_LENGTH;
}
+ break;
}
BYTECODE(CHECK_REGISTER_LT)
if (registers[pc[1]] < Load16(pc + 2)) {
pc = code_base + Load32(pc + 4);
} else {
- pc += 8;
+ pc += BC_CHECK_REGISTER_LT_LENGTH;
}
+ break;
BYTECODE(CHECK_REGISTER_GE)
if (registers[pc[1]] >= Load16(pc + 2)) {
pc = code_base + Load32(pc + 4);
} else {
- pc += 8;
+ pc += BC_CHECK_REGISTER_GE_LENGTH;
}
+ break;
BYTECODE(LOOKUP_MAP1) {
// Look up character in a bitmap. If we find a 0, then jump to the
// location at pc + 7. Otherwise fall through!
@@ -185,8 +221,9 @@
if (map == 0) {
pc = code_base + Load32(pc + 7);
} else {
- pc += 11;
+ pc += BC_LOOKUP_MAP1_LENGTH;
}
+ break;
}
BYTECODE(LOOKUP_MAP2) {
// Look up character in a half-nibble map. If we find 00, then
jump to
@@ -208,6 +245,7 @@
pc = code_base + Load32(pc + 19);
}
}
+ break;
}
BYTECODE(LOOKUP_MAP8) {
// Look up character in a byte map. Use the byte as an index into
a
@@ -216,6 +254,7 @@
byte map = code_base[Load32(pc + 3) + index];
const byte* new_pc = code_base + Load32(pc + 7) + (map << 2);
pc = code_base + Load32(new_pc);
+ break;
}
BYTECODE(LOOKUP_HI_MAP8) {
// Look up high byte of this character in a byte map. Use the
byte as
@@ -224,12 +263,14 @@
byte map = code_base[Load32(pc + 2) + index];
const byte* new_pc = code_base + Load32(pc + 6) + (map << 2);
pc = code_base + Load32(new_pc);
+ break;
}
BYTECODE(CHECK_BACKREF)
UNREACHABLE();
+ break;
BYTECODE(CHECK_NOT_BACKREF)
UNREACHABLE();
- break; // Last one doesn't have break in macro.
+ break;
default:
UNREACHABLE();
break;
@@ -239,15 +280,18 @@
bool Re2kInterpreter::Match(Handle<ByteArray> code_array,
- Handle<String> subject,
+ Handle<String> subject16,
int* registers,
int start_position) {
+ ASSERT(StringShape(*subject16).IsTwoByteRepresentation());
+ ASSERT(subject16->IsFlat(StringShape(*subject16)));
+
+
+ AssertNoAllocation a;
const byte* code_base = code_array->GetDataStartAddress();
- ASSERT(subject->IsFlat(StringShape(*subject)));
- Handle<String> flat_two_byte =
RegExpImpl::CachedStringToTwoByte(subject);
- ASSERT(StringShape(*flat_two_byte).IsTwoByteRepresentation());
return RawMatch(code_base,
- flat_two_byte->ToUC16Vector(),
+ Vector<const uc16>(subject16->GetTwoByteData(),
+ subject16->length()),
registers,
start_position);
}
Modified: branches/experimental/regexp2000/src/interpreter-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/interpreter-re2k.h (original)
+++ branches/experimental/regexp2000/src/interpreter-re2k.h Wed Nov 19
01:37:50 2008
@@ -36,7 +36,7 @@
class Re2kInterpreter {
public:
static bool Match(Handle<ByteArray> code,
- Handle<String> subject,
+ Handle<String> subject16,
int* captures,
int start_position);
};
Modified: branches/experimental/regexp2000/src/jsregexp-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp-inl.h (original)
+++ branches/experimental/regexp2000/src/jsregexp-inl.h Wed Nov 19 01:37:50
2008
@@ -30,6 +30,7 @@
#include "jsregexp.h"
+#include "regexp-macro-assembler.h"
namespace v8 {
@@ -250,6 +251,11 @@
DoForEach<Node, Callback>(node->left(), callback);
callback->Call(node->key(), node->value());
DoForEach<Node, Callback>(node->right(), callback);
+}
+
+
+void RegExpNode::Bind(RegExpMacroAssembler* macro) {
+ macro->Bind(&label_);
}
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Wed Nov 19 01:37:50
2008
@@ -460,25 +460,31 @@
Handle<Object> RegExpImpl::Re2kExecOnce(Handle<JSRegExp> regexp,
int num_captures,
- Handle<String> subject,
+ Handle<String> two_byte_subject,
int previous_index,
- const uc16* two_byte_subject,
int* offsets_vector,
int offsets_vector_length) {
+#ifdef DEBUG
+ if (FLAG_trace_regexp_bytecodes) {
+ String* pattern = regexp->Pattern();
+ PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
+ PrintF("\n\nSubject string: '%s'\n\n",
*(two_byte_subject->ToCString()));
+ }
+#endif
+ ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
+ ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
bool rc;
{
for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
- AssertNoAllocation a;
-
- LOG(RegExpExecEvent(regexp, previous_index, subject));
+ LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
Handle<ByteArray> byte_codes = Re2kCode(regexp);
rc = Re2kInterpreter::Match(byte_codes,
- subject,
+ two_byte_subject,
offsets_vector,
previous_index);
}
@@ -605,13 +611,13 @@
Handle<String> subject16 = CachedStringToTwoByte(subject);
- Handle<Object> result(Re2kExecOnce(regexp,
- num_captures,
- subject,
- previous_index,
- subject16->GetTwoByteData(),
- offsets.vector(),
- offsets.length()));
+ Handle<Object> result(
+ Re2kExecOnce(regexp,
+ num_captures,
+ subject16,
+ previous_index,
+ offsets.vector(),
+ offsets.length()));
return result;
}
@@ -671,9 +677,8 @@
} else {
matches = Re2kExecOnce(regexp,
Re2kNumberOfCaptures(regexp),
- subject,
+ subject16,
previous_index,
- subject16->GetTwoByteData(),
offsets.vector(),
offsets.length());
@@ -845,11 +850,17 @@
EndNode* accept() { return accept_; }
EndNode* backtrack() { return backtrack_; }
+ static const int kMaxRecursion = 100;
+ inline int recursion_depth() { return recursion_depth_; }
+ inline void IncrementRecursionDepth() { recursion_depth_++; }
+ inline void DecrementRecursionDepth() { recursion_depth_--; }
+
private:
EndNode* accept_;
EndNode* backtrack_;
int next_register_;
List<RegExpNode*>* work_list_;
+ int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
};
@@ -857,8 +868,9 @@
// Attempts to compile the regexp using a Regexp2000 code generator.
Returns
// a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(int capture_count)
- : next_register_(2 * capture_count),
- work_list_(NULL) {
+ : next_register_(2 * (capture_count + 1)),
+ work_list_(NULL),
+ recursion_depth_(0) {
accept_ = new EndNode(EndNode::ACCEPT);
backtrack_ = new EndNode(EndNode::BACKTRACK);
}
@@ -880,7 +892,7 @@
return Handle<FixedArray>::null();
}
while (!work_list.is_empty()) {
- if (!work_list.RemoveLast()->Emit(this)) {
+ if (!work_list.RemoveLast()->GoTo(this)) {
fail.Unuse();
return Handle<FixedArray>::null();
}
@@ -903,27 +915,66 @@
bool RegExpNode::GoTo(RegExpCompiler* compiler) {
+ // TODO(erikcorry): Implement support.
+ if (info_.follows_word_interest ||
+ info_.follows_newline_interest ||
+ info_.follows_start_interest) {
+ return false;
+ }
if (label_.is_bound()) {
compiler->macro_assembler()->GoTo(&label_);
return true;
} else {
- return Emit(compiler);
+ if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) {
+ compiler->macro_assembler()->GoTo(&label_);
+ compiler->AddWork(this);
+ return true;
+ } else {
+ compiler->IncrementRecursionDepth();
+ bool how_it_went = Emit(compiler);
+ compiler->DecrementRecursionDepth();
+ return how_it_went;
+ }
}
}
+bool EndNode::GoTo(RegExpCompiler* compiler) {
+ if (info()->follows_word_interest ||
+ info()->follows_newline_interest ||
+ info()->follows_start_interest) {
+ return false;
+ }
+ if (!label()->is_bound()) {
+ Bind(compiler->macro_assembler());
+ }
+ switch (action_) {
+ case ACCEPT:
+ compiler->macro_assembler()->Succeed();
+ break;
+ case BACKTRACK:
+ compiler->macro_assembler()->Backtrack();
+ break;
+ }
+ return true;
+}
+
+
Label* RegExpNode::label() {
return &label_;
}
bool EndNode::Emit(RegExpCompiler* compiler) {
+ RegExpMacroAssembler* macro = compiler->macro_assembler();
switch (action_) {
case ACCEPT:
- compiler->macro_assembler()->Succeed();
+ Bind(macro);
+ macro->Succeed();
return true;
case BACKTRACK:
- compiler->macro_assembler()->Backtrack();
+ Bind(macro);
+ macro->Backtrack();
return true;
}
return false;
@@ -995,47 +1046,136 @@
// Emit code.
-void ChoiceNode::GenerateGuard(RegExpCompiler* compiler,
- Guard *guard,
+void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
+ Guard* guard,
Label* on_failure) {
+ switch (guard->op()) {
+ case Guard::LT:
+ macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
on_failure);
+ break;
+ case Guard::GEQ:
+ macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
on_failure);
+ break;
+ }
+}
+
+
+bool TextNode::Emit(RegExpCompiler* compiler) {
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ Bind(macro_assembler);
+ int element_count = elms_->length();
+ int cp_offset = 0;
+ for (int i = 0; i < element_count; i++) {
+ TextElement elm = (*elms_)[i];
+ switch (elm.type) {
+ case TextElement::ATOM: {
+ Vector<const uc16> quarks = elm.data.u_atom->data();
+ macro_assembler->CheckCharacters(quarks,
+ cp_offset,
+ on_failure_->label());
+ cp_offset += quarks.length();
+ break;
+ }
+ case TextElement::CHAR_CLASS: {
+ RegExpCharacterClass* cc = elm.data.u_char_class;
+ if (cc->is_negated()) return false;
+ macro_assembler->LoadCurrentCharacter(cp_offset,
on_failure_->label());
+ cp_offset++;
+
+ ZoneList<CharacterRange>* ranges = cc->ranges();
+
+ Label found;
+
+ int range_count = ranges->length();
+
+ if (range_count == 0) {
+ on_failure()->GoTo(compiler);
+ break;
+ }
+
+ for (int i = 0; i < range_count - 1; i++) {
+ CharacterRange& range = (*ranges)[i];
+ Label next_range;
+ uc16 from = range.from();
+ uc16 to = range.to();
+ if (from != 0) {
+ macro_assembler->CheckCharacterLT(from, &next_range);
+ }
+ if (to != 0xffff) {
+ macro_assembler->CheckCharacterLT(to + 1, &found);
+ } else {
+ macro_assembler->AdvanceCurrentPosition(1);
+ on_success()->GoTo(compiler);
+ }
+ macro_assembler->Bind(&next_range);
+ }
+
+ CharacterRange& range = (*ranges)[range_count - 1];
+ uc16 from = range.from();
+ uc16 to = range.to();
+ if (from != 0) {
+ macro_assembler->CheckCharacterLT(from, on_failure_->label());
+ }
+ if (to != 0xffff) {
+ macro_assembler->CheckCharacterGT(to, on_failure_->label());
+ }
+ compiler->AddWork(on_failure_);
+ macro_assembler->Bind(&found);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ return false;
+ }
+ }
+ macro_assembler->AdvanceCurrentPosition(cp_offset);
+ return on_success()->GoTo(compiler);
}
bool ChoiceNode::Emit(RegExpCompiler* compiler) {
int choice_count = alternatives_->length();
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ Bind(macro_assembler);
// For now we just call all choices one after the other. The idea
ultimately
// is to use the Dispatch table to try only the relevant ones.
- for (int i = 0; i < choice_count; i++) {
+ int i;
+ for (i = 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = (*alternatives_)[i];
Label after;
- Label* next_alternative;
- if (i < choice_count - 1) {
- next_alternative = &after;
- } else {
- next_alternative = on_failure_->label();
- }
+ Label after_no_pop_cp;
ZoneList<Guard*>* guards = alternative.guards();
if (guards != NULL) {
int guard_count = guards->length();
for (int j = 0; j < guard_count; j++) {
- GenerateGuard(compiler, (*guards)[i], next_alternative);
+ GenerateGuard(macro_assembler, (*guards)[j], &after_no_pop_cp);
}
}
- macro_assembler->PushBacktrack(next_alternative);
- if (!alternative.node()->Emit(compiler)) {
+ macro_assembler->PushCurrentPosition();
+ macro_assembler->PushBacktrack(&after);
+ if (!alternative.node()->GoTo(compiler)) {
after.Unuse();
- if (next_alternative != &after) {
- next_alternative->Unuse();
- }
+ after_no_pop_cp.Unuse();
return false;
}
- if (i < choice_count - 1) {
- macro_assembler->Bind(&after);
- } else {
- after.Unuse();
+ macro_assembler->Bind(&after);
+ macro_assembler->PopCurrentPosition();
+ macro_assembler->Bind(&after_no_pop_cp);
+ }
+ GuardedAlternative alternative = (*alternatives_)[i];
+ ZoneList<Guard*>* guards = alternative.guards();
+ if (guards != NULL) {
+ int guard_count = guards->length();
+ for (int j = 0; j < guard_count; j++) {
+ GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label());
}
}
+ if (!on_failure_->IsBacktrack()) {
+ macro_assembler->PushBacktrack(on_failure_->label());
+ }
+ if (!alternative.node()->GoTo(compiler)) {
+ return false;
+ }
compiler->AddWork(on_failure_);
return true;
}
@@ -1043,20 +1183,44 @@
bool ActionNode::Emit(RegExpCompiler* compiler) {
RegExpMacroAssembler* macro = compiler->macro_assembler();
+ Bind(macro);
switch (type_) {
case STORE_REGISTER:
macro->SetRegister(data_.u_store_register.reg,
data_.u_store_register.value);
break;
- case INCREMENT_REGISTER:
+ case INCREMENT_REGISTER: {
+ Label undo;
+ macro->PushBacktrack(&undo);
macro->AdvanceRegister(data_.u_increment_register.reg, 1);
+ bool ok = on_success()->GoTo(compiler);
+ if (!ok) {
+ undo.Unuse();
+ return false;
+ }
+ macro->Bind(&undo);
+ macro->AdvanceRegister(data_.u_increment_register.reg, -1);
+ macro->Backtrack();
break;
- case STORE_POSITION:
- macro->PushCurrentPosition();
+ }
+ case STORE_POSITION: {
+ Label undo;
+ macro->PushRegister(data_.u_position_register.reg);
+ macro->PushBacktrack(&undo);
+ macro->WriteCurrentPositionToRegister(data_.u_position_register.reg);
+ bool ok = on_success()->GoTo(compiler);
+ if (!ok) {
+ undo.Unuse();
+ return false;
+ }
+ macro->Bind(&undo);
+ macro->PopRegister(data_.u_position_register.reg);
+ macro->Backtrack();
break;
+ }
case RESTORE_POSITION:
- macro->PopCurrentPosition();
- break;
+ // TODO(erikcorry): Implement this.
+ return false;
case BEGIN_SUBMATCH:
// TODO(erikcorry): Implement this.
return false;
@@ -1070,8 +1234,7 @@
UNREACHABLE();
return false;
}
- compiler->AddWork(on_success());
- return true;
+ return on_success()->GoTo(compiler);
}
@@ -1565,9 +1728,9 @@
static const int kSpaceRangeCount = 20;
static const uc16 kSpaceRanges[kSpaceRangeCount] = {
- 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0,
- 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F,
- 0x205F, 0x205F, 0x3000, 0x3000
+ 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
+ 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
+ 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
};
@@ -1969,7 +2132,7 @@
void DispatchTableConstructor::VisitBackreference(BackreferenceNode* that)
{
- UNIMPLEMENTED();
+ // TODO(plesner): What should this do?
}
@@ -2055,7 +2218,7 @@
if (node_return != NULL) *node_return = node;
Analysis analysis;
analysis.EnsureAnalyzed(node);
- byte codes[10240];
+ byte codes[1024];
Re2kAssembler assembler(Vector<byte>(codes, 1024));
RegExpMacroAssemblerRe2k macro_assembler(&assembler);
return compiler.Assemble(¯o_assembler,
Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h (original)
+++ branches/experimental/regexp2000/src/jsregexp.h Wed Nov 19 01:37:50 2008
@@ -30,6 +30,10 @@
namespace v8 { namespace internal {
+
+class RegExpMacroAssembler;
+
+
class RegExpImpl {
public:
// Creates a regular expression literal in the old space.
@@ -151,9 +155,8 @@
static Handle<Object> Re2kExecOnce(Handle<JSRegExp> regexp,
int num_captures,
- Handle<String> subject,
+ Handle<String> subject16,
int previous_index,
- const uc16* utf8_subject,
int* ovector,
int ovector_length);
@@ -476,7 +479,7 @@
class SiblingList {
-public:
+ public:
SiblingList() : list_(NULL) { }
int length() {
return list_ == NULL ? 0 : list_->length();
@@ -489,7 +492,7 @@
}
void Add(RegExpNode* node) { list_->Add(node); }
RegExpNode* Get(int index) { return list_->at(index); }
-private:
+ private:
ZoneList<RegExpNode*>* list_;
};
@@ -501,7 +504,7 @@
// Generates a goto to this node or actually generates the code at this
point.
// Until the implementation is complete we will return true for success
and
// false for failure.
- bool GoTo(RegExpCompiler* compiler);
+ virtual bool GoTo(RegExpCompiler* compiler);
Label* label();
// Until the implementation is complete we will return true for success
and
@@ -513,6 +516,8 @@
RegExpNode* GetSibling(NodeInfo* info);
void EnsureSiblings() { siblings_.Ensure(this); }
void AddSibling(RegExpNode* node) { siblings_.Add(node); }
+ protected:
+ inline void Bind(RegExpMacroAssembler* macro);
private:
Label label_;
NodeInfo info_;
@@ -583,9 +588,9 @@
on_failure_(on_failure),
elms_(elms) { }
virtual void Accept(NodeVisitor* visitor);
- virtual bool Emit(RegExpCompiler* compiler) { return false; }
virtual RegExpNode* PropagateInterest(NodeInfo* info);
RegExpNode* on_failure() { return on_failure_; }
+ virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; }
private:
RegExpNode* on_failure_;
@@ -624,6 +629,7 @@
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateInterest(NodeInfo* info);
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
+ virtual bool GoTo(RegExpCompiler* compiler);
private:
Action action_;
};
@@ -678,7 +684,7 @@
bool being_calculated() { return being_calculated_; }
void set_being_calculated(bool b) { being_calculated_ = b; }
private:
- void GenerateGuard(RegExpCompiler* compiler,
+ void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard,
Label* on_failure);
RegExpNode* on_failure_;
Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc (original)
+++ branches/experimental/regexp2000/src/parser.cc Wed Nov 19 01:37:50 2008
@@ -4025,6 +4025,9 @@
ASSERT(has_next() && !IsSpecialClassEscape(next()));
Advance();
switch (current()) {
+ case 'b':
+ Advance();
+ return '\b';
// ControlEscape :: one of
// f n r t v
case 'f':
Modified:
branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc Wed
Nov 19 01:37:50 2008
@@ -121,8 +121,18 @@
}
-void RegExpMacroAssemblerIA32::CheckCharacterClass(RegExpCharacterClass
*cclass,
- Label* on_failure) {
+void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
+ Label*
on_end_of_input) {
+ UNREACHABLE(); // Not implemented.
+}
+
+
+void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label*
on_less) {
+ UNREACHABLE(); // Not implemented.
+}
+
+
+void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label*
on_greater) {
UNREACHABLE(); // Not implemented.
}
Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h Wed
Nov 19 01:37:50 2008
@@ -43,6 +43,9 @@
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void Backtrack();
virtual void Bind(Label* label);
+ virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
+
+
// Check the current character against a bitmap. The range of the
current
// character must be from start to start + length_of_bitmap_in_bits.
Modified:
branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc Wed
Nov 19 01:37:50 2008
@@ -123,46 +123,6 @@
}
-static void TwoWayCharacterClass(
- Re2kAssembler* assembler,
- RegExpCharacterClass* char_class,
- Label* on_match,
- Label* on_mismatch) {
- ZoneList<CharacterRange>* ranges = char_class->ranges();
- int range_count = ranges->length();
- if (!char_class->is_negated()) {
- for (int i = 0; i < range_count; i++) {
- CharacterRange& range = ranges->at(i);
- assembler->CheckRange(range.from(), range.to(), on_match);
- }
- if (on_mismatch == NULL) {
- assembler->PopBacktrack();
- } else {
- assembler->GoTo(on_mismatch);
- }
- } else { // range is negated.
- if (range_count == 0) {
- assembler->GoTo(on_match);
- } else {
- CharacterRange& previous = ranges->at(0);
- if (previous.from() > 0) {
- assembler->CheckRange(0, previous.from() - 1, on_match);
- }
- for (int i = 1; i < range_count; i++) {
- CharacterRange& range = ranges->at(i);
- if (previous.to() < range.from() - 1) {
- assembler->CheckRange(previous.to() + 1, range.from() - 1, on_match);
- }
- previous = range;
- }
- if (previous.to() < 65535) {
- assembler->CheckRange(previous.to() + 1, 65535, on_match);
- }
- }
- }
-}
-
-
void RegExpMacroAssemblerRe2k::CheckCurrentPosition(
int register_index,
Label* on_equal) {
@@ -171,23 +131,21 @@
}
-void RegExpMacroAssemblerRe2k::CheckCharacterClass(
- RegExpCharacterClass* char_class,
- int cp_offset,
- Label* on_failure) {
+void RegExpMacroAssemblerRe2k::LoadCurrentCharacter(int cp_offset,
+ Label* on_failure) {
assembler_->LoadCurrentChar(cp_offset, on_failure);
- if (!char_class->is_negated() &&
- char_class->ranges()->length() == 1 &&
- on_failure != NULL) {
- // This is the simple case where the char class has one range and we want to
- // fall through if it matches.
- CharacterRange& range = char_class->ranges()->at(0);
- assembler_->CheckNotRange(range.from(), range.to(), on_failure);
- } else {
- Label on_success;
- TwoWayCharacterClass(assembler_, char_class, &on_success, on_failure);
- assembler_->Bind(&on_success);
- }
+}
+
+
+void RegExpMacroAssemblerRe2k::CheckCharacterLT(uc16 limit,
+ Label* on_less) {
+ assembler_->CheckCharacterLT(limit, on_less);
+}
+
+
+void RegExpMacroAssemblerRe2k::CheckCharacterGT(uc16 limit,
+ Label* on_greater) {
+ assembler_->CheckCharacterGT(limit, on_greater);
}
Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h (original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h Wed Nov 19 01:37:50 2008
@@ -52,17 +52,13 @@
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg);
- virtual void CheckCharacterClass(
- RegExpCharacterClass* cclass,
- int cp_offset,
- Label* on_failure);
- virtual void CheckCharacters(
- Vector<const uc16> str,
- int cp_offset,
- Label* on_failure);
- virtual void CheckCurrentPosition(
- int register_index,
- Label* on_equal);
+ virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
+ virtual void CheckCharacterLT(uc16 limit, Label* on_less);
+ virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
+ virtual void CheckCharacters(Vector<const uc16> str,
+ int cp_offset,
+ Label* on_failure);
+ virtual void CheckCurrentPosition(int register_index, Label* on_equal);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
Modified: branches/experimental/regexp2000/src/regexp-macro-assembler.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler.h (original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler.h Wed Nov 19 01:37:50 2008
@@ -54,16 +54,11 @@
virtual void PopRegister(int register_index) = 0;
virtual void PushRegister(int register_index) = 0;
virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
- virtual void WriteCurrentPositionToRegister(int reg) = 0;
virtual void SetRegister(int register_index, int to) = 0;
- // Looks at the next character from the subject and if it doesn't match
- // then goto the on_failure label. End of input never matches. If the
- // label is NULL then we should pop a backtrack address off the stack and
- // go to that.
- virtual void CheckCharacterClass(
- RegExpCharacterClass* cclass,
- int cp_offset,
- Label* on_failure) = 0;
+ virtual void WriteCurrentPositionToRegister(int reg) = 0;
+ virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
+ virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
+ virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
// Check the current character for a match with a literal string. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Wed Nov 19 01:37:50 2008
@@ -277,7 +277,15 @@
static bool IsWhiteSpace(uc16 c) {
switch (c) {
- case 0x09: case 0x0B: case 0x0C: case 0x20: case 0xA0:
+ case 0x09:
+ case 0x0A:
+ case 0x0B:
+ case 0x0C:
+ case 0x0d:
+ case 0x20:
+ case 0xA0:
+ case 0x2028:
+ case 0x2029:
return true;
default:
return unibrow::Space::Is(c);
@@ -519,15 +527,18 @@
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("Now is the time"));
- CHECK(!Re2kInterpreter::Match(array, f1, captures, 0));
+ Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
+ CHECK(!Re2kInterpreter::Match(array, f1_16, captures, 0));
Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
- CHECK(Re2kInterpreter::Match(array, f2, captures, 0));
+ Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
+ CHECK(Re2kInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(2, captures[1]);
Handle<String> f3 =
Factory::NewStringFromAscii(CStrVector("tomfoolery"));
- CHECK(Re2kInterpreter::Match(array, f3, captures, 0));
+ Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
+ CHECK(Re2kInterpreter::Match(array, f3_16, captures, 0));
CHECK_EQ(3, captures[0]);
CHECK_EQ(5, captures[1]);
}
@@ -591,27 +602,32 @@
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("Now is the time"));
- CHECK(!Re2kInterpreter::Match(array, f1, captures, 0));
+ Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
+ CHECK(!Re2kInterpreter::Match(array, f1_16, captures, 0));
Handle<String> f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz"));
- CHECK(Re2kInterpreter::Match(array, f2, captures, 0));
+ Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
+ CHECK(Re2kInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(2, captures[1]);
Handle<String> f3 =
Factory::NewStringFromAscii(CStrVector("tomfoolery"));
- CHECK(Re2kInterpreter::Match(array, f3, captures, 0));
+ Handle<String> f3_16 = RegExpImpl::StringToTwoByte(f3);
+ CHECK(Re2kInterpreter::Match(array, f3_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(5, captures[1]);
Handle<String> f4 =
Factory::NewStringFromAscii(CStrVector("football buffoonery"));
- CHECK(Re2kInterpreter::Match(array, f4, captures, 0));
+ Handle<String> f4_16 = RegExpImpl::StringToTwoByte(f4);
+ CHECK(Re2kInterpreter::Match(array, f4_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(14, captures[1]);
Handle<String> f5 =
Factory::NewStringFromAscii(CStrVector("walking\nbarefoot"));
- CHECK(!Re2kInterpreter::Match(array, f5, captures, 0));
+ Handle<String> f5_16 = RegExpImpl::StringToTwoByte(f5);
+ CHECK(!Re2kInterpreter::Match(array, f5_16, captures, 0));
}
@@ -662,7 +678,8 @@
Handle<String> f1 =
Factory::NewStringFromAscii(CStrVector("foobar"));
- CHECK(Re2kInterpreter::Match(array, f1, captures, 0));
+ Handle<String> f1_16 = RegExpImpl::StringToTwoByte(f1);
+ CHECK(Re2kInterpreter::Match(array, f1_16, captures, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
@@ -671,7 +688,8 @@
Handle<String> f2 =
Factory::NewStringFromAscii(CStrVector("barfoo"));
- CHECK(!Re2kInterpreter::Match(array, f2, captures, 0));
+ Handle<String> f2_16 = RegExpImpl::StringToTwoByte(f2);
+ CHECK(!Re2kInterpreter::Match(array, f2_16, captures, 0));
CHECK_EQ(42, captures[0]);
}
@@ -770,5 +788,6 @@
TEST(Graph) {
+ V8::Initialize(NULL);
Execute("(a|^b|c)", "", true);
}
Modified: branches/experimental/regexp2000/test/mjsunit/regexp.js
==============================================================================
--- branches/experimental/regexp2000/test/mjsunit/regexp.js (original)
+++ branches/experimental/regexp2000/test/mjsunit/regexp.js Wed Nov 19 01:37:50 2008
@@ -89,7 +89,10 @@
// From ecma_3/RegExp/regress-334158.js
assertTrue(/\ca/.test( "\x01" ));
assertFalse(/\ca/.test( "\\ca" ));
-assertTrue(/\c[a/]/.test( "\x1ba/]" ));
+// Passes in KJS, fails in IrregularExpressions.
+// See http://code.google.com/p/v8/issues/detail?id=152
+//assertTrue(/\c[a/]/.test( "\x1ba/]" ));
+
// Test that we handle \s and \S correctly inside some bizarre
// character classes.
Modified: branches/experimental/regexp2000/test/mjsunit/unicode-test.js
==============================================================================
--- branches/experimental/regexp2000/test/mjsunit/unicode-test.js (original)
+++ branches/experimental/regexp2000/test/mjsunit/unicode-test.js Wed Nov 19 01:37:50 2008
@@ -9134,6 +9134,32 @@
assertEquals(munged_sizes[i - 1], munged.length, "munged size " + i);
}
+
+function hex(x) {
+ x &= 15;
+ if (x < 10) {
+ return String.fromCharCode(x + 48);
+ } else {
+ return String.fromCharCode(x + 97 - 10);
+ }
+}
+
+
+function dump_re(re) {
+ var out = "";
+ for (var i = 0; i < re.length; i++) {
+ var c = re.charCodeAt(i);
+ if (c >= 32 && c <= 126) {
+ out += re[i];
+ } else if (c < 256) {
+ out += "\\x" + hex(c >> 4) + hex(c);
+ } else {
+ out += "\\u" + hex(c >> 12) + hex(c >> 8) + hex(c >> 4) + hex(c);
+ }
+ }
+ print ("re = " + out);
+}
+
var thai_l_thingy = "\u0e44";
var thai_l_regexp = new RegExp(thai_l_thingy);
var thai_l_regexp2 = new RegExp("[" + thai_l_thingy + "]");
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---