Author: [EMAIL PROTECTED]
Date: Fri Nov 14 03:26:08 2008
New Revision: 753
Modified:
branches/experimental/regexp2000/src/factory.cc
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/jsregexp.h
branches/experimental/regexp2000/src/objects-debug.cc
branches/experimental/regexp2000/src/objects-inl.h
branches/experimental/regexp2000/src/objects.h
branches/experimental/regexp2000/test/cctest/test-regexp.cc
Log:
Wire Regexp2000 up to the normal JS RegExp object.
Review URL: http://codereview.chromium.org/10943
Modified: branches/experimental/regexp2000/src/factory.cc
==============================================================================
--- branches/experimental/regexp2000/src/factory.cc (original)
+++ branches/experimental/regexp2000/src/factory.cc Fri Nov 14 03:26:08 2008
@@ -706,8 +706,11 @@
ASSERT(type != INVALID_TYPE);
Handle<JSFunction> result =
- Factory::NewFunction(Factory::empty_symbol(), type, instance_size,
- code, true);
+ Factory::NewFunction(Factory::empty_symbol(),
+ type,
+ instance_size,
+ code,
+ true);
// Set class name.
Handle<Object> class_name = Handle<Object>(obj->class_name());
if (class_name->IsString()) {
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Fri Nov 14 03:26:08 2008
@@ -40,7 +40,10 @@
#include "compilation-cache.h"
#include "string-stream.h"
#include "parser.h"
+#include "assembler-re2k.h"
#include "regexp-macro-assembler.h"
+#include "regexp-macro-assembler-re2k.h"
+#include "interpreter-re2k.h"
// Including pcre.h undefines DEBUG to avoid getting debug output from
// the JSCRE implementation. Make sure to redefine it in debug mode
@@ -56,9 +59,6 @@
namespace v8 { namespace internal {
-#define CAPTURE_INDEX 0
-#define INTERNAL_INDEX 1
-
static Failure* malloc_failure;
static void* JSREMalloc(size_t size) {
@@ -229,7 +229,16 @@
result = AtomCompile(re, pattern, flags, pattern);
}
} else {
- result = JsrePrepare(re, pattern, flags);
+ RegExpNode* node = NULL;
+ Handle<FixedArray> re2k_data =
+ RegExpEngine::Compile(&parse_result,
+ &node,
+ flags.is_ignore_case());
+ if (re2k_data.is_null()) {
+ result = JscrePrepare(re, pattern, flags);
+ } else {
+ result = Re2kPrepare(re, pattern, flags, re2k_data);
+ }
}
Object* data = re->data();
if (data->IsFixedArray()) {
@@ -250,9 +259,11 @@
Handle<Object> index) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
- return JsreExec(regexp, subject, index);
+ return JscreExec(regexp, subject, index);
case JSRegExp::ATOM:
return AtomExec(regexp, subject, index);
+ case JSRegExp::RE2K:
+ return Re2kExec(regexp, subject, index);
default:
UNREACHABLE();
return Handle<Object>();
@@ -264,9 +275,11 @@
Handle<String> subject) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
- return JsreExecGlobal(regexp, subject);
+ return JscreExecGlobal(regexp, subject);
case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject);
+ case JSRegExp::RE2K:
+ return Re2kExecGlobal(regexp, subject);
default:
UNREACHABLE();
return Handle<Object>();
@@ -298,12 +311,8 @@
if (value == -1) return Factory::null_value();
Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0,
- Smi::FromInt(value),
- SKIP_WRITE_BARRIER);
- array->set(1,
- Smi::FromInt(value + needle->length()),
- SKIP_WRITE_BARRIER);
+ array->set(0, Smi::FromInt(value));
+ array->set(1, Smi::FromInt(value + needle->length()));
return Factory::NewJSArrayWithElements(array);
}
@@ -327,12 +336,8 @@
int end = value + needle_length;
Handle<FixedArray> array = Factory::NewFixedArray(2);
- array->set(0,
- Smi::FromInt(value),
- SKIP_WRITE_BARRIER);
- array->set(1,
- Smi::FromInt(end),
- SKIP_WRITE_BARRIER);
+ array->set(0, Smi::FromInt(value));
+ array->set(1, Smi::FromInt(end));
Handle<JSArray> pair = Factory::NewJSArrayWithElements(array);
SetElement(result, match_count, pair);
match_count++;
@@ -343,15 +348,24 @@
}
-Handle<Object>RegExpImpl::JsrePrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags) {
+Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags) {
Handle<Object> value(Heap::undefined_value());
Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
return re;
}
+Handle<Object>RegExpImpl::Re2kPrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ Handle<FixedArray> re2k_data) {
+ Factory::SetRegExpData(re, JSRegExp::RE2K, pattern, flags, re2k_data);
+ return re;
+}
+
+
static inline Object* DoCompile(String* pattern,
JSRegExp::Flags flags,
unsigned* number_of_captures,
@@ -398,7 +412,7 @@
}
-Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re) {
+Handle<Object> RegExpImpl::JscreCompile(Handle<JSRegExp> re) {
ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
@@ -435,26 +449,65 @@
Handle<ByteArray> internal(
ByteArray::FromDataStartAddress(reinterpret_cast<Address>(code)));
- Handle<FixedArray> value = Factory::NewFixedArray(2);
- value->set(CAPTURE_INDEX, Smi::FromInt(number_of_captures));
- value->set(INTERNAL_INDEX, *internal);
+ Handle<FixedArray> value = Factory::NewFixedArray(kJscreDataLength);
+ value->set(kJscreNumberOfCapturesIndex, Smi::FromInt(number_of_captures));
+ value->set(kJscreInternalIndex, *internal);
Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags, value);
return re;
}
-Handle<Object> RegExpImpl::JsreExecOnce(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExecOnce(Handle<JSRegExp> regexp,
int num_captures,
Handle<String> subject,
int previous_index,
const uc16* two_byte_subject,
int* offsets_vector,
int offsets_vector_length) {
+ bool rc;
+ {
+ for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
+ offsets_vector[i] = -1;
+ }
+
+ AssertNoAllocation a;
+
+ LOG(RegExpExecEvent(regexp, previous_index, subject));
+
+ Handle<ByteArray> byte_codes = Re2kCode(regexp);
+
+ rc = Re2kInterpreter::Match(byte_codes,
+ subject,
+ offsets_vector,
+ previous_index);
+ }
+
+ if (!rc) {
+ return Factory::null_value();
+ }
+
+ Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+ // The captures come in (start, end+1) pairs.
+ for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+ array->set(i, Smi::FromInt(offsets_vector[i]));
+ array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+ }
+ return Factory::NewJSArrayWithElements(array);
+}
+
+
+Handle<Object> RegExpImpl::JscreExecOnce(Handle<JSRegExp> regexp,
+ int num_captures,
+ Handle<String> subject,
+ int previous_index,
+ const uc16* two_byte_subject,
+ int* offsets_vector,
+ int offsets_vector_length) {
int rc;
{
AssertNoAllocation a;
- ByteArray* internal = JsreInternal(regexp);
+ ByteArray* internal = JscreInternal(regexp);
const JscreRegExp* js_regexp =
reinterpret_cast<JscreRegExp*>(internal->GetDataStartAddress());
@@ -488,12 +541,8 @@
Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
// The captures come in (start, end+1) pairs.
for (int i = 0; i < 2 * (num_captures+1); i += 2) {
- array->set(i,
- Smi::FromInt(offsets_vector[i]),
- SKIP_WRITE_BARRIER);
- array->set(i+1,
- Smi::FromInt(offsets_vector[i+1]),
- SKIP_WRITE_BARRIER);
+ array->set(i, Smi::FromInt(offsets_vector[i]));
+ array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
}
return Factory::NewJSArrayWithElements(array);
}
@@ -501,8 +550,8 @@
class OffsetsVector {
public:
- inline OffsetsVector(int num_captures) {
- offsets_vector_length_ = (num_captures + 1) * 3;
+ inline OffsetsVector(int num_registers) :
+ offsets_vector_length_(num_registers) {
if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
vector_ = NewArray<int>(offsets_vector_length_);
} else {
@@ -531,7 +580,7 @@
private:
int* vector_;
int offsets_vector_length_;
- static const int kStaticOffsetsVectorSize = 30;
+ static const int kStaticOffsetsVectorSize = 50;
static int static_offsets_vector_[kStaticOffsetsVectorSize];
};
@@ -540,47 +589,127 @@
OffsetsVector::kStaticOffsetsVectorSize];
-Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
+ ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
+
+ // Prepare space for the return values.
+ int number_of_registers = Re2kNumberOfRegisters(regexp);
+ OffsetsVector offsets(number_of_registers);
+
+ int num_captures = Re2kNumberOfCaptures(regexp);
+
+ int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
+
+ Handle<String> subject16 = CachedStringToTwoByte(subject);
+
+ Handle<Object> result(Re2kExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length()));
+ return result;
+}
+
+
+Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ Handle<Object> index) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
- Handle<Object> compile_result = JsreCompile(regexp);
+ Handle<Object> compile_result = JscreCompile(regexp);
if (compile_result->IsException()) return compile_result;
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
- // Prepare space for the return values.
- int num_captures = JsreCapture(regexp);
+ int num_captures = JscreNumberOfCaptures(regexp);
- OffsetsVector offsets(num_captures);
+ OffsetsVector offsets((num_captures + 1) * 3);
int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
Handle<String> subject16 = CachedStringToTwoByte(subject);
- Handle<Object> result(JsreExecOnce(regexp, num_captures, subject,
- previous_index,
- subject16->GetTwoByteData(),
- offsets.vector(), offsets.length()));
+ Handle<Object> result(JscreExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length()));
return result;
}
-Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
+Handle<Object> RegExpImpl::Re2kExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::RE2K);
+ ASSERT(!regexp->DataAt(JSRegExp::kRe2kDataIndex)->IsUndefined());
+
+ // Prepare space for the return values.
+ int number_of_registers = Re2kNumberOfRegisters(regexp);
+ OffsetsVector offsets(number_of_registers);
+
+ int previous_index = 0;
+
+ Handle<JSArray> result = Factory::NewJSArray(0);
+ int i = 0;
+ Handle<Object> matches;
+
+ Handle<String> subject16 = CachedStringToTwoByte(subject);
+
+ do {
+ if (previous_index > subject->length() || previous_index < 0) {
+ // Per ECMA-262 15.10.6.2, if the previous index is greater than the
+ // string length, there is no match.
+ matches = Factory::null_value();
+ } else {
+ matches = Re2kExecOnce(regexp,
+ Re2kNumberOfCaptures(regexp),
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length());
+
+ if (matches->IsJSArray()) {
+ SetElement(result, i, matches);
+ i++;
+ previous_index = offsets.vector()[1];
+ if (offsets.vector()[0] == offsets.vector()[1]) {
+ previous_index++;
+ }
+ }
+ }
+ } while (matches->IsJSArray());
+
+ // If we exited the loop with an exception, throw it.
+ if (matches->IsNull()) { // Exited loop normally.
+ return result;
+ } else { // Exited loop with the exception in matches.
+ return matches;
+ }
+}
+
+
+Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
+ Handle<String> subject) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
- Handle<Object> compile_result = JsreCompile(regexp);
+ Handle<Object> compile_result = JscreCompile(regexp);
if (compile_result->IsException()) return compile_result;
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
// Prepare space for the return values.
- int num_captures = JsreCapture(regexp);
+ int num_captures = JscreNumberOfCaptures(regexp);
- OffsetsVector offsets(num_captures);
+ OffsetsVector offsets((num_captures + 1) * 3);
int previous_index = 0;
@@ -596,9 +725,13 @@
// string length, there is no match.
matches = Factory::null_value();
} else {
- matches = JsreExecOnce(regexp, num_captures, subject, previous_index,
- subject16->GetTwoByteData(),
- offsets.vector(), offsets.length());
+ matches = JscreExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length());
if (matches->IsJSArray()) {
SetElement(result, i, matches);
@@ -620,15 +753,34 @@
}
-int RegExpImpl::JsreCapture(Handle<JSRegExp> re) {
+int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
- return Smi::cast(value->get(CAPTURE_INDEX))->value();
+ return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->
+ value();
}
-ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
+ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
- return ByteArray::cast(value->get(INTERNAL_INDEX));
+ return ByteArray::cast(value->get(kJscreInternalIndex));
+}
+
+
+int RegExpImpl::Re2kNumberOfCaptures(Handle<JSRegExp> re) {
+ FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+ return Smi::cast(value->get(kRe2kNumberOfCapturesIndex))->value();
+}
+
+
+int RegExpImpl::Re2kNumberOfRegisters(Handle<JSRegExp> re) {
+ FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+ return Smi::cast(value->get(kRe2kNumberOfRegistersIndex))->value();
+}
+
+
+Handle<ByteArray> RegExpImpl::Re2kCode(Handle<JSRegExp> re) {
+ FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kRe2kDataIndex));
+ return Handle<ByteArray>(ByteArray::cast(value->get(kRe2kCodeIndex)));
}
@@ -648,14 +800,12 @@
int AllocateRegister() { return next_register_++; }
Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
- RegExpNode* start);
+ RegExpNode* start,
+ int capture_count,
+ bool case_independent);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
- static const int kImplementationOffset = 0;
- static const int kNumberOfRegistersOffset = 0;
- static const int kCodeOffset = 1;
-
RegExpMacroAssembler* macro_assembler() {
return macro_assembler_;
}
@@ -666,34 +816,52 @@
};
+// Attempts to compile the regexp using a Regexp2000 code generator. Returns
+// a fixed array or a null handle depending on whether it succeeded.
Handle<FixedArray> RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler,
- RegExpNode* start) {
+ RegExpNode* start,
+ int capture_count,
+ bool case_independent) {
+ if (case_independent) return Handle<FixedArray>::null();
macro_assembler_ = macro_assembler;
List <RegExpNode*> work_list(0);
work_list_ = &work_list;
- start->GoTo(this);
+ Label fail;
+ macro_assembler->PushBacktrack(&fail);
+ if (!start->GoTo(this)) {
+ fail.Unuse();
+ return Handle<FixedArray>::null();
+ }
while (!work_list.is_empty()) {
- work_list.RemoveLast()->Emit(this);
+ if (!work_list.RemoveLast()->Emit(this)) {
+ fail.Unuse();
+ return Handle<FixedArray>::null();
+ }
}
- Handle<FixedArray> array = Factory::NewFixedArray(3);
- array->set(kImplementationOffset,
- Smi::FromInt(macro_assembler->Implementation()),
- SKIP_WRITE_BARRIER);
- array->set(kNumberOfRegistersOffset,
- Smi::FromInt(next_register_),
- SKIP_WRITE_BARRIER);
+ macro_assembler->Bind(&fail);
+ macro_assembler->Fail();
+ Handle<FixedArray> array =
+ Factory::NewFixedArray(RegExpImpl::kRe2kDataLength);
+ array->set(RegExpImpl::kRe2kImplementationIndex,
+ Smi::FromInt(macro_assembler->Implementation()));
+ array->set(RegExpImpl::kRe2kNumberOfRegistersIndex,
+ Smi::FromInt(next_register_));
+ array->set(RegExpImpl::kRe2kNumberOfCapturesIndex,
+ Smi::FromInt(capture_count));
Handle<Object> code = macro_assembler->GetCode();
+ array->set(RegExpImpl::kRe2kCodeIndex, *code);
work_list_ = NULL;
return array;
}
-void RegExpNode::GoTo(RegExpCompiler* compiler) {
+bool RegExpNode::GoTo(RegExpCompiler* compiler) {
if (label.is_bound()) {
compiler->macro_assembler()->GoTo(&label);
+ return true;
} else {
- Emit(compiler);
+ return Emit(compiler);
}
}
@@ -707,6 +875,19 @@
EndNode EndNode::kBacktrack(BACKTRACK);
+bool EndNode::Emit(RegExpCompiler* compiler) {
+ switch (action_) {
+ case ACCEPT:
+ compiler->macro_assembler()->Succeed();
+ return true;
+ case BACKTRACK:
+ compiler->macro_assembler()->Backtrack();
+ return true;
+ }
+ return false;
+}
+
+
void GuardedAlternative::AddGuard(Guard* guard) {
if (guards_ == NULL)
guards_ = new ZoneList<Guard*>(1);
@@ -782,13 +963,13 @@
// Emit code.
-void ChoiceNode::Emit(RegExpCompiler* compiler) {
+bool ChoiceNode::Emit(RegExpCompiler* compiler) {
// TODO(erikcorry): Implement this.
- UNREACHABLE();
+ return false;
}
-void ActionNode::Emit(RegExpCompiler* compiler) {
+bool ActionNode::Emit(RegExpCompiler* compiler) {
RegExpMacroAssembler* macro = compiler->macro_assembler();
switch (type_) {
case STORE_REGISTER:
@@ -806,17 +987,19 @@
break;
case BEGIN_SUBMATCH:
// TODO(erikcorry): Implement this.
- UNREACHABLE();
- break;
+ return false;
case ESCAPE_SUBMATCH:
// TODO(erikcorry): Implement this.
- UNREACHABLE();
- break;
+ return false;
case END_SUBMATCH:
// TODO(erikcorry): Implement this.
+ return false;
+ default:
UNREACHABLE();
- break;
+ return false;
}
+ compiler->AddWork(on_success());
+ return true;
}
@@ -1610,7 +1793,9 @@
}
-RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
+Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
+ RegExpNode** node_return,
+ bool ignore_case) {
RegExpCompiler compiler(input->capture_count);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
@@ -1630,9 +1815,13 @@
&compiler,
captured_body,
EndNode::GetBacktrack());
+ if (node_return != NULL) *node_return = node;
Analysis analysis(&compiler);
analysis.Analyze(node);
- return node;
+ byte codes[10240];
+ Re2kAssembler assembler(Vector<byte>(codes, 1024));
+ RegExpMacroAssemblerRe2k macro_assembler(&assembler);
+ return compiler.Assemble(&macro_assembler, node, input->capture_count, ignore_case);
}
RegExpMacroAssembler::RegExpMacroAssembler() {
Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h (original)
+++ branches/experimental/regexp2000/src/jsregexp.h Fri Nov 14 03:26:08 2008
@@ -63,13 +63,21 @@
// Stores an uncompiled RegExp pattern in the JSRegExp object.
// It will be compiled by JSCRE when first executed.
- static Handle<Object> JsrePrepare(Handle<JSRegExp> re,
+ static Handle<Object> JscrePrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags);
+
+ // Stores a compiled RegExp pattern in the JSRegExp object.
+ // The pattern is compiled by Regexp2000.
+ static Handle<Object> Re2kPrepare(Handle<JSRegExp> re,
Handle<String> pattern,
- JSRegExp::Flags flags);
+ JSRegExp::Flags flags,
+ Handle<FixedArray> re2k_data);
+
// Compile the pattern using JSCRE and store the result in the
// JSRegExp object.
- static Handle<Object> JsreCompile(Handle<JSRegExp> re);
+ static Handle<Object> JscreCompile(Handle<JSRegExp> re);
static Handle<Object> AtomCompile(Handle<JSRegExp> re,
Handle<String> pattern,
@@ -82,16 +90,24 @@
static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
- static Handle<Object> JsreCompile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags);
+ static Handle<Object> JscreCompile(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags);
// Execute a compiled JSCRE pattern.
- static Handle<Object> JsreExec(Handle<JSRegExp> regexp,
+ static Handle<Object> JscreExec(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ Handle<Object> index);
+
+ // Execute a Regexp2000 bytecode pattern.
+ static Handle<Object> Re2kExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
- static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp,
+ static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp,
+ Handle<String> subject);
+
+ static Handle<Object> Re2kExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
static void NewSpaceCollectionPrologue();
@@ -103,16 +119,37 @@
static Handle<String> StringToTwoByte(Handle<String> pattern);
static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
+ static const int kRe2kImplementationIndex = 0;
+ static const int kRe2kNumberOfCapturesIndex = 1;
+ static const int kRe2kNumberOfRegistersIndex = 2;
+ static const int kRe2kCodeIndex = 3;
+ static const int kRe2kDataLength = 4;
+
+ static const int kJscreNumberOfCapturesIndex = 0;
+ static const int kJscreInternalIndex = 1;
+ static const int kJscreDataLength = 2;
+
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
- // Returns the caputure from the re.
- static int JsreCapture(Handle<JSRegExp> re);
- static ByteArray* JsreInternal(Handle<JSRegExp> re);
+ static int JscreNumberOfCaptures(Handle<JSRegExp> re);
+ static ByteArray* JscreInternal(Handle<JSRegExp> re);
+
+ static int Re2kNumberOfCaptures(Handle<JSRegExp> re);
+ static int Re2kNumberOfRegisters(Handle<JSRegExp> re);
+ static Handle<ByteArray> Re2kCode(Handle<JSRegExp> re);
// Call jsRegExpExecute once
- static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp,
+ static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
+ int num_captures,
+ Handle<String> subject,
+ int previous_index,
+ const uc16* utf8_subject,
+ int* ovector,
+ int ovector_length);
+
+ static Handle<Object> Re2kExecOnce(Handle<JSRegExp> regexp,
int num_captures,
Handle<String> subject,
int previous_index,
@@ -122,8 +159,10 @@
// Set the subject cache. The previous string buffer is not deleted, so the
// caller should ensure that it doesn't leak.
- static void SetSubjectCache(String* subject, char* utf8_subject,
- int uft8_length, int character_position,
+ static void SetSubjectCache(String* subject,
+ char* utf8_subject,
+ int uft8_length,
+ int character_position,
int utf8_position);
// A one element cache of the last utf8_subject string and its length. The
@@ -362,9 +401,13 @@
virtual ~RegExpNode() { }
virtual void Accept(NodeVisitor* visitor) = 0;
// Generates a goto to this node or actually generates the code at this point.
- void GoTo(RegExpCompiler* compiler);
+ // Until the implementation is complete we will return true for success and
+ // false for failure.
+ bool GoTo(RegExpCompiler* compiler);
void EmitAddress(RegExpCompiler* compiler);
- virtual void Emit(RegExpCompiler* compiler) = 0;
+ // Until the implementation is complete we will return true for success and
+ // false for failure.
+ virtual bool Emit(RegExpCompiler* compiler) = 0;
private:
Label label;
};
@@ -375,7 +418,7 @@
explicit SeqRegExpNode(RegExpNode* on_success)
: on_success_(on_success) { }
RegExpNode* on_success() { return on_success_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ virtual bool Emit(RegExpCompiler* compiler) { return false; }
private:
RegExpNode* on_success_;
};
@@ -400,7 +443,7 @@
static ActionNode* EscapeSubmatch(RegExpNode* on_success);
static ActionNode* EndSubmatch(RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
- virtual void Emit(RegExpCompiler* compiler);
+ virtual bool Emit(RegExpCompiler* compiler);
private:
union {
struct {
@@ -433,7 +476,7 @@
virtual void Accept(NodeVisitor* visitor);
Vector<const uc16> data() { return data_; }
RegExpNode* on_failure() { return on_failure_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ virtual bool Emit(RegExpCompiler* compiler) { return false; }
private:
RegExpNode* on_failure_;
Vector<const uc16> data_;
@@ -454,7 +497,7 @@
RegExpNode* on_failure() { return on_failure_; }
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ virtual bool Emit(RegExpCompiler* compiler) { return false; }
private:
RegExpNode* on_failure_;
int start_reg_;
@@ -476,7 +519,7 @@
ZoneList<CharacterRange>* ranges() { return ranges_; }
bool is_negated() { return is_negated_; }
RegExpNode* on_failure() { return on_failure_; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ virtual bool Emit(RegExpCompiler* compiler) { return false; }
static void AddInverseToTable(ZoneList<CharacterRange>* ranges,
DispatchTable* table,
int index);
@@ -493,7 +536,7 @@
virtual void Accept(NodeVisitor* visitor);
static EndNode* GetAccept() { return &kAccept; }
static EndNode* GetBacktrack() { return &kBacktrack; }
- virtual void Emit(RegExpCompiler* compiler) { UNREACHABLE(); }
+ virtual bool Emit(RegExpCompiler* compiler);
private:
explicit EndNode(Action action) : action_(action) { }
Action action_;
@@ -542,7 +585,7 @@
ZoneList<GuardedAlternative>* choices() { return choices_; }
DispatchTable* table() { return &table_; }
RegExpNode* on_failure() { return on_failure_; }
- virtual void Emit(RegExpCompiler* compiler);
+ virtual bool Emit(RegExpCompiler* compiler);
bool visited() { return visited_; }
void set_visited(bool value) { visited_ = value; }
private:
@@ -563,7 +606,9 @@
class RegExpEngine: public AllStatic {
public:
- static RegExpNode* Compile(RegExpParseResult* input);
+ static Handle<FixedArray> Compile(RegExpParseResult* input,
+ RegExpNode** node_return,
+ bool ignore_case);
static void DotPrint(const char* label, RegExpNode* node);
};
Modified: branches/experimental/regexp2000/src/objects-debug.cc
==============================================================================
--- branches/experimental/regexp2000/src/objects-debug.cc (original)
+++ branches/experimental/regexp2000/src/objects-debug.cc Fri Nov 14 03:26:08 2008
@@ -674,6 +674,12 @@
ASSERT(jscre_data->IsFixedArray() || jscre_data->IsUndefined());
break;
}
+ case JSRegExp::RE2K: {
+ FixedArray* arr = FixedArray::cast(data());
+ Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
+ ASSERT(jscre_data->IsFixedArray());
+ break;
+ }
default:
ASSERT_EQ(JSRegExp::NOT_COMPILED, TypeTag());
ASSERT(data()->IsUndefined());
Modified: branches/experimental/regexp2000/src/objects-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/objects-inl.h (original)
+++ branches/experimental/regexp2000/src/objects-inl.h Fri Nov 14 03:26:08 2008
@@ -1142,6 +1142,13 @@
}
+void FixedArray::set(int index, Smi* value) {
+ ASSERT(reinterpret_cast<Object*>(value)->IsSmi());
+ int offset = kHeaderSize + index * kPointerSize;
+ WRITE_FIELD(this, offset, value);
+}
+
+
void FixedArray::set(int index, Object* value) {
ASSERT(index >= 0 && index < this->length());
int offset = kHeaderSize + index * kPointerSize;
Modified: branches/experimental/regexp2000/src/objects.h
==============================================================================
--- branches/experimental/regexp2000/src/objects.h (original)
+++ branches/experimental/regexp2000/src/objects.h Fri Nov 14 03:26:08 2008
@@ -1498,9 +1498,12 @@
// Setter and getter for elements.
inline Object* get(int index);
+ // Setter that uses write barrier.
inline void set(int index, Object* value);
- // Setter with barrier mode.
+ // Setter that doesn't need write barrier).
+ inline void set(int index, Smi* value);
+ // Setter with explicit barrier mode.
inline void set(int index, Object* value, WriteBarrierMode mode);
// Setters for frequently used oddballs located in old space.
@@ -2916,7 +2919,9 @@
// NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
// JSCRE: A complex RegExp for JSCRE
// ATOM: A simple string to match against using an indexOf operation.
- enum Type { NOT_COMPILED, JSCRE, ATOM };
+ // RE2K: Compiled with RegExp2000.
+ // RE2K_NATIVE: Compiled to native code with RegExp2000.
+ enum Type { NOT_COMPILED, JSCRE, ATOM, RE2K, RE2K_NATIVE };
enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
class Flags {
@@ -2951,10 +2956,11 @@
static const int kTagIndex = 0;
static const int kSourceIndex = kTagIndex + 1;
static const int kFlagsIndex = kSourceIndex + 1;
- // These two are the same since the same entry is shared for
+ // These three are the same since the same entry is shared for
// different purposes in different types of regexps.
static const int kAtomPatternIndex = kFlagsIndex + 1;
static const int kJscreDataIndex = kFlagsIndex + 1;
+ static const int kRe2kDataIndex = kFlagsIndex + 1;
static const int kDataSize = kAtomPatternIndex + 1;
};
Modified: branches/experimental/regexp2000/test/cctest/test-regexp.cc
==============================================================================
--- branches/experimental/regexp2000/test/cctest/test-regexp.cc (original)
+++ branches/experimental/regexp2000/test/cctest/test-regexp.cc Fri Nov 14 03:26:08 2008
@@ -338,7 +338,8 @@
RegExpParseResult result;
if (!v8::internal::ParseRegExp(&buffer, &result))
return;
- RegExpNode* node = RegExpEngine::Compile(&result);
+ RegExpNode* node = NULL;
+ RegExpEngine::Compile(&result, &node, false);
USE(node);
#ifdef DEBUG
if (dot_output) {
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---