Author: [EMAIL PROTECTED]
Date: Mon Dec 8 04:43:01 2008
New Revision: 937
Modified:
branches/bleeding_edge/src/jsregexp.cc
branches/bleeding_edge/src/jsregexp.h
branches/bleeding_edge/src/objects.h
branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
branches/bleeding_edge/test/cctest/test-regexp.cc
Log:
Irregexp is specialized on subject character type.
Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc (original)
+++ branches/bleeding_edge/src/jsregexp.cc Mon Dec 8 04:43:01 2008
@@ -201,6 +201,50 @@
}
+// Generic RegExp methods. Dispatches to implementation specific methods.
+
+
+class OffsetsVector {
+ public:
+ inline OffsetsVector(int num_registers)
+ : offsets_vector_length_(num_registers) {
+ if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+ vector_ = NewArray<int>(offsets_vector_length_);
+ } else {
+ vector_ = static_offsets_vector_;
+ }
+ }
+
+
+ inline ~OffsetsVector() {
+ if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+ DeleteArray(vector_);
+ vector_ = NULL;
+ }
+ }
+
+
+ inline int* vector() {
+ return vector_;
+ }
+
+
+ inline int length() {
+ return offsets_vector_length_;
+ }
+
+ private:
+ int* vector_;
+ int offsets_vector_length_;
+ static const int kStaticOffsetsVectorSize = 50;
+ static int static_offsets_vector_[kStaticOffsetsVectorSize];
+};
+
+
+int OffsetsVector::static_offsets_vector_[
+ OffsetsVector::kStaticOffsetsVectorSize];
+
+
Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flag_str) {
@@ -224,7 +268,7 @@
pattern,
parse_result.error,
"malformed_regexp");
- return Handle<Object>();
+ return Handle<Object>::null();
}
RegExpAtom* atom = parse_result.tree->AsAtom();
if (atom != NULL && !flags.is_ignore_case()) {
@@ -237,20 +281,10 @@
result = AtomCompile(re, pattern, flags, pattern);
}
} else {
- RegExpNode* node = NULL;
- Handle<FixedArray> irregexp_data =
- RegExpEngine::Compile(&parse_result,
- &node,
- flags.is_ignore_case(),
- flags.is_multiline(),
- pattern);
- if (irregexp_data.is_null()) {
- if (FLAG_disable_jscre) {
- UNIMPLEMENTED();
- }
- result = JscrePrepare(re, pattern, flags);
+ if (FLAG_irregexp) {
+ result = IrregexpPrepare(re, pattern, flags);
} else {
- result = IrregexpPrepare(re, pattern, flags, irregexp_data);
+ result = JscrePrepare(re, pattern, flags);
}
}
Object* data = re->data();
@@ -270,18 +304,30 @@
Handle<String> subject,
Handle<Object> index) {
switch (regexp->TypeTag()) {
+ case JSRegExp::ATOM:
+ return AtomExec(regexp, subject, index);
+ case JSRegExp::IRREGEXP: {
+ Handle<Object> result = IrregexpExec(regexp, subject, index);
+ if (!result.is_null()) {
+ return result;
+ }
+ // We couldn't handle the regexp using Irregexp, so fall back
+ // on JSCRE. We rejoice at the thought of the day when this is
+ // no longer needed.
+ // Reset the JSRegExp to use JSCRE.
+ JscrePrepare(regexp,
+ Handle<String>(regexp->Pattern()),
+ regexp->GetFlags());
+ // Fall-through to JSCRE.
+ }
case JSRegExp::JSCRE:
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
return JscreExec(regexp, subject, index);
- case JSRegExp::ATOM:
- return AtomExec(regexp, subject, index);
- case JSRegExp::IRREGEXP:
- return IrregexpExec(regexp, subject, index);
default:
UNREACHABLE();
- return Handle<Object>();
+ return Handle<Object>::null();
}
}
@@ -289,22 +335,37 @@
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
switch (regexp->TypeTag()) {
+ case JSRegExp::ATOM:
+ return AtomExecGlobal(regexp, subject);
+ case JSRegExp::IRREGEXP: {
+ Handle<Object> result = IrregexpExecGlobal(regexp, subject);
+ if (!result.is_null()) {
+ return result;
+ }
+ // We couldn't handle the regexp using Irregexp, so fall back
+ // on JSCRE. We rejoice at the thought of the day when this is
+ // no longer needed.
+ // Reset the JSRegExp to use JSCRE.
+ JscrePrepare(regexp,
+ Handle<String>(regexp->Pattern()),
+ regexp->GetFlags());
+ // Fall-through to JSCRE.
+ }
case JSRegExp::JSCRE:
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
}
return JscreExecGlobal(regexp, subject);
- case JSRegExp::ATOM:
- return AtomExecGlobal(regexp, subject);
- case JSRegExp::IRREGEXP:
- return IrregexpExecGlobal(regexp, subject);
default:
UNREACHABLE();
- return Handle<Object>();
+ return Handle<Object>::null();
}
}
+// RegExp Atom implementation: Simple string search using indexOf.
+
+
Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
@@ -366,6 +427,21 @@
}
+// JSCRE implementation.
+
+
+int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
+ FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
+ return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
+}
+
+
+ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
+ FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
+ return ByteArray::cast(value->get(kJscreInternalIndex));
+}
+
+
Handle<Object>RegExpImpl::JscrePrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags) {
@@ -375,20 +451,11 @@
}
-Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
- Handle<FixedArray>
irregexp_data) {
- Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags,
irregexp_data);
- return re;
-}
-
-
-static inline Object* DoCompile(String* pattern,
- JSRegExp::Flags flags,
- unsigned* number_of_captures,
- const char** error_message,
- v8::jscre::JscreRegExp** code) {
+static inline Object* JscreDoCompile(String* pattern,
+ JSRegExp::Flags flags,
+ unsigned* number_of_captures,
+ const char** error_message,
+ v8::jscre::JscreRegExp** code) {
v8::jscre::JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
? v8::jscre::JSRegExpIgnoreCase
: v8::jscre::JSRegExpDoNotIgnoreCase;
@@ -417,16 +484,16 @@
}
-void CompileWithRetryAfterGC(Handle<String> pattern,
- JSRegExp::Flags flags,
- unsigned* number_of_captures,
- const char** error_message,
- v8::jscre::JscreRegExp** code) {
- CALL_HEAP_FUNCTION_VOID(DoCompile(*pattern,
- flags,
- number_of_captures,
- error_message,
- code));
+static void JscreCompileWithRetryAfterGC(Handle<String> pattern,
+ JSRegExp::Flags flags,
+ unsigned* number_of_captures,
+ const char** error_message,
+ v8::jscre::JscreRegExp** code) {
+ CALL_HEAP_FUNCTION_VOID(JscreDoCompile(*pattern,
+ flags,
+ number_of_captures,
+ error_message,
+ code));
}
@@ -445,11 +512,11 @@
v8::jscre::JscreRegExp* code = NULL;
FlattenString(pattern);
- CompileWithRetryAfterGC(two_byte_pattern,
- flags,
- &number_of_captures,
- &error_message,
- &code);
+ JscreCompileWithRetryAfterGC(two_byte_pattern,
+ flags,
+ &number_of_captures,
+ &error_message,
+ &code);
if (code == NULL) {
// Throw an exception.
@@ -476,92 +543,31 @@
}
-Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
- int num_captures,
- Handle<String>
two_byte_subject,
- int previous_index,
- int* offsets_vector,
- int offsets_vector_length) {
-#ifdef DEBUG
- if (FLAG_trace_regexp_bytecodes) {
- String* pattern = regexp->Pattern();
- PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
- PrintF("\n\nSubject string: '%s'\n\n",
*(two_byte_subject->ToCString()));
- }
-#endif
- ASSERT(StringShape(*two_byte_subject).IsTwoByteRepresentation());
- ASSERT(two_byte_subject->IsFlat(StringShape(*two_byte_subject)));
- bool rc;
-
- for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
- offsets_vector[i] = -1;
+Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ Handle<Object> index) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
+ if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
+ Handle<Object> compile_result = JscreCompile(regexp);
+ if (compile_result.is_null()) return compile_result;
}
+ ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
- LOG(RegExpExecEvent(regexp, previous_index, two_byte_subject));
-
- FixedArray* irregexp =
- FixedArray::cast(regexp->DataAt(JSRegExp::kIrregexpDataIndex));
- int tag =
Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
+ int num_captures = JscreNumberOfCaptures(regexp);
- switch (tag) {
- case RegExpMacroAssembler::kIA32Implementation: {
-#ifndef ARM
- Code* code = Code::cast(irregexp->get(kIrregexpCodeIndex));
- Address start_addr =
-
Handle<SeqTwoByteString>::cast(two_byte_subject)->GetCharsAddress();
- int string_offset =
- start_addr - reinterpret_cast<Address>(*two_byte_subject);
- int start_offset = string_offset + previous_index * sizeof(uc16);
- int end_offset =
- string_offset + two_byte_subject->length() * sizeof(uc16);
- rc = RegExpMacroAssemblerIA32::Execute(code,
- two_byte_subject.location(),
- start_offset,
- end_offset,
- offsets_vector,
- previous_index == 0);
- if (rc) {
- // Capture values are relative to start_offset only.
- for (int i = 0; i < offsets_vector_length; i++) {
- if (offsets_vector[i] >= 0) {
- offsets_vector[i] += previous_index;
- }
- }
- }
- break;
-#else
- UNIMPLEMENTED();
- rc = false;
- break;
-#endif
- }
- case RegExpMacroAssembler::kBytecodeImplementation: {
- Handle<ByteArray> byte_codes = IrregexpCode(regexp);
+ OffsetsVector offsets((num_captures + 1) * 3);
- rc = IrregexpInterpreter::Match(byte_codes,
- two_byte_subject,
- offsets_vector,
- previous_index);
- break;
- }
- case RegExpMacroAssembler::kARMImplementation:
- default:
- UNREACHABLE();
- rc = false;
- break;
- }
+ int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
- if (!rc) {
- return Factory::null_value();
- }
+ Handle<String> subject16 = CachedStringToTwoByte(subject);
- Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
- // The captures come in (start, end+1) pairs.
- for (int i = 0; i < 2 * (num_captures+1); i += 2) {
- array->set(i, Smi::FromInt(offsets_vector[i]));
- array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
- }
- return Factory::NewJSArrayWithElements(array);
+ return JscreExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length());
}
@@ -617,76 +623,8 @@
}
-class OffsetsVector {
- public:
- inline OffsetsVector(int num_registers)
- : offsets_vector_length_(num_registers) {
- if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
- vector_ = NewArray<int>(offsets_vector_length_);
- } else {
- vector_ = static_offsets_vector_;
- }
- }
-
-
- inline ~OffsetsVector() {
- if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
- DeleteArray(vector_);
- vector_ = NULL;
- }
- }
-
-
- inline int* vector() {
- return vector_;
- }
-
-
- inline int length() {
- return offsets_vector_length_;
- }
-
- private:
- int* vector_;
- int offsets_vector_length_;
- static const int kStaticOffsetsVectorSize = 50;
- static int static_offsets_vector_[kStaticOffsetsVectorSize];
-};
-
-
-int OffsetsVector::static_offsets_vector_[
- OffsetsVector::kStaticOffsetsVectorSize];
-
-
-Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
- Handle<String> subject,
- Handle<Object> index) {
- ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
- ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
-
- // Prepare space for the return values.
- int number_of_registers = IrregexpNumberOfRegisters(regexp);
- OffsetsVector offsets(number_of_registers);
-
- int num_captures = IrregexpNumberOfCaptures(regexp);
-
- int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
-
- Handle<String> subject16 = CachedStringToTwoByte(subject);
-
- Handle<Object> result(IrregexpExecOnce(regexp,
- num_captures,
- subject16,
- previous_index,
- offsets.vector(),
- offsets.length()));
- return result;
-}
-
-
-Handle<Object> RegExpImpl::JscreExec(Handle<JSRegExp> regexp,
- Handle<String> subject,
- Handle<Object> index) {
+Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
+ Handle<String> subject) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
Handle<Object> compile_result = JscreCompile(regexp);
@@ -694,35 +632,11 @@
}
ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
+ // Prepare space for the return values.
int num_captures = JscreNumberOfCaptures(regexp);
OffsetsVector offsets((num_captures + 1) * 3);
- int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
-
- Handle<String> subject16 = CachedStringToTwoByte(subject);
-
- Handle<Object> result(JscreExecOnce(regexp,
- num_captures,
- subject,
- previous_index,
- subject16->GetTwoByteData(),
- offsets.vector(),
- offsets.length()));
-
- return result;
-}
-
-
-Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
- Handle<String> subject) {
- ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
- ASSERT(!regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsUndefined());
-
- // Prepare space for the return values.
- int number_of_registers = IrregexpNumberOfRegisters(regexp);
- OffsetsVector offsets(number_of_registers);
-
int previous_index = 0;
Handle<JSArray> result = Factory::NewJSArray(0);
@@ -737,12 +651,13 @@
// string length, there is no match.
matches = Factory::null_value();
} else {
- matches = IrregexpExecOnce(regexp,
- IrregexpNumberOfCaptures(regexp),
- subject16,
- previous_index,
- offsets.vector(),
- offsets.length());
+ matches = JscreExecOnce(regexp,
+ num_captures,
+ subject,
+ previous_index,
+ subject16->GetTwoByteData(),
+ offsets.vector(),
+ offsets.length());
if (matches->IsJSArray()) {
SetElement(result, i, matches);
@@ -766,19 +681,146 @@
}
-Handle<Object> RegExpImpl::JscreExecGlobal(Handle<JSRegExp> regexp,
- Handle<String> subject) {
- ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
- if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
- Handle<Object> compile_result = JscreCompile(regexp);
- if (compile_result.is_null()) return compile_result;
+// Irregexp implementation.
+
+
+static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
+ bool is_ascii) {
+ ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
+ Handle<FixedArray> alternatives(
+ FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
+ ASSERT_EQ(2, alternatives->length());
+
+ int index = is_ascii ? 0 : 1;
+ Object* entry = alternatives->get(index);
+ if (!entry->IsNull()) {
+ return Handle<FixedArray>(FixedArray::cast(entry));
+ }
+
+ // Compile the RegExp.
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+
+ JSRegExp::Flags flags = re->GetFlags();
+
+ Handle<String> pattern(re->Pattern());
+ StringShape shape(*pattern);
+ if (!pattern->IsFlat(shape)) {
+ pattern->Flatten(shape);
+ }
+
+ RegExpParseResult parse_result;
+ FlatStringReader reader(pattern);
+ if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
+ // Throw an exception if we fail to parse the pattern.
+ // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
+ ThrowRegExpException(re,
+ pattern,
+ parse_result.error,
+ "malformed_regexp");
+ return Handle<FixedArray>::null();
+ }
+ Handle<FixedArray> compiled_entry =
+ RegExpEngine::Compile(&parse_result,
+ NULL,
+ flags.is_ignore_case(),
+ flags.is_multiline(),
+ pattern,
+ is_ascii);
+ if (!compiled_entry.is_null()) {
+ alternatives->set(index, *compiled_entry);
+ }
+ return compiled_entry;
+}
+
+
+int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
+ return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
+}
+
+
+int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
+ return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
+}
+
+
+Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
+ ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
+ == RegExpMacroAssembler::kBytecodeImplementation);
+ return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
+}
+
+
+Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
+ ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
+ != RegExpMacroAssembler::kBytecodeImplementation);
+ return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
+}
+
+
+Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags) {
+ // Make space for ASCII and UC16 versions.
+ Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
+ alternatives->set_null(0);
+ alternatives->set_null(1);
+ Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags,
alternatives);
+ return re;
+}
+
+
+Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ Handle<Object> index) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+ ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
+
+ bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
+ Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
+ if (irregexp.is_null()) {
+ // We can't handle the RegExp with Irregexp.
+ return Handle<Object>::null();
}
- ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
// Prepare space for the return values.
- int num_captures = JscreNumberOfCaptures(regexp);
+ int number_of_registers = IrregexpNumberOfRegisters(irregexp);
+ OffsetsVector offsets(number_of_registers);
+
+ int num_captures = IrregexpNumberOfCaptures(irregexp);
+
+ int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
+
+#ifdef DEBUG
+ if (FLAG_trace_regexp_bytecodes) {
+ String* pattern = regexp->Pattern();
+ PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
+ PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
+ }
+#endif
+ LOG(RegExpExecEvent(regexp, previous_index, subject));
+ return IrregexpExecOnce(irregexp,
+ num_captures,
+ subject,
+ previous_index,
+ offsets.vector(),
+ offsets.length());
+}
- OffsetsVector offsets((num_captures + 1) * 3);
+
+Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
+ Handle<String> subject) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+
+ StringShape shape(*subject);
+ bool is_ascii = shape.IsAsciiRepresentation();
+ Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
+ if (irregexp.is_null()) {
+ return Handle<Object>::null();
+ }
+
+ // Prepare space for the return values.
+ int number_of_registers = IrregexpNumberOfRegisters(irregexp);
+ OffsetsVector offsets(number_of_registers);
int previous_index = 0;
@@ -786,7 +828,9 @@
int i = 0;
Handle<Object> matches;
- Handle<String> subject16 = CachedStringToTwoByte(subject);
+ if (!subject->IsFlat(shape)) {
+ subject->Flatten(shape);
+ }
do {
if (previous_index > subject->length() || previous_index < 0) {
@@ -794,13 +838,20 @@
// string length, there is no match.
matches = Factory::null_value();
} else {
- matches = JscreExecOnce(regexp,
- num_captures,
- subject,
- previous_index,
- subject16->GetTwoByteData(),
- offsets.vector(),
- offsets.length());
+#ifdef DEBUG
+ if (FLAG_trace_regexp_bytecodes) {
+ String* pattern = regexp->Pattern();
+ PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
+ PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
+ }
+#endif
+ LOG(RegExpExecEvent(regexp, previous_index, subject));
+ matches = IrregexpExecOnce(irregexp,
+ IrregexpNumberOfCaptures(irregexp),
+ subject,
+ previous_index,
+ offsets.vector(),
+ offsets.length());
if (matches->IsJSArray()) {
SetElement(result, i, matches);
@@ -824,36 +875,120 @@
}
-int RegExpImpl::JscreNumberOfCaptures(Handle<JSRegExp> re) {
- FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
- return Smi::cast(value->get(kJscreNumberOfCapturesIndex))->value();
-}
+Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
+ int num_captures,
+ Handle<String> subject,
+ int previous_index,
+ int* offsets_vector,
+ int offsets_vector_length) {
+ bool rc;
+ int tag =
Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
-ByteArray* RegExpImpl::JscreInternal(Handle<JSRegExp> re) {
- FixedArray* value =
FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
- return ByteArray::cast(value->get(kJscreInternalIndex));
-}
+ switch (tag) {
+ case RegExpMacroAssembler::kIA32Implementation: {
+#ifndef ARM
+ if (!subject->IsFlat(StringShape(*subject))) {
+ FlattenString(subject);
+ }
+ Handle<Code> code = IrregexpNativeCode(irregexp);
+ StringShape shape(*subject);
-int RegExpImpl::IrregexpNumberOfCaptures(Handle<JSRegExp> re) {
- FixedArray* value =
- FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
- return Smi::cast(value->get(kIrregexpNumberOfCapturesIndex))->value();
-}
+ // Character offsets into string.
+ int start_offset = previous_index;
+ int end_offset = subject->length(shape);
+
+ if (shape.IsCons()) {
+ subject = Handle<String>(ConsString::cast(*subject)->first());
+ } else if (shape.IsSliced()) {
+ SlicedString* slice = SlicedString::cast(*subject);
+ start_offset += slice->start();
+ end_offset += slice->start();
+ subject = Handle<String>(slice->buffer());
+ }
+
+ // String is now either Sequential or External
+ StringShape flatshape(*subject);
+ bool is_ascii = flatshape.IsAsciiRepresentation();
+ int char_size = is_ascii ? sizeof(char) : sizeof(uc16); // NOLINT
+
+ if (flatshape.IsExternal()) {
+ const byte* address;
+ if (is_ascii) {
+ ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
+ address = reinterpret_cast<const byte*>(ext->resource()->data());
+ } else {
+ ExternalTwoByteString* ext =
ExternalTwoByteString::cast(*subject);
+ address = reinterpret_cast<const byte*>(ext->resource()->data());
+ }
+ rc = RegExpMacroAssemblerIA32::Execute(
+ *code,
+ &address,
+ start_offset * char_size,
+ end_offset * char_size,
+ offsets_vector,
+ previous_index == 0);
+ } else { // Sequential string
+ int byte_offset =
+ is_ascii ? SeqAsciiString::kHeaderSize - kHeapObjectTag:
+ SeqTwoByteString::kHeaderSize - kHeapObjectTag;
+ rc = RegExpMacroAssemblerIA32::Execute(
+ *code,
+ subject.location(),
+ byte_offset + start_offset * char_size,
+ byte_offset + end_offset * char_size,
+ offsets_vector,
+ previous_index == 0);
+ }
+ if (rc) {
+ // Capture values are relative to start_offset only.
+ for (int i = 0; i < offsets_vector_length; i++) {
+ if (offsets_vector[i] >= 0) {
+ offsets_vector[i] += previous_index;
+ }
+ }
+ }
+ break;
+#else
+ UNIMPLEMENTED();
+ rc = false;
+ break;
+#endif
+ }
+ case RegExpMacroAssembler::kBytecodeImplementation: {
+ for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) {
+ offsets_vector[i] = -1;
+ }
+ Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
-int RegExpImpl::IrregexpNumberOfRegisters(Handle<JSRegExp> re) {
- FixedArray* value =
- FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
- return Smi::cast(value->get(kIrregexpNumberOfRegistersIndex))->value();
-}
+ Handle<String> two_byte_subject = CachedStringToTwoByte(subject);
+ rc = IrregexpInterpreter::Match(byte_codes,
+ two_byte_subject,
+ offsets_vector,
+ previous_index);
+ break;
+ }
+ case RegExpMacroAssembler::kARMImplementation:
+ default:
+ UNREACHABLE();
+ rc = false;
+ break;
+ }
+
+ if (!rc) {
+ return Factory::null_value();
+ }
-Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
- FixedArray* value =
- FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
- return
Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
+ Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
+ // The captures come in (start, end+1) pairs.
+ for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+ array->set(i, Smi::FromInt(offsets_vector[i]));
+ array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+ }
+ return Factory::NewJSArrayWithElements(array);
}
@@ -3475,7 +3610,8 @@
RegExpNode** node_return,
bool ignore_case,
bool is_multiline,
- Handle<String> pattern) {
+ Handle<String> pattern,
+ bool is_ascii) {
RegExpCompiler compiler(input->capture_count, ignore_case);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
@@ -3500,10 +3636,6 @@
NodeInfo info = *node->info();
node = node->EnsureExpanded(&info);
- if (!FLAG_irregexp) {
- return Handle<FixedArray>::null();
- }
-
if (is_multiline && !FLAG_attempt_multiline_irregexp) {
return Handle<FixedArray>::null();
}
@@ -3512,7 +3644,13 @@
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.
#else // IA32
- RegExpMacroAssemblerIA32
macro_assembler(RegExpMacroAssemblerIA32::UC16,
+ RegExpMacroAssemblerIA32::Mode mode;
+ if (is_ascii) {
+ mode = RegExpMacroAssemblerIA32::ASCII;
+ } else {
+ mode = RegExpMacroAssemblerIA32::UC16;
+ }
+ RegExpMacroAssemblerIA32 macro_assembler(mode,
(input->capture_count + 1) *
2);
return compiler.Assemble(&macro_assembler,
node,
Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h (original)
+++ branches/bleeding_edge/src/jsregexp.h Mon Dec 8 04:43:01 2008
@@ -48,6 +48,9 @@
// This function calls the garbage collector if necessary.
static Handle<String> ToString(Handle<Object> value);
+ // Parses the RegExp pattern and prepares the JSRegExp object with
+ // generic data and choice of implementation - as well as what
+ // the implementation wants to store in the data field.
static Handle<Object> Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flags);
@@ -71,12 +74,10 @@
Handle<String> pattern,
JSRegExp::Flags flags);
- // Stores a compiled RegExp pattern in the JSRegExp object.
- // The pattern is compiled by Irregexp.
+ // Prepares a JSRegExp object with Irregexp-specific data.
static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
Handle<String> pattern,
- JSRegExp::Flags flags,
- Handle<FixedArray> irregexp_data);
+ JSRegExp::Flags flags);
// Compile the pattern using JSCRE and store the result in the
@@ -140,9 +141,10 @@
static int JscreNumberOfCaptures(Handle<JSRegExp> re);
static ByteArray* JscreInternal(Handle<JSRegExp> re);
- static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
- static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
- static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
+ static int IrregexpNumberOfCaptures(Handle<FixedArray> re);
+ static int IrregexpNumberOfRegisters(Handle<FixedArray> re);
+ static Handle<ByteArray> IrregexpByteCode(Handle<FixedArray> re);
+ static Handle<Code> IrregexpNativeCode(Handle<FixedArray> re);
// Call jsRegExpExecute once
static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
@@ -153,7 +155,7 @@
int* ovector,
int ovector_length);
- static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
+ static Handle<Object> IrregexpExecOnce(Handle<FixedArray> regexp,
int num_captures,
Handle<String> subject16,
int previous_index,
@@ -1082,7 +1084,9 @@
RegExpNode** node_return,
bool ignore_case,
bool multiline,
- Handle<String> pattern);
+ Handle<String> pattern,
+ bool is_ascii);
+
static void DotPrint(const char* label, RegExpNode* node, bool
ignore_case);
};
Modified: branches/bleeding_edge/src/objects.h
==============================================================================
--- branches/bleeding_edge/src/objects.h (original)
+++ branches/bleeding_edge/src/objects.h Mon Dec 8 04:43:01 2008
@@ -2924,7 +2924,7 @@
// ATOM: A simple string to match against using an indexOf operation.
// IRREGEXP: Compiled with Irregexp.
// IRREGEXP_NATIVE: Compiled to native code with Irregexp.
- enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP, IRREGEXP_NATIVE };
+ enum Type { NOT_COMPILED, JSCRE, ATOM, IRREGEXP };
enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
class Flags {
Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc Mon Dec 8
04:43:01 2008
@@ -111,9 +111,10 @@
void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
- ASSERT(by > 0);
- Label inside_string;
- __ add(Operand(edi), Immediate(by * char_size()));
+ if (by != 0) {
+ Label inside_string;
+ __ add(Operand(edi), Immediate(by * char_size()));
+ }
}
@@ -138,7 +139,7 @@
void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {
- UNREACHABLE();
+ UNIMPLEMENTED();
__ mov(eax, current_character());
__ sub(Operand(eax), Immediate(start));
__ cmp(eax, 64); // FIXME: 64 = length_of_bitmap_in_bits.
@@ -683,6 +684,8 @@
int byte_offset1,
int byte_offset2,
size_t
byte_length) {
+ // This function MUST NOT cause a garbage collection. A GC might move
+ // the calling generated code and invalidate the stacked return address.
ASSERT(byte_length % 2 == 0);
Address buffer_address = reinterpret_cast<Address>(*buffer);
uc16* substring1 = reinterpret_cast<uc16*>(buffer_address +
byte_offset1);
Modified: branches/bleeding_edge/test/cctest/test-regexp.cc
==============================================================================
--- branches/bleeding_edge/test/cctest/test-regexp.cc (original)
+++ branches/bleeding_edge/test/cctest/test-regexp.cc Mon Dec 8 04:43:01
2008
@@ -355,7 +355,7 @@
}
-static RegExpNode* Compile(const char* input, bool multiline) {
+static RegExpNode* Compile(const char* input, bool multiline, bool
is_ascii) {
V8::Initialize(NULL);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
@@ -363,17 +363,18 @@
return NULL;
RegExpNode* node = NULL;
Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
- RegExpEngine::Compile(&result, &node, false, multiline, pattern);
+ RegExpEngine::Compile(&result, &node, false, multiline, pattern,
is_ascii);
return node;
}
static void Execute(const char* input,
bool multiline,
+ bool is_ascii,
bool dot_output = false) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
- RegExpNode* node = Compile(input, multiline);
+ RegExpNode* node = Compile(input, multiline, is_ascii);
USE(node);
#ifdef DEBUG
if (dot_output) {
@@ -1130,7 +1131,7 @@
TEST(SimplePropagation) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
- RegExpNode* node = Compile("(a|^b|c)", false);
+ RegExpNode* node = Compile("(a|^b|c)", false, true);
CHECK(node->info()->follows_start_interest);
}
@@ -1300,5 +1301,5 @@
TEST(Graph) {
V8::Initialize(NULL);
- Execute("(?=[d#.])", false, true);
+ Execute("(?=[d#.])", false, true, true);
}
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---