Author: [email protected]
Date: Mon Mar 2 05:58:37 2009
New Revision: 1398
Modified:
branches/bleeding_edge/src/factory.cc
branches/bleeding_edge/src/factory.h
branches/bleeding_edge/src/jsregexp.cc
branches/bleeding_edge/src/jsregexp.h
branches/bleeding_edge/src/objects-debug.cc
branches/bleeding_edge/src/objects-inl.h
branches/bleeding_edge/src/objects.h
Log:
All RegExp data are set on a single FixedArray instead of nesting them
three deep.
Modified: branches/bleeding_edge/src/factory.cc
==============================================================================
--- branches/bleeding_edge/src/factory.cc (original)
+++ branches/bleeding_edge/src/factory.cc Mon Mar 2 05:58:37 2009
@@ -826,18 +826,38 @@
}
-void Factory::SetRegExpData(Handle<JSRegExp> regexp,
- JSRegExp::Type type,
- Handle<String> source,
- JSRegExp::Flags flags,
- Handle<Object> data) {
- Handle<FixedArray> store = NewFixedArray(JSRegExp::kDataSize);
+void Factory::SetRegExpAtomData(Handle<JSRegExp> regexp,
+ JSRegExp::Type type,
+ Handle<String> source,
+ JSRegExp::Flags flags,
+ Handle<Object> data) {
+ Handle<FixedArray> store = NewFixedArray(JSRegExp::kAtomDataSize);
+
store->set(JSRegExp::kTagIndex, Smi::FromInt(type));
store->set(JSRegExp::kSourceIndex, *source);
store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value()));
store->set(JSRegExp::kAtomPatternIndex, *data);
regexp->set_data(*store);
}
+
+void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
+ JSRegExp::Type type,
+ Handle<String> source,
+ JSRegExp::Flags flags,
+ int capture_count) {
+ Handle<FixedArray> store = NewFixedArray(JSRegExp::kIrregexpDataSize);
+
+ store->set(JSRegExp::kTagIndex, Smi::FromInt(type));
+ store->set(JSRegExp::kSourceIndex, *source);
+ store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value()));
+ store->set(JSRegExp::kIrregexpASCIICodeIndex, Heap::the_hole_value());
+ store->set(JSRegExp::kIrregexpUC16CodeIndex, Heap::the_hole_value());
+ store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(0));
+ store->set(JSRegExp::kIrregexpCaptureCountIndex,
+ Smi::FromInt(capture_count));
+ regexp->set_data(*store);
+}
+
void Factory::ConfigureInstance(Handle<FunctionTemplateInfo> desc,
Modified: branches/bleeding_edge/src/factory.h
==============================================================================
--- branches/bleeding_edge/src/factory.h (original)
+++ branches/bleeding_edge/src/factory.h Mon Mar 2 05:58:37 2009
@@ -316,12 +316,20 @@
Handle<FixedArray> keys);
// Creates a new FixedArray that holds the data associated with the
- // regexp and stores it in the regexp.
- static void SetRegExpData(Handle<JSRegExp> regexp,
- JSRegExp::Type type,
- Handle<String> source,
- JSRegExp::Flags flags,
- Handle<Object> data);
+ // atom regexp and stores it in the regexp.
+ static void SetRegExpAtomData(Handle<JSRegExp> regexp,
+ JSRegExp::Type type,
+ Handle<String> source,
+ JSRegExp::Flags flags,
+ Handle<Object> match_pattern);
+
+ // Creates a new FixedArray that holds the data associated with the
+ // irregexp regexp and stores it in the regexp.
+ static void SetRegExpIrregexpData(Handle<JSRegExp> regexp,
+ JSRegExp::Type type,
+ Handle<String> source,
+ JSRegExp::Flags flags,
+ int capture_count);
private:
static Handle<JSFunction> NewFunctionHelper(Handle<String> name,
Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc (original)
+++ branches/bleeding_edge/src/jsregexp.cc Mon Mar 2 05:58:37 2009
@@ -213,44 +213,41 @@
Handle<Object> result;
if (in_cache) {
re->set_data(*cached);
- result = re;
- } else {
- FlattenString(pattern);
- ZoneScope zone_scope(DELETE_ON_EXIT);
- RegExpCompileData parse_result;
- FlatStringReader reader(pattern);
- if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
- // Throw an exception if we fail to parse the pattern.
- ThrowRegExpException(re,
- pattern,
- parse_result.error,
- "malformed_regexp");
- return Handle<Object>::null();
- }
+ return re;
+ }
+ FlattenString(pattern);
+ ZoneScope zone_scope(DELETE_ON_EXIT);
+ RegExpCompileData parse_result;
+ FlatStringReader reader(pattern);
+ if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
+ // Throw an exception if we fail to parse the pattern.
+ ThrowRegExpException(re,
+ pattern,
+ parse_result.error,
+ "malformed_regexp");
+ return Handle<Object>::null();
+ }
- if (parse_result.simple && !flags.is_ignore_case()) {
- // Parse-tree is a single atom that is equal to the pattern.
- result = AtomCompile(re, pattern, flags, pattern);
- } else if (parse_result.tree->IsAtom() &&
- !flags.is_ignore_case() &&
- parse_result.capture_count == 0) {
- RegExpAtom* atom = parse_result.tree->AsAtom();
- Vector<const uc16> atom_pattern = atom->data();
- Handle<String> atom_string =
Factory::NewStringFromTwoByte(atom_pattern);
- result = AtomCompile(re, pattern, flags, atom_string);
- } else {
- result = IrregexpPrepare(re, pattern, flags);
- }
- Object* data = re->data();
- if (data->IsFixedArray()) {
- // If compilation succeeded then the data is set on the regexp
- // and we can store it in the cache.
- Handle<FixedArray> data(FixedArray::cast(re->data()));
- CompilationCache::PutRegExp(pattern, flags, data);
- }
+ if (parse_result.simple && !flags.is_ignore_case()) {
+ // Parse-tree is a single atom that is equal to the pattern.
+ AtomCompile(re, pattern, flags, pattern);
+ } else if (parse_result.tree->IsAtom() &&
+ !flags.is_ignore_case() &&
+ parse_result.capture_count == 0) {
+ RegExpAtom* atom = parse_result.tree->AsAtom();
+ Vector<const uc16> atom_pattern = atom->data();
+ Handle<String> atom_string =
Factory::NewStringFromTwoByte(atom_pattern);
+ AtomCompile(re, pattern, flags, atom_string);
+ } else {
+ IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
}
+ ASSERT(re->data()->IsFixedArray());
+ // Compilation succeeded so the data is set on the regexp
+ // and we can store it in the cache.
+ Handle<FixedArray> data(FixedArray::cast(re->data()));
+ CompilationCache::PutRegExp(pattern, flags, data);
- return result;
+ return re;
}
@@ -275,8 +272,8 @@
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
- Handle<String> subject,
- Handle<JSArray> last_match_info) {
+ Handle<String> subject,
+ Handle<JSArray> last_match_info) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject, last_match_info);
@@ -296,12 +293,15 @@
// RegExp Atom implementation: Simple string search using indexOf.
-Handle<Object> RegExpImpl::AtomCompile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
- Handle<String> match_pattern) {
- Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags,
match_pattern);
- return re;
+void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ Handle<String> match_pattern) {
+ Factory::SetRegExpAtomData(re,
+ JSRegExp::ATOM,
+ pattern,
+ flags,
+ match_pattern);
}
@@ -386,23 +386,29 @@
// Irregexp implementation.
-// Retrieves a compiled version of the regexp for either ASCII or non-ASCII
-// strings. If the compiled version doesn't already exist, it is compiled
+// Ensures that the regexp object contains a compiled version of the
+// source for either ASCII or non-ASCII strings.
+// If the compiled version doesn't already exist, it is compiled
// from the source pattern.
-// Irregexp is not feature complete yet. If there is something in the
-// regexp that the compiler cannot currently handle, an empty
-// handle is returned, but no exception is thrown.
-static Handle<FixedArray> GetCompiledIrregexp(Handle<JSRegExp> re,
- bool is_ascii) {
- ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
- Handle<FixedArray> alternatives(
- FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)));
- ASSERT_EQ(2, alternatives->length());
-
- int index = is_ascii ? 0 : 1;
- Object* entry = alternatives->get(index);
- if (!entry->IsNull()) {
- return Handle<FixedArray>(FixedArray::cast(entry));
+// If compilation fails, an exception is thrown and this function
+// returns false.
+bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
+ bool is_ascii) {
+ int index;
+ if (is_ascii) {
+ index = JSRegExp::kIrregexpASCIICodeIndex;
+ } else {
+ index = JSRegExp::kIrregexpUC16CodeIndex;
+ }
+ Object* entry = re->DataAt(index);
+ if (!entry->IsTheHole()) {
+ // A value has already been compiled.
+ if (entry->IsJSObject()) {
+ // If it's a JS value, it's an error.
+ Top::Throw(entry);
+ return false;
+ }
+ return true;
}
// Compile the RegExp.
@@ -424,54 +430,101 @@
pattern,
compile_data.error,
"malformed_regexp");
- return Handle<FixedArray>::null();
+ return false;
}
- Handle<FixedArray> compiled_entry =
+ RegExpEngine::CompilationResult result =
RegExpEngine::Compile(&compile_data,
flags.is_ignore_case(),
flags.is_multiline(),
pattern,
is_ascii);
- if (!compiled_entry.is_null()) {
- alternatives->set(index, *compiled_entry);
+ if (result.error_message != NULL) {
+ // Unable to compile regexp.
+ Handle<JSArray> array = Factory::NewJSArray(2);
+ SetElement(array, 0, pattern);
+ SetElement(array,
+ 1,
+
Factory::NewStringFromUtf8(CStrVector(result.error_message)));
+ Handle<Object> regexp_err =
+ Factory::NewSyntaxError("malformed_regexp", array);
+ Top::Throw(*regexp_err);
+ re->SetDataAt(index, *regexp_err);
+ return false;
}
- return compiled_entry;
+
+ Handle<FixedArray> data(FixedArray::cast(re->data()));
+ data->set(index, result.code);
+ int register_max = IrregexpMaxRegisterCount(data);
+ if (result.num_registers > register_max) {
+ SetIrregexpMaxRegisterCount(data, result.num_registers);
+ }
+
+ return true;
}
-int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> irre) {
- return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value();
+int RegExpImpl::IrregexpMaxRegisterCount(Handle<FixedArray> re) {
+ return Smi::cast(
+ re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
}
-int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> irre) {
- return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value();
+void RegExpImpl::SetIrregexpMaxRegisterCount(Handle<FixedArray> re, int
value) {
+ re->set(JSRegExp::kIrregexpMaxRegisterCountIndex,
+ Smi::FromInt(value));
}
-Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> irre) {
- ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
- == RegExpMacroAssembler::kBytecodeImplementation);
- return Handle<ByteArray>(ByteArray::cast(irre->get(kIrregexpCodeIndex)));
+int RegExpImpl::IrregexpNumberOfCaptures(Handle<FixedArray> re) {
+ return Smi::cast(
+ re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
}
-Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> irre) {
- ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value()
- != RegExpMacroAssembler::kBytecodeImplementation);
- return Handle<Code>(Code::cast(irre->get(kIrregexpCodeIndex)));
+int RegExpImpl::IrregexpNumberOfRegisters(Handle<FixedArray> re) {
+ return Smi::cast(
+ re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
}
-Handle<Object>RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags) {
- // Make space for ASCII and UC16 versions.
- Handle<FixedArray> alternatives = Factory::NewFixedArray(2);
- alternatives->set_null(0);
- alternatives->set_null(1);
- Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags,
alternatives);
- return re;
+Handle<ByteArray> RegExpImpl::IrregexpByteCode(Handle<FixedArray> re,
+ bool is_ascii) {
+ int index;
+ if (is_ascii) {
+ index = JSRegExp::kIrregexpASCIICodeIndex;
+ } else {
+ index = JSRegExp::kIrregexpUC16CodeIndex;
+ }
+ Object* value = re->get(index);
+ ASSERT(value->IsByteArray());
+ return Handle<ByteArray>(ByteArray::cast(value));
+}
+
+
+Handle<Code> RegExpImpl::IrregexpNativeCode(Handle<FixedArray> re,
+ bool is_ascii) {
+ int index;
+ if (is_ascii) {
+ index = JSRegExp::kIrregexpASCIICodeIndex;
+ } else {
+ index = JSRegExp::kIrregexpUC16CodeIndex;
+ }
+ Object* value = re->get(index);
+ ASSERT(value->IsCode());
+ return Handle<Code>(Code::cast(value));
+}
+
+
+void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ int capture_count) {
+ // Store the irregexp data; both compiled-code entries start as the hole.
+ Factory::SetRegExpIrregexpData(re,
+ JSRegExp::IRREGEXP,
+ pattern,
+ flags,
+ capture_count);
}
@@ -480,18 +533,16 @@
int index,
Handle<JSArray> last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
- ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray());
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
- Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
- if (irregexp.is_null()) {
- // We can't handle the RegExp with IRRegExp.
+ if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return Handle<Object>::null();
}
// Prepare space for the return values.
+ Handle<FixedArray> re_data(FixedArray::cast(regexp->data()));
int number_of_capture_registers =
- (IrregexpNumberOfCaptures(irregexp) + 1) * 2;
+ (IrregexpNumberOfCaptures(re_data) + 1) * 2;
OffsetsVector offsets(number_of_capture_registers);
int previous_index = index;
@@ -510,7 +561,7 @@
last_match_info->EnsureSize(number_of_capture_registers +
kLastMatchOverhead);
- return IrregexpExecOnce(irregexp,
+ return IrregexpExecOnce(re_data,
number_of_capture_registers,
last_match_info,
subject,
@@ -524,10 +575,10 @@
Handle<String> subject,
Handle<JSArray>
last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+ Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
bool is_ascii = StringShape(*subject).IsAsciiRepresentation();
- Handle<FixedArray> irregexp = GetCompiledIrregexp(regexp, is_ascii);
- if (irregexp.is_null()) {
+ if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return Handle<Object>::null();
}
@@ -605,135 +656,124 @@
}
-Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
+Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
int
number_of_capture_registers,
Handle<JSArray>
last_match_info,
Handle<String> subject,
int previous_index,
int* offsets_vector,
int offsets_vector_length) {
- ASSERT(subject->IsFlat(StringShape(*subject)));
+ StringShape shape(*subject);
+ ASSERT(subject->IsFlat(shape));
+ bool is_ascii = shape.IsAsciiRepresentation();
bool rc;
- int tag =
Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
-
- switch (tag) {
- case RegExpMacroAssembler::kIA32Implementation: {
+ if (FLAG_regexp_native) {
#ifndef ARM
- Handle<Code> code = IrregexpNativeCode(irregexp);
-
- StringShape shape(*subject);
+ Handle<Code> code(IrregexpNativeCode(regexp, is_ascii));
- // Character offsets into string.
- int start_offset = previous_index;
- int end_offset = subject->length(shape);
-
- if (shape.IsCons()) {
- subject = Handle<String>(ConsString::cast(*subject)->first());
- } else if (shape.IsSliced()) {
- SlicedString* slice = SlicedString::cast(*subject);
- start_offset += slice->start();
- end_offset += slice->start();
- subject = Handle<String>(slice->buffer());
- }
-
- // String is now either Sequential or External
- StringShape flatshape(*subject);
- bool is_ascii = flatshape.IsAsciiRepresentation();
- int char_size_shift = is_ascii ? 0 : 1;
-
- RegExpMacroAssemblerIA32::Result res;
-
- if (flatshape.IsExternal()) {
- const byte* address;
- if (is_ascii) {
- ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
- address = reinterpret_cast<const byte*>(ext->resource()->data());
- } else {
- ExternalTwoByteString* ext =
ExternalTwoByteString::cast(*subject);
- address = reinterpret_cast<const byte*>(ext->resource()->data());
- }
- res = RegExpMacroAssemblerIA32::Execute(
- *code,
- const_cast<Address*>(&address),
- start_offset << char_size_shift,
- end_offset << char_size_shift,
- offsets_vector,
- previous_index == 0);
- } else { // Sequential string
- ASSERT(StringShape(*subject).IsSequential());
- Address char_address =
- is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
- : SeqTwoByteString::cast(*subject)->GetCharsAddress();
- int byte_offset = char_address -
reinterpret_cast<Address>(*subject);
- res = RegExpMacroAssemblerIA32::Execute(
- *code,
- reinterpret_cast<Address*>(subject.location()),
- byte_offset + (start_offset << char_size_shift),
- byte_offset + (end_offset << char_size_shift),
- offsets_vector,
- previous_index == 0);
+ // Character offsets into string.
+ int start_offset = previous_index;
+ int end_offset = subject->length(shape);
+
+ if (shape.IsCons()) {
+ subject = Handle<String>(ConsString::cast(*subject)->first());
+ } else if (shape.IsSliced()) {
+ SlicedString* slice = SlicedString::cast(*subject);
+ start_offset += slice->start();
+ end_offset += slice->start();
+ subject = Handle<String>(slice->buffer());
+ }
+
+ // String is now either Sequential or External
+ StringShape flatshape(*subject);
+ bool is_ascii = flatshape.IsAsciiRepresentation();
+ int char_size_shift = is_ascii ? 0 : 1;
+
+ RegExpMacroAssemblerIA32::Result res;
+
+ if (flatshape.IsExternal()) {
+ const byte* address;
+ if (is_ascii) {
+ ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
+ address = reinterpret_cast<const byte*>(ext->resource()->data());
+ } else {
+ ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
+ address = reinterpret_cast<const byte*>(ext->resource()->data());
}
+ res = RegExpMacroAssemblerIA32::Execute(
+ *code,
+ const_cast<Address*>(&address),
+ start_offset << char_size_shift,
+ end_offset << char_size_shift,
+ offsets_vector,
+ previous_index == 0);
+ } else { // Sequential string
+ ASSERT(StringShape(*subject).IsSequential());
+ Address char_address =
+ is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
+ : SeqTwoByteString::cast(*subject)->GetCharsAddress();
+ int byte_offset = char_address - reinterpret_cast<Address>(*subject);
+ res = RegExpMacroAssemblerIA32::Execute(
+ *code,
+ reinterpret_cast<Address*>(subject.location()),
+ byte_offset + (start_offset << char_size_shift),
+ byte_offset + (end_offset << char_size_shift),
+ offsets_vector,
+ previous_index == 0);
+ }
- if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
- ASSERT(Top::has_pending_exception());
- return Handle<Object>::null();
- }
- rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
+ if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
+ ASSERT(Top::has_pending_exception());
+ return Handle<Object>::null();
+ }
+ rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
- if (rc) {
- // Capture values are relative to start_offset only.
- for (int i = 0; i < offsets_vector_length; i++) {
- if (offsets_vector[i] >= 0) {
- offsets_vector[i] += previous_index;
- }
+ if (rc) {
+ // Capture values are relative to start_offset only.
+ for (int i = 0; i < offsets_vector_length; i++) {
+ if (offsets_vector[i] >= 0) {
+ offsets_vector[i] += previous_index;
}
}
- break;
+ }
+ } else {
#else
- UNIMPLEMENTED();
- rc = false;
- break;
+ // Unimplemented on ARM, fall through to bytecode.
+ }
+ {
#endif
+ for (int i = number_of_capture_registers - 1; i >= 0; i--) {
+ offsets_vector[i] = -1;
}
- case RegExpMacroAssembler::kBytecodeImplementation: {
- for (int i = number_of_capture_registers - 1; i >= 0; i--) {
- offsets_vector[i] = -1;
- }
- Handle<ByteArray> byte_codes = IrregexpByteCode(irregexp);
-
- rc = IrregexpInterpreter::Match(byte_codes,
- subject,
- offsets_vector,
- previous_index);
- break;
- }
- case RegExpMacroAssembler::kARMImplementation:
- default:
- UNREACHABLE();
- rc = false;
- break;
+ Handle<ByteArray> byte_codes = IrregexpByteCode(regexp, is_ascii);
+
+ rc = IrregexpInterpreter::Match(byte_codes,
+ subject,
+ offsets_vector,
+ previous_index);
}
if (!rc) {
return Factory::null_value();
}
- Handle<FixedArray> array(last_match_info->elements());
+ FixedArray* array = last_match_info->elements();
+ ASSERT(array->length() >= number_of_capture_registers +
kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
- SetCapture(*array, i, offsets_vector[i]);
- SetCapture(*array, i + 1, offsets_vector[i + 1]);
+ SetCapture(array, i, offsets_vector[i]);
+ SetCapture(array, i + 1, offsets_vector[i + 1]);
}
- SetLastCaptureCount(*array, number_of_capture_registers);
- SetLastSubject(*array, *subject);
- SetLastInput(*array, *subject);
+ SetLastCaptureCount(array, number_of_capture_registers);
+ SetLastSubject(array, *subject);
+ SetLastInput(array, *subject);
return last_match_info;
}
// -------------------------------------------------------------------
-// Implmentation of the Irregexp regular expression engine.
+// Implementation of the Irregexp regular expression engine.
//
// The Irregexp regular expression engine is intended to be a complete
// implementation of ECMAScript regular expressions. It generates either
@@ -950,10 +990,10 @@
return next_register_++;
}
- Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
- RegExpNode* start,
- int capture_count,
- Handle<String> pattern);
+ RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler,
+ RegExpNode* start,
+ int capture_count,
+ Handle<String> pattern);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
@@ -998,15 +1038,8 @@
};
-static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) {
- Handle<JSArray> array = Factory::NewJSArray(2);
- SetElement(array, 0, pattern);
- const char* message = "RegExp too big";
- SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
- Handle<Object> regexp_err =
- Factory::NewSyntaxError("malformed_regexp", array);
- Top::Throw(*regexp_err);
- return Handle<FixedArray>();
+static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
+ return RegExpEngine::CompilationResult("RegExp too big");
}
@@ -1024,7 +1057,7 @@
}
-Handle<FixedArray> RegExpCompiler::Assemble(
+RegExpEngine::CompilationResult RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler,
RegExpNode* start,
int capture_count,
@@ -1046,24 +1079,17 @@
while (!work_list.is_empty()) {
work_list.RemoveLast()->Emit(this, &new_trace);
}
- if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern);
- Handle<FixedArray> array =
- Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
- array->set(RegExpImpl::kIrregexpImplementationIndex,
- Smi::FromInt(macro_assembler_->Implementation()));
- array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
- Smi::FromInt(next_register_));
- array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
- Smi::FromInt(capture_count));
+ if (reg_exp_too_big_) return IrregexpRegExpTooBig();
+
Handle<Object> code = macro_assembler_->GetCode(pattern);
- array->set(RegExpImpl::kIrregexpCodeIndex, *code);
+
work_list_ = NULL;
#ifdef DEBUG
if (FLAG_trace_regexp_assembler) {
delete macro_assembler_;
}
#endif
- return array;
+ return RegExpEngine::CompilationResult(*code, next_register_);
}
@@ -4647,13 +4673,13 @@
}
-Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
- bool ignore_case,
- bool is_multiline,
- Handle<String> pattern,
- bool is_ascii) {
+RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData*
data,
+ bool ignore_case,
+ bool is_multiline,
+ Handle<String>
pattern,
+ bool is_ascii) {
if ((data->capture_count + 1) * 2 - 1 >
RegExpMacroAssembler::kMaxRegister) {
- return IrregexpRegExpTooBig(pattern);
+ return IrregexpRegExpTooBig();
}
RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
// Wrap the body of the regexp in capture #0.
Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h (original)
+++ branches/bleeding_edge/src/jsregexp.h Mon Mar 2 05:58:37 2009
@@ -51,6 +51,7 @@
// Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what
// the implementation wants to store in the data field.
+ // Returns a null handle if compilation fails.
static Handle<Object> Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flags);
@@ -70,15 +71,16 @@
Handle<JSArray> lastMatchInfo);
// Prepares a JSRegExp object with Irregexp-specific data.
- static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags);
+ static void IrregexpPrepare(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ int capture_register_count);
- static Handle<Object> AtomCompile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
- Handle<String> match_pattern);
+ static void AtomCompile(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ Handle<String> match_pattern);
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
@@ -107,12 +109,6 @@
static Handle<String> StringToTwoByte(Handle<String> pattern);
static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
- static const int kIrregexpImplementationIndex = 0;
- static const int kIrregexpNumberOfCapturesIndex = 1;
- static const int kIrregexpNumberOfRegistersIndex = 2;
- static const int kIrregexpCodeIndex = 3;
- static const int kIrregexpDataLength = 4;
-
// Offsets in the lastMatchInfo array.
static const int kLastCaptureCount = 0;
static const int kLastSubject = 1;
@@ -141,10 +137,15 @@
static String* last_ascii_string_;
static String* two_byte_cached_string_;
+ static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
+
+ static int IrregexpMaxRegisterCount(Handle<FixedArray> re);
+ static void SetIrregexpMaxRegisterCount(Handle<FixedArray> re, int
value);
static int IrregexpNumberOfCaptures(Handle<FixedArray> re);
static int IrregexpNumberOfRegisters(Handle<FixedArray> re);
- static Handle<ByteArray> IrregexpByteCode(Handle<FixedArray> re);
- static Handle<Code> IrregexpNativeCode(Handle<FixedArray> re);
+ static Handle<ByteArray> IrregexpByteCode(Handle<FixedArray> re,
+ bool is_ascii);
+ static Handle<Code> IrregexpNativeCode(Handle<FixedArray> re, bool
is_ascii);
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
@@ -1354,11 +1355,25 @@
class RegExpEngine: public AllStatic {
public:
- static Handle<FixedArray> Compile(RegExpCompileData* input,
- bool ignore_case,
- bool multiline,
- Handle<String> pattern,
- bool is_ascii);
+ struct CompilationResult {
+ explicit CompilationResult(const char* error_message)
+ : error_message(error_message),
+ code(Heap::the_hole_value()),
+ num_registers(0) {}
+ CompilationResult(Object* code, int registers)
+ : error_message(NULL),
+ code(code),
+ num_registers(registers) {}
+ const char* error_message;
+ Object* code;
+ int num_registers;
+ };
+
+ static CompilationResult Compile(RegExpCompileData* input,
+ bool ignore_case,
+ bool multiline,
+ Handle<String> pattern,
+ bool is_ascii);
static void DotPrint(const char* label, RegExpNode* node, bool
ignore_case);
};
Modified: branches/bleeding_edge/src/objects-debug.cc
==============================================================================
--- branches/bleeding_edge/src/objects-debug.cc (original)
+++ branches/bleeding_edge/src/objects-debug.cc Mon Mar 2 05:58:37 2009
@@ -697,8 +697,18 @@
}
case JSRegExp::IRREGEXP: {
FixedArray* arr = FixedArray::cast(data());
- Object* irregexp_data = arr->get(JSRegExp::kIrregexpDataIndex);
- ASSERT(irregexp_data->IsFixedArray());
+ Object* ascii_data = arr->get(JSRegExp::kIrregexpASCIICodeIndex);
+ ASSERT(ascii_data->IsTheHole()
+ || (FLAG_regexp_native ?
+ ascii_data->IsCode()
+ : ascii_data->IsByteArray()));
+ Object* uc16_data = arr->get(JSRegExp::kIrregexpUC16CodeIndex);
+ ASSERT(uc16_data->IsTheHole()
+ || (FLAG_regexp_native ?
+ uc16_data->IsCode()
+ : uc16_data->IsByteArray()));
+ ASSERT(arr->get(JSRegExp::kIrregexpCaptureCountIndex)->IsSmi());
+ ASSERT(arr->get(JSRegExp::kIrregexpMaxRegisterCountIndex)->IsSmi());
break;
}
default:
Modified: branches/bleeding_edge/src/objects-inl.h
==============================================================================
--- branches/bleeding_edge/src/objects-inl.h (original)
+++ branches/bleeding_edge/src/objects-inl.h Mon Mar 2 05:58:37 2009
@@ -2337,6 +2337,13 @@
}
+void JSRegExp::SetDataAt(int index, Object* value) {
+ ASSERT(TypeTag() != NOT_COMPILED);
+ ASSERT(index >= kDataIndex); // Only implementation data can be set
this way.
+ FixedArray::cast(data())->set(index, value);
+}
+
+
bool JSObject::HasFastElements() {
return !elements()->IsDictionary();
}
Modified: branches/bleeding_edge/src/objects.h
==============================================================================
--- branches/bleeding_edge/src/objects.h (original)
+++ branches/bleeding_edge/src/objects.h Mon Mar 2 05:58:37 2009
@@ -2946,6 +2946,19 @@
};
// Regular expressions
+// The regular expression holds a single reference to a FixedArray in
+// the kDataOffset field.
+// The FixedArray contains the following data:
+// - tag : type of regexp implementation (not compiled yet, atom or irregexp)
+// - reference to the original source string
+// - reference to the original flag string
+// If it is an atom regexp
+// - a reference to a literal string to search for
+// If it is an irregexp regexp:
+// - a reference to code for ASCII inputs (bytecode or compiled).
+// - a reference to code for UC16 inputs (bytecode or compiled).
+// - max number of registers used by irregexp implementations.
+// - number of captures in the regexp.
class JSRegExp: public JSObject {
public:
// Meaning of Type:
@@ -2973,6 +2986,8 @@
inline Flags GetFlags();
inline String* Pattern();
inline Object* DataAt(int index);
+ // Set implementation data after the object has been prepared.
+ inline void SetDataAt(int index, Object* value);
static inline JSRegExp* cast(Object* obj);
@@ -2984,14 +2999,29 @@
static const int kDataOffset = JSObject::kHeaderSize;
static const int kSize = kDataOffset + kIntSize;
+ // Indices in the data array.
static const int kTagIndex = 0;
static const int kSourceIndex = kTagIndex + 1;
static const int kFlagsIndex = kSourceIndex + 1;
- // These two are the same since the same entry is shared for
- // different purposes in different types of regexps.
- static const int kAtomPatternIndex = kFlagsIndex + 1;
- static const int kIrregexpDataIndex = kFlagsIndex + 1;
- static const int kDataSize = kAtomPatternIndex + 1;
+ static const int kDataIndex = kFlagsIndex + 1;
+ // The data fields are used in different ways depending on the
+ // value of the tag.
+ // Atom regexps (literal strings).
+ static const int kAtomPatternIndex = kDataIndex;
+
+ static const int kAtomDataSize = kAtomPatternIndex + 1;
+
+ // Irregexp compiled code or bytecode for ASCII.
+ static const int kIrregexpASCIICodeIndex = kDataIndex;
+ // Irregexp compiled code or bytecode for UC16.
+ static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
+ // Maximal number of registers used by either ASCII or UC16.
+ // Only used to check that there is enough stack space
+ static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2;
+ // Number of captures in the compiled regexp.
+ static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
+
+ static const int kIrregexpDataSize = kIrregexpCaptureCountIndex + 1;
};
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---