Revision: 3542
Author: [email protected]
Date: Wed Jan 6 03:09:30 2010
Log: Direct call to native RegExp code from JavaScript.
Calls to RegExp no longer have to be via a call to the runtime system. A
new stub has been added which can handle this call in generated code. The
stub checks all the parameters and creates the RegExp entry frame in the same
way as it is created by the runtime system. Bailout to the runtime system
is done whenever an uncommon situation is encountered or when the static
data used is not initialized. After running the native RegExp code the last
match info is updated like in the runtime system.
Currently only ASCII strings are handled.
Added another argument to the RegExp entry frame. It indicates whether the
call is direct from JavaScript code or through the runtime system. This
information is used when RegExp execution is interrupted. If an
interruption happens when RegExp code is called directly a retry is issued
causing the interruption to be handled via the runtime system. The reason
for this is that the direct call to RegExp code does not support garbage
collection.
Review URL: http://codereview.chromium.org/521028
http://code.google.com/p/v8/source/detail?r=3542
Modified:
/branches/bleeding_edge/src/arm/codegen-arm.cc
/branches/bleeding_edge/src/arm/codegen-arm.h
/branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.cc
/branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.h
/branches/bleeding_edge/src/arm/simulator-arm.h
/branches/bleeding_edge/src/assembler.cc
/branches/bleeding_edge/src/assembler.h
/branches/bleeding_edge/src/code-stubs.h
/branches/bleeding_edge/src/codegen.cc
/branches/bleeding_edge/src/codegen.h
/branches/bleeding_edge/src/flag-definitions.h
/branches/bleeding_edge/src/ia32/codegen-ia32.cc
/branches/bleeding_edge/src/ia32/codegen-ia32.h
/branches/bleeding_edge/src/ia32/macro-assembler-ia32.cc
/branches/bleeding_edge/src/ia32/macro-assembler-ia32.h
/branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc
/branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.h
/branches/bleeding_edge/src/ia32/simulator-ia32.h
/branches/bleeding_edge/src/jsregexp.cc
/branches/bleeding_edge/src/jsregexp.h
/branches/bleeding_edge/src/objects.h
/branches/bleeding_edge/src/regexp-delay.js
/branches/bleeding_edge/src/regexp-macro-assembler.cc
/branches/bleeding_edge/src/regexp-stack.h
/branches/bleeding_edge/src/v8-counters.h
/branches/bleeding_edge/src/x64/codegen-x64.cc
/branches/bleeding_edge/src/x64/codegen-x64.h
/branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc
/branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.h
/branches/bleeding_edge/src/x64/simulator-x64.h
=======================================
--- /branches/bleeding_edge/src/arm/codegen-arm.cc Tue Jan 5 01:38:02 2010
+++ /branches/bleeding_edge/src/arm/codegen-arm.cc Wed Jan 6 03:09:30 2010
@@ -3449,6 +3449,19 @@
frame_->CallRuntime(Runtime::kStringAdd, 2);
frame_->EmitPush(r0);
}
+
+
+void CodeGenerator::GenerateRegExpExec(ZoneList<Expression*>* args) {
+ ASSERT_EQ(4, args->length());
+
+ Load(args->at(0));
+ Load(args->at(1));
+ Load(args->at(2));
+ Load(args->at(3));
+
+ frame_->CallRuntime(Runtime::kRegExpExec, 4);
+ frame_->EmitPush(r0);
+}
void CodeGenerator::GenerateObjectEquals(ZoneList<Expression*>* args) {
=======================================
--- /branches/bleeding_edge/src/arm/codegen-arm.h Mon Dec 21 05:30:10 2009
+++ /branches/bleeding_edge/src/arm/codegen-arm.h Wed Jan 6 03:09:30 2010
@@ -363,6 +363,9 @@
// Fast support for StringAdd.
void GenerateStringAdd(ZoneList<Expression*>* args);
+ // Support for direct calls from JavaScript to native RegExp code.
+ void GenerateRegExpExec(ZoneList<Expression*>* args);
+
// Simple condition analysis.
enum ConditionAnalysis {
ALWAYS_TRUE,
=======================================
--- /branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.cc Thu Nov
5 05:59:40 2009
+++ /branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.cc Wed Jan
6 03:09:30 2010
@@ -59,15 +59,19 @@
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
+ * - direct_call (if 1, direct call from JavaScript code, if
0 call
+ * through the runtime system)
* - stack_area_base (High end of the memory area to use as
* backtracking stack)
- * - at_start (if 1, start at start of string, if 0, don't)
+ * - at_start (if 1, we are starting at the start of the
+ * string, otherwise 0)
+ * - int* capture_array (int[num_saved_registers_], for output).
* --- sp when called ---
* - link address
* - backup of registers r4..r11
- * - int* capture_array (int[num_saved_registers_], for output).
* - end of input (Address of end of string)
* - start of input (Address of first character in string)
+ * - start index (character index of start)
* --- frame pointer ----
* - void* input_string (location of a handle containing the string)
* - Offset of location before start of input (effectively character
@@ -85,11 +89,13 @@
* The data up to the return address must be placed there by the calling
* code, by calling the code entry as cast to a function with the
signature:
* int (*match)(String* input_string,
+ * int start_index,
* Address start,
* Address end,
* int* capture_output_array,
* bool at_start,
- * byte* stack_area_base)
+ * byte* stack_area_base,
+ * bool direct_call)
* The call is performed by NativeRegExpMacroAssembler::Execute()
* (in regexp-macro-assembler.cc).
*/
=======================================
--- /branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.h Mon Oct 26
05:26:42 2009
+++ /branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.h Wed Jan 6
03:09:30 2010
@@ -127,6 +127,7 @@
static const int kRegisterOutput = kReturnAddress + kPointerSize;
static const int kAtStart = kRegisterOutput + kPointerSize;
static const int kStackHighEnd = kAtStart + kPointerSize;
+ static const int kDirectCall = kStackHighEnd + kPointerSize;
// Below the frame pointer.
// Register parameters stored by setup code.
=======================================
--- /branches/bleeding_edge/src/arm/simulator-arm.h Thu Nov 12 05:55:21 2009
+++ /branches/bleeding_edge/src/arm/simulator-arm.h Wed Jan 6 03:09:30 2010
@@ -62,9 +62,9 @@
// Call the generated regexp code directly. The entry function pointer
should
-// expect seven int/pointer sized arguments and return an int.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6) \
- entry(p0, p1, p2, p3, p4, p5, p6)
+// expect eight int/pointer sized arguments and return an int.
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
+ entry(p0, p1, p2, p3, p4, p5, p6, p7)
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
reinterpret_cast<TryCatch*>(try_catch_address)
@@ -79,9 +79,9 @@
assembler::arm::Simulator::current()->Call(FUNCTION_ADDR(entry), 5, \
p0, p1, p2, p3, p4))
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6) \
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
assembler::arm::Simulator::current()->Call( \
- FUNCTION_ADDR(entry), 7, p0, p1, p2, p3, p4, p5, p6)
+ FUNCTION_ADDR(entry), 8, p0, p1, p2, p3, p4, p5, p6, p7)
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
try_catch_address == NULL ? \
=======================================
--- /branches/bleeding_edge/src/assembler.cc Thu Dec 10 07:10:50 2009
+++ /branches/bleeding_edge/src/assembler.cc Wed Jan 6 03:09:30 2010
@@ -673,6 +673,19 @@
return ExternalReference(Redirect(
FUNCTION_ADDR(NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16)));
}
+
+
+ExternalReference ExternalReference::address_of_static_offsets_vector() {
+ return ExternalReference(OffsetsVector::static_offsets_vector_address());
+}
+
+ExternalReference
ExternalReference::address_of_regexp_stack_memory_address() {
+ return ExternalReference(RegExpStack::memory_address());
+}
+
+ExternalReference ExternalReference::address_of_regexp_stack_memory_size()
{
+ return ExternalReference(RegExpStack::memory_size_address());
+}
#endif
=======================================
--- /branches/bleeding_edge/src/assembler.h Thu Dec 10 07:10:50 2009
+++ /branches/bleeding_edge/src/assembler.h Wed Jan 6 03:09:30 2010
@@ -420,6 +420,11 @@
// Static variable RegExpStack::limit_address()
static ExternalReference address_of_regexp_stack_limit();
+ // Static variables for RegExp.
+ static ExternalReference address_of_static_offsets_vector();
+ static ExternalReference address_of_regexp_stack_memory_address();
+ static ExternalReference address_of_regexp_stack_memory_size();
+
// Static variable Heap::NewSpaceStart()
static ExternalReference new_space_start();
static ExternalReference heap_always_allocate_scope_depth();
=======================================
--- /branches/bleeding_edge/src/code-stubs.h Tue Dec 22 02:16:27 2009
+++ /branches/bleeding_edge/src/code-stubs.h Wed Jan 6 03:09:30 2010
@@ -52,6 +52,7 @@
V(Instanceof) \
V(CounterOp) \
V(ArgumentsAccess) \
+ V(RegExpExec) \
V(Runtime) \
V(CEntry) \
V(JSEntry)
=======================================
--- /branches/bleeding_edge/src/codegen.cc Tue Dec 22 02:16:27 2009
+++ /branches/bleeding_edge/src/codegen.cc Wed Jan 6 03:09:30 2010
@@ -345,6 +345,7 @@
{&CodeGenerator::GenerateIsObject, "_IsObject"},
{&CodeGenerator::GenerateIsFunction, "_IsFunction"},
{&CodeGenerator::GenerateStringAdd, "_StringAdd"},
+ {&CodeGenerator::GenerateRegExpExec, "_RegExpExec"},
};
=======================================
--- /branches/bleeding_edge/src/codegen.h Tue Dec 22 02:16:27 2009
+++ /branches/bleeding_edge/src/codegen.h Wed Jan 6 03:09:30 2010
@@ -478,6 +478,26 @@
};
+class RegExpExecStub: public CodeStub {
+ public:
+ RegExpExecStub() { }
+
+ private:
+ Major MajorKey() { return RegExpExec; }
+ int MinorKey() { return 0; }
+
+ void Generate(MacroAssembler* masm);
+
+ const char* GetName() { return "RegExpExecStub"; }
+
+#ifdef DEBUG
+ void Print() {
+ PrintF("RegExpExecStub\n");
+ }
+#endif
+};
+
+
} // namespace internal
} // namespace v8
=======================================
--- /branches/bleeding_edge/src/flag-definitions.h Fri Dec 18 05:38:09 2009
+++ /branches/bleeding_edge/src/flag-definitions.h Wed Jan 6 03:09:30 2010
@@ -231,6 +231,7 @@
// Regexp
DEFINE_bool(trace_regexps, false, "trace regexp execution")
DEFINE_bool(regexp_optimization, true, "generate optimized regexp code")
+DEFINE_bool(regexp_entry_native, true, "use native code to enter regexp")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_bool(testing_bool_flag, true, "testing_bool_flag")
=======================================
--- /branches/bleeding_edge/src/ia32/codegen-ia32.cc Tue Jan 5 04:02:18
2010
+++ /branches/bleeding_edge/src/ia32/codegen-ia32.cc Wed Jan 6 03:09:30
2010
@@ -32,7 +32,10 @@
#include "compiler.h"
#include "debug.h"
#include "ic-inl.h"
+#include "jsregexp.h"
#include "parser.h"
+#include "regexp-macro-assembler.h"
+#include "regexp-stack.h"
#include "register-allocator-inl.h"
#include "runtime.h"
#include "scopes.h"
@@ -5277,6 +5280,20 @@
Result answer = frame_->CallStub(&stub, 2);
frame_->Push(&answer);
}
+
+
+void CodeGenerator::GenerateRegExpExec(ZoneList<Expression*>* args) {
+ ASSERT_EQ(args->length(), 4);
+
+ // Load the arguments on the stack and call the stub.
+ Load(args->at(0));
+ Load(args->at(1));
+ Load(args->at(2));
+ Load(args->at(3));
+ RegExpExecStub stub;
+ Result result = frame_->CallStub(&stub, 4);
+ frame_->Push(&result);
+}
void CodeGenerator::VisitCallRuntime(CallRuntime* node) {
@@ -7908,6 +7925,277 @@
__ bind(&runtime);
__ TailCallRuntime(ExternalReference(Runtime::kNewArgumentsFast), 3, 1);
}
+
+
+// Generates the stub that runs a RegExp directly from generated code.
+//
+// The stub validates its four stack arguments (JSRegExp object, subject
+// string, previous index and last_match_info), pushes the eight arguments of
+// the native RegExp entry frame, calls the compiled ASCII irregexp code and,
+// on success, copies the captures from the static offsets vector into
+// last_match_info. Whenever a check fails, or the native code does not
+// report SUCCESS, FAILURE or an already pending exception, the stub
+// tail-calls Runtime::kRegExpExec with the original four arguments.
+void RegExpExecStub::Generate(MacroAssembler* masm) {
+ // Just jump directly to runtime if regexp entry in generated code is
turned
+ // off.
+ if (!FLAG_regexp_entry_native) {
+ __ TailCallRuntime(ExternalReference(Runtime::kRegExpExec), 4, 1);
+ return;
+ }
+
+ // Stack frame on entry.
+ // esp[0]: return address
+ // esp[4]: last_match_info (expected JSArray)
+ // esp[8]: previous index
+ // esp[12]: subject string
+ // esp[16]: JSRegExp object
+
+ Label runtime;
+
+ // Check that the first argument is a JSRegExp object.
+ __ mov(eax, Operand(esp, 4 * kPointerSize));
+ ASSERT_EQ(0, kSmiTag);
+ __ test(eax, Immediate(kSmiTagMask));
+ __ j(zero, &runtime);
+ __ CmpObjectType(eax, JS_REGEXP_TYPE, ecx);
+ __ j(not_equal, &runtime);
+ // Check that the RegExp has been compiled (data contains a fixed array).
+ __ mov(ecx, FieldOperand(eax, JSRegExp::kDataOffset));
+#ifdef DEBUG
+ __ test(ecx, Immediate(kSmiTagMask));
+ __ Check(not_zero, "Unexpected type for RegExp data, FixedArray
expected");
+ __ CmpObjectType(ecx, FIXED_ARRAY_TYPE, ebx);
+ __ Check(equal, "Unexpected type for RegExp data, FixedArray expected");
+#endif
+
+ // ecx: RegExp data (FixedArray)
+ // Check the type of the RegExp. Only continue if type is
JSRegExp::IRREGEXP.
+ __ mov(ebx, FieldOperand(ecx, JSRegExp::kDataTagOffset));
+ __ cmp(Operand(ebx), Immediate(Smi::FromInt(JSRegExp::IRREGEXP)));
+ __ j(not_equal, &runtime);
+
+ // ecx: RegExp data (FixedArray)
+ // Check that the number of captures fit in the static offsets vector
buffer.
+ __ mov(edx, FieldOperand(ecx, JSRegExp::kIrregexpCaptureCountOffset));
+ // Calculate number of capture registers (number_of_captures + 1) * 2.
This
+ // uses the assumption that smis are 2 * their untagged value.
+ ASSERT_EQ(0, kSmiTag);
+ ASSERT_EQ(1, kSmiTagSize + kSmiShiftSize);
+ __ add(Operand(edx), Immediate(2)); // edx was a smi.
+ // Check that the static offsets vector buffer is large enough.
+ __ cmp(edx, OffsetsVector::kStaticOffsetsVectorSize);
+ __ j(above, &runtime);
+
+ // ecx: RegExp data (FixedArray)
+ // edx: Number of capture registers
+ // Check that the second argument is a string.
+ __ mov(eax, Operand(esp, 3 * kPointerSize));
+ __ test(eax, Immediate(kSmiTagMask));
+ __ j(zero, &runtime);
+ Condition is_string = masm->IsObjectStringType(eax, ebx, ebx);
+ __ j(NegateCondition(is_string), &runtime);
+ // Get the length of the string to ebx.
+ __ mov(ebx, FieldOperand(eax, String::kLengthOffset));
+
+ // ebx: Length of subject string
+ // ecx: RegExp data (FixedArray)
+ // edx: Number of capture registers
+ // Check that the third argument is a positive smi.
+ __ mov(eax, Operand(esp, 2 * kPointerSize));
+ __ test(eax, Immediate(kSmiTagMask | 0x80000000));
+ __ j(not_zero, &runtime);
+ // Check that it is not greater than the subject string length.
+ __ SmiUntag(eax);
+ __ cmp(eax, Operand(ebx));
+ __ j(greater, &runtime);
+
+ // ecx: RegExp data (FixedArray)
+ // edx: Number of capture registers
+ // Check that the fourth object is a JSArray object.
+ __ mov(eax, Operand(esp, 1 * kPointerSize));
+ __ test(eax, Immediate(kSmiTagMask));
+ __ j(zero, &runtime);
+ __ CmpObjectType(eax, JS_ARRAY_TYPE, ebx);
+ __ j(not_equal, &runtime);
+ // Check that the JSArray is in fast case.
+ __ mov(ebx, FieldOperand(eax, JSArray::kElementsOffset));
+ // Compare the map of the elements backing store, not the JSArray itself;
+ // eax is free to use as scratch because it is reloaded below.
+ __ mov(eax, FieldOperand(ebx, HeapObject::kMapOffset));
+ __ cmp(eax, Factory::fixed_array_map());
+ __ j(not_equal, &runtime);
+ // Check that the last match info has space for the capture registers
and the
+ // additional information.
+ __ mov(eax, FieldOperand(ebx, FixedArray::kLengthOffset));
+ __ add(Operand(edx), Immediate(RegExpImpl::kLastMatchOverhead));
+ __ cmp(edx, Operand(eax));
+ __ j(greater, &runtime);
+
+ // ecx: RegExp data (FixedArray)
+ // Check the representation and encoding of the subject string (only
support
+ // flat ascii strings).
+ __ mov(eax, Operand(esp, 3 * kPointerSize));
+ __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
+ __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
+ __ and_(ebx, kStringRepresentationMask | kStringEncodingMask);
+ __ cmp(ebx, kSeqStringTag | kAsciiStringTag);
+ __ j(not_equal, &runtime);
+
+ // ecx: RegExp data (FixedArray)
+ // Ensure that a RegExp stack is allocated.
+ ExternalReference address_of_regexp_stack_memory_address =
+ ExternalReference::address_of_regexp_stack_memory_address();
+ ExternalReference address_of_regexp_stack_memory_size =
+ ExternalReference::address_of_regexp_stack_memory_size();
+ __ mov(eax,
Operand::StaticVariable(address_of_regexp_stack_memory_size));
+ __ test(eax, Operand(eax));
+ __ j(zero, &runtime, not_taken);
+
+ // ecx: RegExp data (FixedArray)
+ // Check that the irregexp code has been generated for an ascii string.
If
+ // it has the field contains a code object otherwise it contains the
hole.
+ __ mov(edx, FieldOperand(ecx, JSRegExp::kDataAsciiCodeOffset));
+ __ CmpObjectType(edx, CODE_TYPE, ebx);
+ __ j(not_equal, &runtime);
+
+ // Load used arguments before starting to push arguments for call to
native
+ // RegExp code to avoid handling changing stack height.
+ __ mov(eax, Operand(esp, 3 * kPointerSize)); // Subject string.
+ __ mov(ebx, Operand(esp, 2 * kPointerSize)); // Previous index.
+ __ mov(ecx, Operand(esp, 4 * kPointerSize)); // JSRegExp object.
+ __ SmiUntag(ebx); // Previous index from smi.
+
+ // eax: subject string
+ // ebx: previous index
+ // edx: code
+ // All checks done. Now push arguments for native regexp code.
+ __ IncrementCounter(&Counters::regexp_entry_native, 1);
+
+ // Argument 8: Indicate that this is a direct call from JavaScript.
+ __ push(Immediate(1));
+
+ // Argument 7: Start (high end) of backtracking stack memory area.
+ __ mov(ecx,
Operand::StaticVariable(address_of_regexp_stack_memory_address));
+ __ add(ecx,
Operand::StaticVariable(address_of_regexp_stack_memory_size));
+ __ push(ecx);
+
+ // Argument 6: At start of string?
+ __ xor_(Operand(ecx), ecx); // setcc only operates on cl (lower byte of
ecx).
+ __ test(ebx, Operand(ebx));
+ __ setcc(zero, ecx); // 1 if 0 (start of string), 0 if positive.
+ __ push(ecx);
+
+ // Argument 5: static offsets vector buffer.
+ __
push(Immediate(ExternalReference::address_of_static_offsets_vector()));
+
+ // Argument 4: End of string data.
+ __ mov(ecx, FieldOperand(eax, String::kLengthOffset));
+ __ add(ecx, Operand(eax));
+ __ add(Operand(ecx), Immediate(SeqAsciiString::kHeaderSize -
kHeapObjectTag));
+ __ push(ecx);
+
+ // Argument 3: Start of string data.
+ __ mov(ecx, ebx);
+ __ add(ebx, Operand(eax)); // String is ASCII.
+ __ add(Operand(ebx), Immediate(SeqAsciiString::kHeaderSize -
kHeapObjectTag));
+ __ push(ebx);
+
+ // Argument 2: Previous index.
+ __ push(ecx);
+
+ // Argument 1: Subject string.
+ __ push(eax);
+
+ // Locate the code entry and call it.
+ __ add(Operand(edx), Immediate(Code::kHeaderSize - kHeapObjectTag));
+ __ call(Operand(edx));
+ // Remove arguments.
+ __ add(Operand(esp), Immediate(8 * kPointerSize));
+
+ // Check the result.
+ Label success;
+ __ cmp(eax, NativeRegExpMacroAssembler::SUCCESS);
+ __ j(equal, &success, taken);
+ Label failure;
+ __ cmp(eax, NativeRegExpMacroAssembler::FAILURE);
+ __ j(equal, &failure, taken);
+ __ cmp(eax, NativeRegExpMacroAssembler::EXCEPTION);
+ // If not exception it can only be retry. Handle that in the runtime
system.
+ __ j(not_equal, &runtime);
+ // Result must now be exception. If there is no pending exception yet, a
+ // stack overflow (on the backtrack stack) was detected in RegExp code,
+ // but the exception has not been created. Handle that in the runtime
+ // system.
+ ExternalReference pending_exception(Top::k_pending_exception_address);
+ __ mov(eax,
+
Operand::StaticVariable(ExternalReference::the_hole_value_location()));
+ __ cmp(eax, Operand::StaticVariable(pending_exception));
+ __ j(equal, &runtime);
+ __ bind(&failure);
+ // For failure and exception return null.
+ __ mov(Operand(eax), Factory::null_value());
+ __ ret(4 * kPointerSize);
+
+ // Load RegExp data.
+ __ bind(&success);
+ __ mov(eax, Operand(esp, 4 * kPointerSize));
+ __ mov(ecx, FieldOperand(eax, JSRegExp::kDataOffset));
+ __ mov(edx, FieldOperand(ecx, JSRegExp::kIrregexpCaptureCountOffset));
+ // Calculate number of capture registers (number_of_captures + 1) * 2.
+ __ add(Operand(edx), Immediate(2)); // edx was a smi.
+
+ // edx: Number of capture registers
+ // Load last_match_info which is still known to be a fast case JSArray.
+ __ mov(eax, Operand(esp, 1 * kPointerSize));
+ __ mov(ebx, FieldOperand(eax, JSArray::kElementsOffset));
+
+ // ebx: last_match_info backing store (FixedArray)
+ // edx: number of capture registers
+ // Store the capture count.
+ __ SmiTag(edx); // Number of capture registers to smi.
+ __ mov(FieldOperand(ebx, RegExpImpl::kLastCaptureCountOffset), edx);
+ __ SmiUntag(edx); // Number of capture registers back from smi.
+ // Store last subject and last input.
+ __ mov(eax, Operand(esp, 3 * kPointerSize));
+ __ mov(FieldOperand(ebx, RegExpImpl::kLastSubjectOffset), eax);
+ __ mov(ecx, ebx);
+ __ RecordWrite(ecx, RegExpImpl::kLastSubjectOffset, eax, edi);
+ __ mov(eax, Operand(esp, 3 * kPointerSize));
+ __ mov(FieldOperand(ebx, RegExpImpl::kLastInputOffset), eax);
+ __ mov(ecx, ebx);
+ __ RecordWrite(ecx, RegExpImpl::kLastInputOffset, eax, edi);
+
+ // Get the static offsets vector filled by the native regexp code.
+ ExternalReference address_of_static_offsets_vector =
+ ExternalReference::address_of_static_offsets_vector();
+ __ mov(ecx, Immediate(address_of_static_offsets_vector));
+
+ // ebx: last_match_info backing store (FixedArray)
+ // ecx: offsets vector
+ // edx: number of capture registers
+ Label next_capture, done;
+ __ mov(eax, Operand(esp, 2 * kPointerSize)); // Read previous index.
+ // Capture register counter starts from number of capture registers and
+ // counts down until wrapping after zero.
+ __ bind(&next_capture);
+ __ sub(Operand(edx), Immediate(1));
+ __ j(negative, &done);
+ // Read the value from the static offsets vector buffer.
+ __ mov(edi, Operand(ecx, edx, times_pointer_size, 0));
+ // Perform explicit shift
+ ASSERT_EQ(0, kSmiTag);
+ __ shl(edi, kSmiTagSize);
+ // Add previous index (from its stack slot) if value is not negative.
+ Label capture_negative;
+ // Flags tested here were set by the shift above.
+ __ j(negative, &capture_negative, not_taken);
+ __ add(edi, Operand(eax)); // Add previous index (adding smi to smi).
+ __ bind(&capture_negative);
+ // Store the smi value in the last match info.
+ __ mov(FieldOperand(ebx,
+ edx,
+ times_pointer_size,
+ RegExpImpl::kFirstCaptureOffset),
+ edi);
+ __ jmp(&next_capture);
+ __ bind(&done);
+
+ // Return last match info.
+ __ mov(eax, Operand(esp, 1 * kPointerSize));
+ __ ret(4 * kPointerSize);
+
+ // Do the runtime call to execute the regexp.
+ __ bind(&runtime);
+ __ TailCallRuntime(ExternalReference(Runtime::kRegExpExec), 4, 1);
+}
void CompareStub::Generate(MacroAssembler* masm) {
=======================================
--- /branches/bleeding_edge/src/ia32/codegen-ia32.h Mon Dec 21 05:30:10 2009
+++ /branches/bleeding_edge/src/ia32/codegen-ia32.h Wed Jan 6 03:09:30 2010
@@ -544,6 +544,9 @@
// Fast support for StringAdd.
void GenerateStringAdd(ZoneList<Expression*>* args);
+ // Support for direct calls from JavaScript to native RegExp code.
+ void GenerateRegExpExec(ZoneList<Expression*>* args);
+
// Simple condition analysis.
enum ConditionAnalysis {
ALWAYS_TRUE,
=======================================
--- /branches/bleeding_edge/src/ia32/macro-assembler-ia32.cc Wed Dec 23
05:27:58 2009
+++ /branches/bleeding_edge/src/ia32/macro-assembler-ia32.cc Wed Jan 6
03:09:30 2010
@@ -323,6 +323,17 @@
cmpb(FieldOperand(map, Map::kInstanceTypeOffset),
static_cast<int8_t>(type));
}
+
+
+Condition MacroAssembler::IsObjectStringType(Register heap_object,
+ Register map,
+ Register instance_type) {
+ mov(map, FieldOperand(heap_object, HeapObject::kMapOffset));
+ movzx_b(instance_type, FieldOperand(map, Map::kInstanceTypeOffset));
+ ASSERT(kNotStringTag != 0);
+ test(instance_type, Immediate(kIsNotStringMask));
+ return zero;
+}
void MacroAssembler::FCmp() {
=======================================
--- /branches/bleeding_edge/src/ia32/macro-assembler-ia32.h Tue Jan 5
03:29:27 2010
+++ /branches/bleeding_edge/src/ia32/macro-assembler-ia32.h Wed Jan 6
03:09:30 2010
@@ -141,6 +141,15 @@
// Compare instance type for map.
void CmpInstanceType(Register map, InstanceType type);
+ // Check if the object in register heap_object is a string. Afterwards
the
+ // register map contains the object map and the register instance_type
+ // contains the instance_type. The registers map and instance_type can
be the
+ // same in which case it contains the instance type afterwards. Either
of the
+ // registers map and instance_type can be the same as heap_object.
+ Condition IsObjectStringType(Register heap_object,
+ Register map,
+ Register instance_type);
+
// FCmp is similar to integer cmp, but requires unsigned
// jcc instructions (je, ja, jae, jb, jbe, je, and jz).
void FCmp();
=======================================
--- /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc Thu
Nov 5 05:59:40 2009
+++ /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc Wed
Jan 6 03:09:30 2010
@@ -55,13 +55,17 @@
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
- * - stack_area_base (High end of the memory area to use as
- * backtracking stack)
- * - at_start (if 1, start at start of string, if 0, don't)
- * - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (Address of end of string)
- * - start of input (Address of first character in string)
- * - void* input_string (location of a handle containing the string)
+ * - direct_call (if 1, direct call from JavaScript code,
if 0
+ * call through the runtime system)
+ * - stack_area_base (High end of the memory area to use as
+ * backtracking stack)
+ * - at_start (if 1, we are starting at the start of the
+ * string, otherwise 0)
+ * - int* capture_array (int[num_saved_registers_], for output).
+ * - end of input (Address of end of string)
+ * - start of input (Address of first character in string)
+ * - start index (character index of start)
+ * - String* input_string (location of a handle containing the
string)
* --- frame alignment (if applicable) ---
* - return address
* ebp-> - old ebp
@@ -81,11 +85,13 @@
* The data up to the return address must be placed there by the calling
* code, by calling the code entry as cast to a function with the
signature:
* int (*match)(String* input_string,
+ * int start_index,
* Address start,
* Address end,
* int* capture_output_array,
* bool at_start,
- * byte* stack_area_base)
+ * byte* stack_area_base,
+ * bool direct_call)
*/
#define __ ACCESS_MASM(masm_)
@@ -941,6 +947,12 @@
// If not real stack overflow the stack guard was used to interrupt
// execution for another purpose.
+
+ // If this is a direct call from JavaScript retry the RegExp forcing the
call
+ // through the runtime system. Currently the direct call cannot handle a
GC.
+ if (frame_entry<int>(re_frame, kDirectCall) == 1) {
+ return RETRY;
+ }
// Prepare for possible GC.
HandleScope handles;
=======================================
--- /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.h Wed Sep
9 03:49:40 2009
+++ /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.h Wed Jan
6 03:09:30 2010
@@ -128,6 +128,7 @@
static const int kRegisterOutput = kInputEnd + kPointerSize;
static const int kAtStart = kRegisterOutput + kPointerSize;
static const int kStackHighEnd = kAtStart + kPointerSize;
+ static const int kDirectCall = kStackHighEnd + kPointerSize;
// Below the frame pointer - local stack variables.
// When adding local variables remember to push space for them in
// the frame in GetCode.
=======================================
--- /branches/bleeding_edge/src/ia32/simulator-ia32.h Thu Nov 5 05:27:21
2009
+++ /branches/bleeding_edge/src/ia32/simulator-ia32.h Wed Jan 6 03:09:30
2010
@@ -52,9 +52,9 @@
};
// Call the generated regexp code directly. The entry function pointer
should
-// expect seven int/pointer sized arguments and return an int.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6) \
- entry(p0, p1, p2, p3, p4, p5, p6)
+// expect eight int/pointer sized arguments and return an int.
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
+ entry(p0, p1, p2, p3, p4, p5, p6, p7)
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
reinterpret_cast<TryCatch*>(try_catch_address)
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc Mon Nov 9 05:21:14 2009
+++ /branches/bleeding_edge/src/jsregexp.cc Wed Jan 6 03:09:30 2010
@@ -112,37 +112,6 @@
// Generic RegExp methods. Dispatches to implementation specific methods.
-class OffsetsVector {
- public:
- inline OffsetsVector(int num_registers)
- : offsets_vector_length_(num_registers) {
- if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
- vector_ = NewArray<int>(offsets_vector_length_);
- } else {
- vector_ = static_offsets_vector_;
- }
- }
- inline ~OffsetsVector() {
- if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
- DeleteArray(vector_);
- vector_ = NULL;
- }
- }
- inline int* vector() { return vector_; }
- inline int length() { return offsets_vector_length_; }
-
- private:
- int* vector_;
- int offsets_vector_length_;
- static const int kStaticOffsetsVectorSize = 50;
- static int static_offsets_vector_[kStaticOffsetsVectorSize];
-};
-
-
-int OffsetsVector::static_offsets_vector_[
- OffsetsVector::kStaticOffsetsVectorSize];
-
-
Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flag_str) {
@@ -448,6 +417,14 @@
ASSERT(array->length() >= number_of_capture_registers +
kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
+ // Capture values are relative to start_offset only.
+ // Convert them to be relative to start of string.
+ if (captures_vector[i] >= 0) {
+ captures_vector[i] += previous_index;
+ }
+ if (captures_vector[i + 1] >= 0) {
+ captures_vector[i + 1] += previous_index;
+ }
SetCapture(*array, i, captures_vector[i]);
SetCapture(*array, i + 1, captures_vector[i + 1]);
}
@@ -4605,5 +4582,9 @@
data->capture_count,
pattern);
}
+
+
+int OffsetsVector::static_offsets_vector_[
+ OffsetsVector::kStaticOffsetsVectorSize];
}} // namespace v8::internal
=======================================
--- /branches/bleeding_edge/src/jsregexp.h Mon Jan 4 03:24:03 2010
+++ /branches/bleeding_edge/src/jsregexp.h Wed Jan 6 03:09:30 2010
@@ -101,13 +101,23 @@
int index,
Handle<JSArray> lastMatchInfo);
- // Offsets in the lastMatchInfo array.
+ // Array index in the lastMatchInfo array.
static const int kLastCaptureCount = 0;
static const int kLastSubject = 1;
static const int kLastInput = 2;
static const int kFirstCapture = 3;
static const int kLastMatchOverhead = 3;
+ // Direct offset into the lastMatchInfo array.
+ static const int kLastCaptureCountOffset =
+ FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize;
+ static const int kLastSubjectOffset =
+ FixedArray::kHeaderSize + kLastSubject * kPointerSize;
+ static const int kLastInputOffset =
+ FixedArray::kHeaderSize + kLastInput * kPointerSize;
+ static const int kFirstCaptureOffset =
+ FixedArray::kHeaderSize + kFirstCapture * kPointerSize;
+
// Used to access the lastMatchInfo array.
static int GetCapture(FixedArray* array, int index) {
return Smi::cast(array->get(index + kFirstCapture))->value();
@@ -1276,6 +1286,40 @@
};
+class OffsetsVector {
+ public:
+ inline OffsetsVector(int num_registers)
+ : offsets_vector_length_(num_registers) {
+ if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+ vector_ = NewArray<int>(offsets_vector_length_);
+ } else {
+ vector_ = static_offsets_vector_;
+ }
+ }
+ inline ~OffsetsVector() {
+ if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
+ DeleteArray(vector_);
+ vector_ = NULL;
+ }
+ }
+ inline int* vector() { return vector_; }
+ inline int length() { return offsets_vector_length_; }
+
+ static const int kStaticOffsetsVectorSize = 50;
+
+ private:
+ static Address static_offsets_vector_address() {
+ return reinterpret_cast<Address>(&static_offsets_vector_);
+ }
+
+ int* vector_;
+ int offsets_vector_length_;
+ static int static_offsets_vector_[kStaticOffsetsVectorSize];
+
+ friend class ExternalReference;
+};
+
+
} } // namespace v8::internal
#endif // V8_JSREGEXP_H_
=======================================
--- /branches/bleeding_edge/src/objects.h Tue Jan 5 03:38:36 2010
+++ /branches/bleeding_edge/src/objects.h Wed Jan 6 03:09:30 2010
@@ -3603,6 +3603,14 @@
static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
static const int kIrregexpDataSize = kIrregexpCaptureCountIndex + 1;
+
+ // Offsets directly into the data fixed array.
+ static const int kDataTagOffset =
+ FixedArray::kHeaderSize + kTagIndex * kPointerSize;
+ static const int kDataAsciiCodeOffset =
+ FixedArray::kHeaderSize + kIrregexpASCIICodeIndex * kPointerSize;
+ static const int kIrregexpCaptureCountOffset =
+ FixedArray::kHeaderSize + kIrregexpCaptureCountIndex * kPointerSize;
};
=======================================
--- /branches/bleeding_edge/src/regexp-delay.js Mon Jan 4 03:24:03 2010
+++ /branches/bleeding_edge/src/regexp-delay.js Wed Jan 6 03:09:30 2010
@@ -136,7 +136,7 @@
function DoRegExpExec(regexp, string, index) {
- return %RegExpExec(regexp, string, index, lastMatchInfo);
+ return %_RegExpExec(regexp, string, index, lastMatchInfo);
}
@@ -164,7 +164,7 @@
%_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]);
// matchIndices is either null or the lastMatchInfo array.
- var matchIndices = %RegExpExec(this, s, i, lastMatchInfo);
+ var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo);
if (matchIndices == null) {
if (this.global) this.lastIndex = 0;
@@ -221,7 +221,7 @@
%_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]);
// matchIndices is either null or the lastMatchInfo array.
- var matchIndices = %RegExpExec(this, s, i, lastMatchInfo);
+ var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo);
if (matchIndices == null) {
if (this.global) this.lastIndex = 0;
=======================================
--- /branches/bleeding_edge/src/regexp-macro-assembler.cc Tue Nov 10 05:23:05 2009
+++ /branches/bleeding_edge/src/regexp-macro-assembler.cc Wed Jan 6 03:09:30 2010
@@ -143,17 +143,6 @@
input_end,
offsets_vector,
previous_index == 0);
-
- if (res == SUCCESS) {
- // Capture values are relative to start_offset only.
- // Convert them to be relative to start of string.
- for (int i = 0; i < offsets_vector_length; i++) {
- if (offsets_vector[i] >= 0) {
- offsets_vector[i] += previous_index;
- }
- }
- }
-
return res;
}
@@ -167,7 +156,7 @@
int* output,
bool at_start) {
typedef int (*matcher)(String*, int, const byte*,
- const byte*, int*, int, Address);
+ const byte*, int*, int, Address, int);
matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
int at_start_val = at_start ? 1 : 0;
@@ -176,6 +165,7 @@
RegExpStack stack;
Address stack_base = RegExpStack::stack_base();
+ int direct_call = 0;
int result = CALL_GENERATED_REGEXP_CODE(matcher_func,
input,
start_offset,
@@ -183,7 +173,8 @@
input_end,
output,
at_start_val,
- stack_base);
+ stack_base,
+ direct_call);
ASSERT(result <= SUCCESS);
ASSERT(result >= RETRY);
=======================================
--- /branches/bleeding_edge/src/regexp-stack.h Wed Nov 11 01:50:06 2009
+++ /branches/bleeding_edge/src/regexp-stack.h Wed Jan 6 03:09:30 2010
@@ -97,6 +97,16 @@
Address limit_;
void Free();
};
+
+ // Address of the thread_local_.memory_ field itself (not the value stored in it).
+ static Address memory_address() {
+ return reinterpret_cast<Address>(&thread_local_.memory_);
+ }
+
+ // Address of the thread_local_.memory_size_ field itself (not the size value).
+ static Address memory_size_address() {
+ return reinterpret_cast<Address>(&thread_local_.memory_size_);
+ }
// Resets the buffer if it has grown beyond the default/minimum size.
// After this, the buffer is either the default size, or it is empty, so
@@ -104,6 +114,8 @@
static void Reset();
static ThreadLocal thread_local_;
+
+ friend class ExternalReference;
};
}} // namespace v8::internal
=======================================
--- /branches/bleeding_edge/src/v8-counters.h Wed Dec 9 06:32:45 2009
+++ /branches/bleeding_edge/src/v8-counters.h Wed Jan 6 03:09:30 2010
@@ -153,7 +153,9 @@
SC(generic_binary_stub_calls, V8.GenericBinaryStubCalls) \
SC(generic_binary_stub_calls_regs, V8.GenericBinaryStubCallsRegs) \
SC(string_add_runtime, V8.StringAddRuntime) \
- SC(string_add_native, V8.StringAddNative)
+ SC(string_add_native, V8.StringAddNative) \
+ SC(regexp_entry_runtime, V8.RegExpEntryRuntime) \
+ SC(regexp_entry_native, V8.RegExpEntryNative)
// This file contains all the v8 counters that are in use.
class Counters : AllStatic {
=======================================
--- /branches/bleeding_edge/src/x64/codegen-x64.cc Tue Jan 5 04:02:18 2010
+++ /branches/bleeding_edge/src/x64/codegen-x64.cc Wed Jan 6 03:09:30 2010
@@ -3874,6 +3874,19 @@
Result result = allocator_->Allocate(rax);
frame_->Push(&result);
}
+
+
+void CodeGenerator::GenerateRegExpExec(ZoneList<Expression*>* args) {  // x64: forwards to the runtime.
+ ASSERT_EQ(args->length(), 4);  // Exactly the four operands loaded below.
+
+ // Load the arguments on the stack and call the runtime system.
+ Load(args->at(0));
+ Load(args->at(1));
+ Load(args->at(2));
+ Load(args->at(3));
+ Result result = frame_->CallRuntime(Runtime::kRegExpExec, 4);  // 4 = arguments loaded above.
+ frame_->Push(&result);  // Push the call's result onto frame_.
+}
void CodeGenerator::GenerateStringAdd(ZoneList<Expression*>* args) {
=======================================
--- /branches/bleeding_edge/src/x64/codegen-x64.h Mon Dec 21 05:30:10 2009
+++ /branches/bleeding_edge/src/x64/codegen-x64.h Wed Jan 6 03:09:30 2010
@@ -541,6 +541,9 @@
// Fast support for StringAdd.
void GenerateStringAdd(ZoneList<Expression*>* args);
+ // Support for direct calls from JavaScript to native RegExp code.
+ void GenerateRegExpExec(ZoneList<Expression*>* args);
+
// Simple condition analysis.
enum ConditionAnalysis {
ALWAYS_TRUE,
=======================================
--- /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc Wed Nov 11 01:50:06 2009
+++ /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc Wed Jan 6 03:09:30 2010
@@ -67,13 +67,17 @@
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
- * - stack_area_base (High end of the memory area to use as
- * backtracking stack)
- * - at_start (if 1, start at start of string, if 0, don't)
- * - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (Address of end of string)
- * - start of input (Address of first character in string)
- * - String** input_string (location of a handle containing the string)
+ * - direct_call (if 1, direct call from JavaScript code, if 0 call
+ * through the runtime system)
+ * - stack_area_base (High end of the memory area to use as
+ * backtracking stack)
+ * - at_start (if 1, we are starting at the start of the
+ * string, otherwise 0)
+ * - int* capture_array (int[num_saved_registers_], for output).
+ * - end of input (Address of end of string)
+ * - start of input (Address of first character in string)
+ * - start index (character index of start)
+ * - String* input_string (input string)
* - return address
* - backup of callee save registers (rbx, possibly rsi and rdi).
* - Offset of location before start of input (effectively character
@@ -90,11 +94,13 @@
* calling the code's entry address cast to a function pointer with the
* following signature:
* int (*match)(String* input_string,
+ * int start_index,
* Address start,
* Address end,
* int* capture_output_array,
* bool at_start,
- * byte* stack_area_base)
+ * byte* stack_area_base,
+ * bool direct_call)
*/
#define __ ACCESS_MASM(masm_)
=======================================
--- /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.h Fri Sep 4 04:00:07 2009
+++ /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.h Wed Jan 6 03:09:30 2010
@@ -143,6 +143,8 @@
// AtStart is passed as 32 bit int (values 0 or 1).
static const int kAtStart = kRegisterOutput + kPointerSize;
static const int kStackHighEnd = kAtStart + kPointerSize;
+ // DirectCall is passed as 32 bit int (values 0 or 1).
+ static const int kDirectCall = kStackHighEnd + kPointerSize;
#else
// In AMD64 ABI Calling Convention, the first six integer parameters
// are passed as registers, and caller must allocate space on the stack
@@ -154,6 +156,7 @@
static const int kRegisterOutput = kInputEnd - kPointerSize;
static const int kAtStart = kRegisterOutput - kPointerSize;
static const int kStackHighEnd = kFrameAlign;
+ static const int kDirectCall = kStackHighEnd + kPointerSize;
#endif
#ifdef _WIN64
=======================================
--- /branches/bleeding_edge/src/x64/simulator-x64.h Thu Nov 5 05:27:21 2009
+++ /branches/bleeding_edge/src/x64/simulator-x64.h Wed Jan 6 03:09:30 2010
@@ -53,9 +53,9 @@
};
// Call the generated regexp code directly. The entry function pointer should
-// expect seven int/pointer sized arguments and return an int.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6) \
- entry(p0, p1, p2, p3, p4, p5, p6)
+// expect eight int/pointer sized arguments and return an int.
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
+ entry(p0, p1, p2, p3, p4, p5, p6, p7)
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
reinterpret_cast<TryCatch*>(try_catch_address)
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev