Revision: 5021
Author: [email protected]
Date: Mon Jul 5 05:49:00 2010
Log: RegExp replace with empty string optimization by Sandholm.
Review URL: http://codereview.chromium.org/2809048
http://code.google.com/p/v8/source/detail?r=5021
Modified:
/branches/bleeding_edge/src/codegen.h
/branches/bleeding_edge/src/factory.cc
/branches/bleeding_edge/src/factory.h
/branches/bleeding_edge/src/runtime.cc
=======================================
--- /branches/bleeding_edge/src/codegen.h Mon Jul 5 04:03:16 2010
+++ /branches/bleeding_edge/src/codegen.h Mon Jul 5 05:49:00 2010
@@ -420,7 +420,7 @@
kIgnoreNegativeZero
};
-
+
class GenericUnaryOpStub : public CodeStub {
public:
GenericUnaryOpStub(Token::Value op,
=======================================
--- /branches/bleeding_edge/src/factory.cc Thu Jun 24 06:56:35 2010
+++ /branches/bleeding_edge/src/factory.cc Mon Jul 5 05:49:00 2010
@@ -94,6 +94,12 @@
CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string, pretenure),
String);
}
+
+
+Handle<String> Factory::NewRawAsciiString(int length,
+ PretenureFlag pretenure) {
+ CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(length, pretenure), String);
+}
Handle<String> Factory::NewRawTwoByteString(int length,
=======================================
--- /branches/bleeding_edge/src/factory.h Thu Jun 24 06:56:35 2010
+++ /branches/bleeding_edge/src/factory.h Mon Jul 5 05:49:00 2010
@@ -95,12 +95,16 @@
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
- static Handle<String> NewStringFromTwoByte(Vector<const uc16> str,
+ static Handle<String> NewStringFromTwoByte(
+ Vector<const uc16> str,
PretenureFlag pretenure = NOT_TENURED);
- // Allocates and partially initializes a TwoByte String. The characters of
- // the string are uninitialized. Currently used in regexp code only, where
- // they are pretenured.
+ // Allocates and partially initializes an ASCII or TwoByte String. The
+ // characters of the string are uninitialized. Currently used in regexp code
+ // only, where they are pretenured.
+ static Handle<String> NewRawAsciiString(
+ int length,
+ PretenureFlag pretenure = NOT_TENURED);
static Handle<String> NewRawTwoByteString(
int length,
PretenureFlag pretenure = NOT_TENURED);
=======================================
--- /branches/bleeding_edge/src/runtime.cc Fri Jul 2 13:09:33 2010
+++ /branches/bleeding_edge/src/runtime.cc Mon Jul 5 05:49:00 2010
@@ -2284,6 +2284,134 @@
return *(builder.ToString());
}
+template <typename ResultSeqString>
+static Object* StringReplaceRegExpWithEmptyString(ResultSeqString* subject,
+ JSRegExp* regexp,
+ JSArray* last_match_info) {
+ ASSERT(subject->IsFlat());
+
+ HandleScope handles;
+
+ Handle<String> subject_handle(subject);
+ Handle<JSRegExp> regexp_handle(regexp);
+ Handle<JSArray> last_match_info_handle(last_match_info);
+ Handle<Object> match = RegExpImpl::Exec(regexp_handle,
+ subject_handle,
+ 0,
+ last_match_info_handle);
+ if (match.is_null()) return Failure::Exception();
+ if (match->IsNull()) return *subject_handle;
+
+ ASSERT(last_match_info_handle->HasFastElements());
+
+ HandleScope loop_scope;
+ int start, end;
+ {
+ AssertNoAllocation match_info_array_is_not_in_a_handle;
+ FixedArray* match_info_array =
+ FixedArray::cast(last_match_info_handle->elements());
+
+ start = RegExpImpl::GetCapture(match_info_array, 0);
+ end = RegExpImpl::GetCapture(match_info_array, 1);
+ }
+
+ int length = subject->length();
+ int new_length = length - (end - start);
+ if (new_length == 0) {
+ return Heap::empty_string();
+ }
+ // TODO(sandholm) try to use types statically to determine this.
+ Handle<ResultSeqString> answer;
+ if (subject_handle->IsAsciiRepresentation()) {
+ answer =
+ Handle<ResultSeqString>::cast(Factory::NewRawAsciiString(new_length));
+ } else {
+ answer =
+ Handle<ResultSeqString>::cast(Factory::NewRawTwoByteString(new_length));
+ }
+
+ // If the regexp isn't global, only match once.
+ if (!regexp_handle->GetFlags().is_global()) {
+ if (start > 0) {
+ String::WriteToFlat(*subject_handle,
+ answer->GetChars(),
+ 0,
+ start);
+ }
+ if (end < length) {
+ String::WriteToFlat(*subject_handle,
+ answer->GetChars() + start,
+ end,
+ length);
+ }
+ return *answer;
+ }
+
+ int prev = 0; // Index of end of last match.
+ int next = 0; // Start of next search (prev unless last match was empty).
+ int position = 0;
+
+ do {
+ if (prev < start) {
+ // Add substring subject[prev;start] to answer string.
+ String::WriteToFlat(*subject_handle,
+ answer->GetChars() + position,
+ prev,
+ start);
+ position += start - prev;
+ }
+ prev = end;
+ next = end;
+ // Continue from where the match ended, unless it was an empty match.
+ if (start == end) {
+ next++;
+ if (next > length) break;
+ }
+ match = RegExpImpl::Exec(regexp_handle,
+ subject_handle,
+ next,
+ last_match_info_handle);
+ if (match.is_null()) return Failure::Exception();
+ if (match->IsNull()) break;
+
+ ASSERT(last_match_info_handle->HasFastElements());
+ HandleScope loop_scope;
+ {
+ AssertNoAllocation match_info_array_is_not_in_a_handle;
+ FixedArray* match_info_array =
+ FixedArray::cast(last_match_info_handle->elements());
+ start = RegExpImpl::GetCapture(match_info_array, 0);
+ end = RegExpImpl::GetCapture(match_info_array, 1);
+ }
+ } while (true);
+
+ if (prev < length) {
+ // Add substring subject[prev;length] to answer string.
+ String::WriteToFlat(*subject_handle,
+ answer->GetChars() + position,
+ prev,
+ length);
+ position += length - prev;
+ }
+
+ if (position == 0) {
+ return Heap::empty_string();
+ }
+
+ // Shorten string and fill
+ int string_size = ResultSeqString::SizeFor(position);
+ int allocated_string_size = ResultSeqString::SizeFor(new_length);
+ int delta = allocated_string_size - string_size;
+
+ answer->set_length(position);
+ if (delta == 0) return *answer;
+
+ Address end_of_string = answer->address() + string_size;
+ Heap::CreateFillerObjectAt(end_of_string, delta);
+
+ return *answer;
+}
+
static Object* Runtime_StringReplaceRegExpWithString(Arguments args) {
ASSERT(args.length() == 4);
@@ -2311,6 +2439,18 @@
ASSERT(last_match_info->HasFastElements());
+ if (replacement->length() == 0) {
+ if (subject->IsAsciiRepresentation()) {
+ return StringReplaceRegExpWithEmptyString(SeqAsciiString::cast(subject),
+ regexp,
+ last_match_info);
+ } else {
+ return StringReplaceRegExpWithEmptyString(SeqTwoByteString::cast(subject),
+ regexp,
+ last_match_info);
+ }
+ }
+
return StringReplaceRegExpWithString(subject,
regexp,
replacement,
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev