Revision: 5021
Author: [email protected]
Date: Mon Jul  5 05:49:00 2010
Log: RegExp replace with empty string optimization by Sandholm.

Review URL: http://codereview.chromium.org/2809048
http://code.google.com/p/v8/source/detail?r=5021

Modified:
 /branches/bleeding_edge/src/codegen.h
 /branches/bleeding_edge/src/factory.cc
 /branches/bleeding_edge/src/factory.h
 /branches/bleeding_edge/src/runtime.cc

=======================================
--- /branches/bleeding_edge/src/codegen.h       Mon Jul  5 04:03:16 2010
+++ /branches/bleeding_edge/src/codegen.h       Mon Jul  5 05:49:00 2010
@@ -420,7 +420,7 @@
   kIgnoreNegativeZero
 };

-
+
 class GenericUnaryOpStub : public CodeStub {
  public:
   GenericUnaryOpStub(Token::Value op,
=======================================
--- /branches/bleeding_edge/src/factory.cc      Thu Jun 24 06:56:35 2010
+++ /branches/bleeding_edge/src/factory.cc      Mon Jul  5 05:49:00 2010
@@ -94,6 +94,12 @@
   CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string, pretenure),
                      String);
 }
+
+
+// Handle-returning counterpart of Heap::AllocateRawAsciiString.
+Handle<String> Factory::NewRawAsciiString(int length, PretenureFlag pretenure) {
+  CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(length, pretenure), String);
+}


 Handle<String> Factory::NewRawTwoByteString(int length,
=======================================
--- /branches/bleeding_edge/src/factory.h       Thu Jun 24 06:56:35 2010
+++ /branches/bleeding_edge/src/factory.h       Mon Jul  5 05:49:00 2010
@@ -95,12 +95,16 @@
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);

-  static Handle<String> NewStringFromTwoByte(Vector<const uc16> str,
+  static Handle<String> NewStringFromTwoByte(
+      Vector<const uc16> str,
       PretenureFlag pretenure = NOT_TENURED);

-  // Allocates and partially initializes a TwoByte String. The characters of
-  // the string are uninitialized. Currently used in regexp code only, where
-  // they are pretenured.
+  // Allocates and partially initializes an ASCII or TwoByte String. The
+  // characters of the string are uninitialized. Currently used in regexp code
+  // only, where they are pretenured.
+  static Handle<String> NewRawAsciiString(
+      int length,
+      PretenureFlag pretenure = NOT_TENURED);
   static Handle<String> NewRawTwoByteString(
       int length,
       PretenureFlag pretenure = NOT_TENURED);
=======================================
--- /branches/bleeding_edge/src/runtime.cc      Fri Jul  2 13:09:33 2010
+++ /branches/bleeding_edge/src/runtime.cc      Mon Jul  5 05:49:00 2010
@@ -2284,6 +2284,134 @@
   return *(builder.ToString());
 }

+// Removes what |regexp| matches in |subject|: every match if the regexp is
+// global, otherwise only the first. Returns |subject| if nothing matches, the
+// empty string if nothing remains, or a Failure if Exec gives a null handle.
+template <typename ResultSeqString>
+static Object* StringReplaceRegExpWithEmptyString(ResultSeqString* subject,
+                                                  JSRegExp* regexp,
+                                                  JSArray* last_match_info) {
+  ASSERT(subject->IsFlat());
+
+  HandleScope handles;
+
+  Handle<String> subject_handle(subject);
+  Handle<JSRegExp> regexp_handle(regexp);
+  Handle<JSArray> last_match_info_handle(last_match_info);
+  Handle<Object> match = RegExpImpl::Exec(regexp_handle,
+                                          subject_handle,
+                                          0,
+                                          last_match_info_handle);
+  if (match.is_null()) return Failure::Exception();
+  if (match->IsNull()) return *subject_handle;
+
+  ASSERT(last_match_info_handle->HasFastElements());
+
+  HandleScope loop_scope;
+  int start, end;
+  {
+    AssertNoAllocation match_info_array_is_not_in_a_handle;
+    FixedArray* match_info_array =
+        FixedArray::cast(last_match_info_handle->elements());
+    start = RegExpImpl::GetCapture(match_info_array, 0);
+    end = RegExpImpl::GetCapture(match_info_array, 1);
+  }
+
+  // Read the length through the handle: Exec above may have allocated, after
+  // which the raw |subject| pointer is no longer safe to dereference.
+  int length = subject_handle->length();
+  int new_length = length - (end - start);
+  if (new_length == 0) return Heap::empty_string();
+  // TODO(sandholm) try to use types statically to determine this.
+  Handle<ResultSeqString> answer;
+  if (subject_handle->IsAsciiRepresentation()) {
+    answer =
+        Handle<ResultSeqString>::cast(Factory::NewRawAsciiString(new_length));
+  } else {
+    answer =
+        Handle<ResultSeqString>::cast(Factory::NewRawTwoByteString(new_length));
+  }
+
+  // If the regexp isn't global, only match once.
+  if (!regexp_handle->GetFlags().is_global()) {
+    if (start > 0) {
+      String::WriteToFlat(*subject_handle,
+                          answer->GetChars(),
+                          0,
+                          start);
+    }
+    if (end < length) {
+      String::WriteToFlat(*subject_handle,
+                          answer->GetChars() + start,
+                          end,
+                          length);
+    }
+    return *answer;
+  }
+
+  int prev = 0;  // Index of end of last match.
+  int next = 0;  // Start of next search (prev unless last match was empty).
+  int position = 0;
+
+  do {
+    if (prev < start) {
+      // Add substring subject[prev;start] to answer string.
+      String::WriteToFlat(*subject_handle,
+                          answer->GetChars() + position,
+                          prev,
+                          start);
+      position += start - prev;
+    }
+    prev = end;
+    next = end;
+    // Continue from where the match ended, unless it was an empty match.
+    if (start == end) {
+      next++;
+      if (next > length) break;
+    }
+    match = RegExpImpl::Exec(regexp_handle,
+                             subject_handle,
+                             next,
+                             last_match_info_handle);
+    if (match.is_null()) return Failure::Exception();
+    if (match->IsNull()) break;
+
+    ASSERT(last_match_info_handle->HasFastElements());
+    HandleScope loop_scope;
+    {
+      AssertNoAllocation match_info_array_is_not_in_a_handle;
+      FixedArray* match_info_array =
+          FixedArray::cast(last_match_info_handle->elements());
+      start = RegExpImpl::GetCapture(match_info_array, 0);
+      end = RegExpImpl::GetCapture(match_info_array, 1);
+    }
+  } while (true);
+
+  if (prev < length) {
+    // Add substring subject[prev;length] to answer string.
+    String::WriteToFlat(*subject_handle,
+                        answer->GetChars() + position,
+                        prev,
+                        length);
+    position += length - prev;
+  }
+
+  if (position == 0) return Heap::empty_string();
+
+  // Shorten the string and put a filler object over the unused tail.
+  int string_size = ResultSeqString::SizeFor(position);
+  int allocated_string_size = ResultSeqString::SizeFor(new_length);
+  int delta = allocated_string_size - string_size;
+
+  answer->set_length(position);
+  if (delta == 0) return *answer;
+
+  Address end_of_string = answer->address() + string_size;
+  Heap::CreateFillerObjectAt(end_of_string, delta);
+
+  return *answer;
+}
+

 static Object* Runtime_StringReplaceRegExpWithString(Arguments args) {
   ASSERT(args.length() == 4);
@@ -2311,6 +2439,18 @@

   ASSERT(last_match_info->HasFastElements());

+  if (replacement->length() == 0) {
+    if (subject->IsAsciiRepresentation()) {
+ return StringReplaceRegExpWithEmptyString(SeqAsciiString::cast(subject),
+                                                regexp,
+                                                last_match_info);
+    } else {
+ return StringReplaceRegExpWithEmptyString(SeqTwoByteString::cast(subject),
+                                                regexp,
+                                                last_match_info);
+    }
+  }
+
   return StringReplaceRegExpWithString(subject,
                                        regexp,
                                        replacement,

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to