Revision: 9180
Author: [email protected]
Date: Wed Sep 7 08:17:57 2011
Log: Faster non-regexp global string.replace.
BUG=v8:1662
Review URL: http://codereview.chromium.org/7782028
http://code.google.com/p/v8/source/detail?r=9180
Modified:
/branches/bleeding_edge/src/runtime.cc
=======================================
--- /branches/bleeding_edge/src/runtime.cc Wed Sep 7 04:28:48 2011
+++ /branches/bleeding_edge/src/runtime.cc Wed Sep 7 08:17:57 2011
@@ -2769,6 +2769,164 @@
}
+void FindAsciiStringIndices(Vector<const char> subject,
+ char pattern,
+ ZoneList<int>* indices,
+ unsigned int limit) {
+ ASSERT(limit > 0);
+ // Collect indices of pattern in subject using memchr.
+ // Stop after finding at most limit values.
+ const char* subject_start = reinterpret_cast<const
char*>(subject.start());
+ const char* subject_end = subject_start + subject.length();
+ const char* pos = subject_start;
+ while (limit > 0) {
+ pos = reinterpret_cast<const char*>(
+ memchr(pos, pattern, subject_end - pos));
+ if (pos == NULL) return;
+ indices->Add(static_cast<int>(pos - subject_start));
+ pos++;
+ limit--;
+ }
+}
+
+
+template <typename SubjectChar, typename PatternChar>
+void FindStringIndices(Isolate* isolate,
+ Vector<const SubjectChar> subject,
+ Vector<const PatternChar> pattern,
+ ZoneList<int>* indices,
+ unsigned int limit) {
+ ASSERT(limit > 0);
+ // Collect indices of pattern in subject.
+ // Stop after finding at most limit values.
+ int pattern_length = pattern.length();
+ int index = 0;
+ StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
+ while (limit > 0) {
+ index = search.Search(subject, index);
+ if (index < 0) return;
+ indices->Add(index);
+ index += pattern_length;
+ limit--;
+ }
+}
+
+
+void FindStringIndicesDispatch(Isolate* isolate,
+ String* subject,
+ String* pattern,
+ ZoneList<int>* indices,
+ unsigned int limit) {
+ {
+ AssertNoAllocation no_gc;
+ String::FlatContent subject_content = subject->GetFlatContent();
+ String::FlatContent pattern_content = pattern->GetFlatContent();
+ ASSERT(subject_content.IsFlat());
+ ASSERT(pattern_content.IsFlat());
+ if (subject_content.IsAscii()) {
+ Vector<const char> subject_vector = subject_content.ToAsciiVector();
+ if (pattern_content.IsAscii()) {
+ Vector<const char> pattern_vector =
pattern_content.ToAsciiVector();
+ if (pattern_vector.length() == 1) {
+ FindAsciiStringIndices(subject_vector,
+ pattern_vector[0],
+ indices,
+ limit);
+ } else {
+ FindStringIndices(isolate,
+ subject_vector,
+ pattern_vector,
+ indices,
+ limit);
+ }
+ } else {
+ FindStringIndices(isolate,
+ subject_vector,
+ pattern_content.ToUC16Vector(),
+ indices,
+ limit);
+ }
+ } else {
+ Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
+ if (pattern->IsAsciiRepresentation()) {
+ FindStringIndices(isolate,
+ subject_vector,
+ pattern_content.ToAsciiVector(),
+ indices,
+ limit);
+ } else {
+ FindStringIndices(isolate,
+ subject_vector,
+ pattern_content.ToUC16Vector(),
+ indices,
+ limit);
+ }
+ }
+ }
+}
+
+
+template<typename ResultSeqString>
+MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString(
+ Isolate* isolate,
+ Handle<String> subject,
+ Handle<JSRegExp> pattern_regexp,
+ Handle<String> replacement = Handle<String>::null()) {
+ ASSERT(subject->IsFlat());
+ ASSERT(replacement->IsFlat());
+
+ ZoneScope zone_space(isolate, DELETE_ON_EXIT);
+ ZoneList<int> indices(8);
+ String* pattern =
+ String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
+ int subject_len = subject->length();
+ int pattern_len = pattern->length();
+ int replacement_len = (replacement.is_null()) ? 0 :
replacement->length();
+
+ FindStringIndicesDispatch(isolate, *subject, pattern, &indices,
0xffffffff);
+
+ int matches = indices.length();
+ if (matches == 0) return *subject;
+
+ int result_len = (replacement_len - pattern_len) * matches + subject_len;
+ int subject_pos = 0;
+ int result_pos = 0;
+
+ Handle<ResultSeqString> result;
+ if (ResultSeqString::kHasAsciiEncoding) {
+ result = Handle<ResultSeqString>::cast(
+ isolate->factory()->NewRawAsciiString(result_len));
+ } else {
+ result = Handle<ResultSeqString>::cast(
+ isolate->factory()->NewRawTwoByteString(result_len));
+ }
+
+ for(int i = 0; i < matches; i++) {
+ // Copy non-matched subject content.
+ String::WriteToFlat(*subject,
+ result->GetChars() + result_pos,
+ subject_pos,
+ indices.at(i));
+ result_pos += indices.at(i) - subject_pos;
+ // Replace match.
+
+ if (replacement_len > 0) {
+ String::WriteToFlat(*replacement,
+ result->GetChars() + result_pos,
+ 0,
+ replacement_len);
+ result_pos += replacement_len;
+ }
+
+ subject_pos = indices.at(i) + pattern_len;
+ }
+ String::WriteToFlat(*subject,
+ result->GetChars() + result_pos,
+ subject_pos,
+ subject_len);
+ return *result;
+}
+
MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
Isolate* isolate,
@@ -2807,6 +2965,20 @@
length);
bool is_global = regexp_handle->GetFlags().is_global();
+
+ // Shortcut for simple non-regexp global replacements
+ if (is_global &&
+ regexp->TypeTag() == JSRegExp::ATOM &&
+ compiled_replacement.parts() == 1) {
+ if (subject_handle->HasOnlyAsciiChars() &&
+ replacement_handle->HasOnlyAsciiChars()) {
+ return StringReplaceStringWithString<SeqAsciiString>(
+ isolate, subject_handle, regexp_handle, replacement_handle);
+ } else {
+ return StringReplaceStringWithString<SeqTwoByteString>(
+ isolate, subject_handle, regexp_handle, replacement_handle);
+ }
+ }
// Guessing the number of parts that the final result string is built
// from. Global regexps can match any number of times, so we guess
@@ -2893,6 +3065,19 @@
Handle<String> subject_handle(subject);
Handle<JSRegExp> regexp_handle(regexp);
+
+ // Shortcut for simple non-regexp global replacements
+ if (regexp_handle->GetFlags().is_global() &&
+ regexp_handle->TypeTag() == JSRegExp::ATOM) {
+ if (subject_handle->HasOnlyAsciiChars()) {
+ return StringReplaceStringWithString<SeqAsciiString>(
+ isolate, subject_handle, regexp_handle);
+ } else {
+ return StringReplaceStringWithString<SeqTwoByteString>(
+ isolate, subject_handle, regexp_handle);
+ }
+ }
+
Handle<JSArray> last_match_info_handle(last_match_info);
Handle<Object> match = RegExpImpl::Exec(regexp_handle,
subject_handle,
@@ -5928,49 +6113,6 @@
}
return s->SubString(left, right);
}
-
-
-void FindAsciiStringIndices(Vector<const char> subject,
- char pattern,
- ZoneList<int>* indices,
- unsigned int limit) {
- ASSERT(limit > 0);
- // Collect indices of pattern in subject using memchr.
- // Stop after finding at most limit values.
- const char* subject_start = reinterpret_cast<const
char*>(subject.start());
- const char* subject_end = subject_start + subject.length();
- const char* pos = subject_start;
- while (limit > 0) {
- pos = reinterpret_cast<const char*>(
- memchr(pos, pattern, subject_end - pos));
- if (pos == NULL) return;
- indices->Add(static_cast<int>(pos - subject_start));
- pos++;
- limit--;
- }
-}
-
-
-template <typename SubjectChar, typename PatternChar>
-void FindStringIndices(Isolate* isolate,
- Vector<const SubjectChar> subject,
- Vector<const PatternChar> pattern,
- ZoneList<int>* indices,
- unsigned int limit) {
- ASSERT(limit > 0);
- // Collect indices of pattern in subject.
- // Stop after finding at most limit values.
- int pattern_length = pattern.length();
- int index = 0;
- StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
- while (limit > 0) {
- index = search.Search(subject, index);
- if (index < 0) return;
- indices->Add(index);
- index += pattern_length;
- limit--;
- }
-}
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
@@ -6012,53 +6154,7 @@
ZoneList<int> indices(initial_capacity);
if (!pattern->IsFlat()) FlattenString(pattern);
- // No allocation block.
- {
- AssertNoAllocation no_gc;
- String::FlatContent subject_content = subject->GetFlatContent();
- String::FlatContent pattern_content = pattern->GetFlatContent();
- ASSERT(subject_content.IsFlat());
- ASSERT(pattern_content.IsFlat());
- if (subject_content.IsAscii()) {
- Vector<const char> subject_vector = subject_content.ToAsciiVector();
- if (pattern_content.IsAscii()) {
- Vector<const char> pattern_vector =
pattern_content.ToAsciiVector();
- if (pattern_vector.length() == 1) {
- FindAsciiStringIndices(subject_vector,
- pattern_vector[0],
- &indices,
- limit);
- } else {
- FindStringIndices(isolate,
- subject_vector,
- pattern_vector,
- &indices,
- limit);
- }
- } else {
- FindStringIndices(isolate,
- subject_vector,
- pattern_content.ToUC16Vector(),
- &indices,
- limit);
- }
- } else {
- Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
- if (pattern->IsAsciiRepresentation()) {
- FindStringIndices(isolate,
- subject_vector,
- pattern_content.ToAsciiVector(),
- &indices,
- limit);
- } else {
- FindStringIndices(isolate,
- subject_vector,
- pattern_content.ToUC16Vector(),
- &indices,
- limit);
- }
- }
- }
+ FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit);
if (static_cast<uint32_t>(indices.length()) < limit) {
indices.Add(subject_length);
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev