Reviewers: Michael Starzinger,
Message:
string: optimize non-ASCII string splitting
Optimize String.split() for UCS-2 strings where the search pattern is a
single
ASCII or UCS-2 character.
Gives a 15% performance boost on x64 with the below micro-benchmark:
var s = "Zum iech jeitzt wa, der stolz fergiess Dauschen op. " +
"Dat rout Mier Faarwen no. Halm fond d'Blumme do hie, " +
"Kënnt dénen derfir dat ke. Wuel main rem vu, hu main " +
"zwëschen gei. Un lait schaddreg hie, gin hire drun " +
"Hémecht en, hun hire dämpen da.";
for (var i = 0; i < 1e7; ++i) s.split(" ");
Description:
Optimize non-ASCII string splitting with single-character search pattern
Please review this at https://codereview.chromium.org/11299163/
Affected files:
M src/objects.h
M src/runtime.cc
M test/mjsunit/string-split.js
Index: src/objects.h
diff --git a/src/objects.h b/src/objects.h
index
7d46c068589bfb707a43e93d84bf5cc74e328275..5f04fe1e61d929ba47e86c45e3e804ac5644da0d
100644
--- a/src/objects.h
+++ b/src/objects.h
@@ -7189,6 +7189,13 @@ class String: public HeapObject {
ASSERT_EQ(TWO_BYTE, state_);
return Vector<const uc16>::cast(buffer_);
}
+ // Return the character length of the string.
+ unsigned Length() const {
+ if (state_ == ASCII) return buffer_.length();
+ if (state_ == TWO_BYTE) return buffer_.length() / 2;
+ UNREACHABLE();
+ return 0; // Placate compiler.
+ }
private:
enum State { NON_FLAT, ASCII, TWO_BYTE };
Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index
a15e1f50539de0b18ac3d4526f83d63cb1076eb5..ba41459d02707ef4ec1ee43a4aeaac0fe27f9ed4
100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char>
subject,
}
+void FindTwoByteStringIndices(const Vector<const uc16> subject,
+ uc16 pattern,
+ ZoneList<int>* indices,
+ unsigned int limit,
+ Zone* zone) {
+ ASSERT(limit > 0);
+ const uc16* subject_start = subject.start();
+ const uc16* subject_end = subject_start + subject.length();
+ for (const uc16* pos = subject_start; pos < subject_end && limit > 0;
pos++) {
+ if (*pos == pattern) {
+ indices->Add(static_cast<int>(pos - subject_start), zone);
+ limit--;
+ }
+ }
+}
+
+
template <typename SubjectChar, typename PatternChar>
void FindStringIndices(Isolate* isolate,
Vector<const SubjectChar> subject,
@@ -2825,13 +2842,22 @@ void FindStringIndicesDispatch(Isolate* isolate,
}
} else {
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
- if (pattern_content.IsAscii()) {
- FindStringIndices(isolate,
- subject_vector,
- pattern_content.ToAsciiVector(),
- indices,
- limit,
- zone);
+ if (pattern_content.Length() == 1) {
+ uc16 pattern_char = pattern_content.IsAscii() ?
+ pattern_content.ToAsciiVector()[0] :
+ pattern_content.ToUC16Vector()[0];
+ FindTwoByteStringIndices(subject_vector,
+ pattern_char,
+ indices,
+ limit,
+ zone);
+ } else if (pattern_content.IsAscii()) {
+ FindStringIndices(isolate,
+ subject_vector,
+ pattern_content.ToAsciiVector(),
+ indices,
+ limit,
+ zone);
} else {
FindStringIndices(isolate,
subject_vector,
Index: test/mjsunit/string-split.js
diff --git a/test/mjsunit/string-split.js b/test/mjsunit/string-split.js
index
d8412f0eed561cd710270efd6b2e62a2c9b7fc16..1308244cabd538eeba01f2d5b27d94c736978991
100644
--- a/test/mjsunit/string-split.js
+++ b/test/mjsunit/string-split.js
@@ -66,6 +66,23 @@
assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/
assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/));
+assertArrayEquals(["Wenige", "sind", "auserwählt."],
+ "Wenige sind auserwählt.".split(" "));
+
+assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0));
+
+assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1));
+
+assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ",
2));
+
+assertArrayEquals(["Wenige", "sind", "auserwählt."],
+ "Wenige sind auserwählt.".split(" ", 3));
+
+assertArrayEquals(["Wenige sind auserw", "hlt."],
+ "Wenige sind auserwählt.".split("ä"));
+
+assertArrayEquals(["Wenige sind ", "."],
+ "Wenige sind auserwählt.".split("auserwählt"));
/* "ab".split(/((?=.))/)
*
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev