Reviewers: danno, Michael Starzinger,
Description:
Speedup stringsearch for two byte strings
Uses the lower byte with memchr which is
significantly faster than a naive compare
Performance difference with bench (http://hastebin.com/xuxexataso.js):
old new
single character single character
Κ found at 922 Κ found at 922
3324 616
㎡ found at 13217 ㎡ found at 13217
42366 4931
က found at 4096 က found at 4096
13369 9836
found at 65280 found at 65280
207472 36149
ᆬ found at 65445 ᆬ found at 65445
209344 36666
found at 8197 found at 8197
26731 11757
倂 found at 20482 倂 found at 20482
66071 17193
linear search linear search
ΚΛ found at 922 ΚΛ found at 922
4112 504
㎡㎢ found at 13217 ㎡㎢ found at 13217
55105 5119
ᆬᆭ found at 65445 ᆬᆭ found at 65445
268016 35496
linear + bmh search linear + bmh search
ΚΛΜΝΞΟΠΡ found at 922 ΚΛΜΝΞΟΠΡ found at 922
2897 522
ᆬᆭᄃᄄᄅᆰᆱᆲ found at 65445 ᆬᆭᄃᄄᄅᆰᆱᆲ found at 65445
167687 158465
I experimented a bit with it for nodejs.
The original committer ([email protected]) or reviewer
([email protected])
of string-search.h did not seem active anymore on v8.
BUG=
Please review this at https://codereview.chromium.org/1303033012/
Base URL: https://chromium.googlesource.com/v8/v8.git@master
Affected files (+38, -33 lines):
M src/string-search.h
Index: src/string-search.h
diff --git a/src/string-search.h b/src/string-search.h
index
349d4fd2db72918560fd0e784bf9ea4664c803c2..192261655e5aa7c776d3071ca91adeb6fcefd68a
100644
--- a/src/string-search.h
+++ b/src/string-search.h
@@ -189,11 +189,37 @@ class StringSearch : private StringSearchBase {
int start_;
};
+template <typename T, typename U>
+inline T AlignDown(T value, U alignment) {
+ return reinterpret_cast<T>(
+ (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1)));
+}
+
+template <typename PatternChar, typename SubjectChar>
+inline int FindFirstByte(Vector<const PatternChar> pattern,
+ Vector<const SubjectChar> subject, int index) {
+ PatternChar pattern_first_char = pattern[0];
+
+ if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
+ const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(memchr(
+ subject.start() + index, pattern_first_char, subject.length() -
index));
+ if (pos == NULL) return -1;
+ return static_cast<int>(pos - subject.start());
+ } else {
+ uint8_t search_low_byte = static_cast<uint8_t>(pattern_first_char &
0xFF);
+ const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
+ memchr(subject.start() + index, search_low_byte,
+ (subject.length() - index) * sizeof(SubjectChar)));
+ if (pos == NULL) return -1;
+ pos = AlignDown(pos, sizeof(SubjectChar));
+ return static_cast<int>(pos - subject.start());
+ }
+ return -1;
+}
//---------------------------------------------------------------------
// Single Character Pattern Search Strategy
//---------------------------------------------------------------------
-
template <typename PatternChar, typename SubjectChar>
int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
StringSearch<PatternChar, SubjectChar>* search,
@@ -201,14 +227,8 @@ int StringSearch<PatternChar,
SubjectChar>::SingleCharSearch(
int index) {
DCHECK_EQ(1, search->pattern_.length());
PatternChar pattern_first_char = search->pattern_[0];
- int i = index;
if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
- memchr(subject.start() + i,
- pattern_first_char,
- subject.length() - i));
- if (pos == NULL) return -1;
- return static_cast<int>(pos - subject.start());
+ return FindFirstByte(search->pattern_, subject, index);
} else {
if (sizeof(PatternChar) > sizeof(SubjectChar)) {
if (exceedsOneByte(pattern_first_char)) {
@@ -216,8 +236,12 @@ int StringSearch<PatternChar,
SubjectChar>::SingleCharSearch(
}
}
SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
- int n = subject.length();
+ const int n = subject.length();
+ int i = index;
while (i < n) {
+ i = FindFirstByte(search->pattern_, subject, i);
+ if (i == -1) return -1;
+
if (subject[i++] == search_char) return i - 1;
}
return -1;
@@ -254,20 +278,12 @@ int StringSearch<PatternChar,
SubjectChar>::LinearSearch(
Vector<const PatternChar> pattern = search->pattern_;
DCHECK(pattern.length() > 1);
int pattern_length = pattern.length();
- PatternChar pattern_first_char = pattern[0];
int i = index;
int n = subject.length() - pattern_length;
while (i <= n) {
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
- memchr(subject.start() + i,
- pattern_first_char,
- n - i + 1));
- if (pos == NULL) return -1;
- i = static_cast<int>(pos - subject.start()) + 1;
- } else {
- if (subject[i++] != pattern_first_char) continue;
- }
+ i = FindFirstByte(pattern, subject, i);
+ if (i == -1) return -1;
+ i++;
// Loop extracted to separate function to allow using return to do
// a deeper break.
if (CharCompare(pattern.start() + 1,
@@ -505,22 +521,11 @@ int StringSearch<PatternChar,
SubjectChar>::InitialSearch(
// We know our pattern is at least 2 characters, we cache the first so
// the common case of the first character not matching is faster.
- PatternChar pattern_first_char = pattern[0];
for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
badness++;
if (badness <= 0) {
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
- memchr(subject.start() + i,
- pattern_first_char,
- n - i + 1));
- if (pos == NULL) {
- return -1;
- }
- i = static_cast<int>(pos - subject.start());
- } else {
- if (subject[i] != pattern_first_char) continue;
- }
+ i = FindFirstByte(pattern, subject, i);
+ if (i == -1) return -1;
int j = 1;
do {
if (pattern[j] != subject[i + j]) {
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.