Reviewers: danno, Michael Starzinger, Jakob, skomski,
Message:
Created Revert of Speedup stringsearch for two byte strings
Description:
Revert of Speedup stringsearch for two byte strings (patchset #3 id:40001 of
https://codereview.chromium.org/1303033012/ )
Reason for revert:
[Sheriff] Breaks fuzzer and msan:
http://build.chromium.org/p/client.v8/builders/V8%20Fuzzer/builds/4773
Repro with:
tools/fuzz-harness.sh out/Debug/d8
(in a ninja Debug build)
Msan:
http://build.chromium.org/p/client.v8/builders/V8%20Linux%20-%20arm64%20-%20sim%20-%20MSAN/builds/4097
Original issue's description:
Speedup stringsearch for two byte strings
Uses the lower byte with memchr which is
significantly faster than a naive compare
Performance difference with bench (http://hastebin.com/xuxexataso.js):
old new
single character single character
Κ found at 922 Κ found at 922
3324 616
㎡ found at 13217 ㎡ found at 13217
42366 4931
က found at 4096 က found at 4096
13369 9836
found at 65280 found at 65280
207472 36149
ᆬ found at 65445 ᆬ found at 65445
209344 36666
found at 8197 found at 8197
26731 11757
倂 found at 20482 倂 found at 20482
66071 17193
linear search linear search
ΚΛ found at 922 ΚΛ found at 922
4112 504
㎡㎢ found at 13217 ㎡㎢ found at 13217
55105 5119
ᆬᆭ found at 65445 ᆬᆭ found at 65445
268016 35496
linear + bmh search linear + bmh search
ΚΛΜΝΞΟΠΡ found at 922 ΚΛΜΝΞΟΠΡ found at 922
2897 522
ᆬᆭᄃᄄᄅᆰᆱᆲ found at 65445 ᆬᆭᄃᄄᄅᆰᆱᆲ found at 65445
167687 158465
Committed: https://crrev.com/fced280f37588f8a232a414201276e053117e9ea
Cr-Commit-Position: refs/heads/master@{#30587}
[email protected],[email protected],[email protected],[email protected]
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
Please review this at https://codereview.chromium.org/1331433002/
Base URL: https://chromium.googlesource.com/v8/v8.git@master
Affected files (+37, -49 lines):
M AUTHORS
M src/string-search.h
M test/mjsunit/string-indexof-1.js
Index: AUTHORS
diff --git a/AUTHORS b/AUTHORS
index
82a9c164f06c9f462ea52b5efae0a069dc7aa689..72c23bcc83e0b73fac67a2d554ab1b5b2cccabc7
100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -67,7 +67,6 @@
Jonathan Liu <[email protected]>
JunHo Seo <[email protected]>
Kang-Hao (Kenny) Lu <[email protected]>
-Karl Skomski <[email protected]>
Luis Reis <[email protected]>
Luke Zarko <[email protected]>
Maciej Małecki <[email protected]>
Index: src/string-search.h
diff --git a/src/string-search.h b/src/string-search.h
index
aeaa9b33eb14def87d38822883496f830fc14364..349d4fd2db72918560fd0e784bf9ea4664c803c2
100644
--- a/src/string-search.h
+++ b/src/string-search.h
@@ -190,38 +190,6 @@
};
-template <typename PatternChar, typename SubjectChar>
-int FindFirstCharacter(Vector<const PatternChar> pattern,
- Vector<const SubjectChar> subject, int index) {
- PatternChar pattern_first_char = pattern[0];
-
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- const SubjectChar* char_pos = reinterpret_cast<const
SubjectChar*>(memchr(
- subject.start() + index, pattern_first_char, subject.length() -
index));
- if (char_pos == NULL) return -1;
- return static_cast<int>(char_pos - subject.start());
- } else {
- const uint8_t search_low_byte =
- static_cast<uint8_t>(pattern_first_char & 0xFF);
- const SubjectChar search_char =
- static_cast<SubjectChar>(pattern_first_char);
- int pos = index;
- do {
- const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>(
- memchr(subject.start() + pos, search_low_byte,
- (subject.length() - pos) * sizeof(SubjectChar)));
- if (char_pos == NULL) return -1;
- pos = static_cast<int>(char_pos - subject.start());
- if (IsAligned(reinterpret_cast<uintptr_t>(char_pos),
- sizeof(SubjectChar))) {
- if (subject[pos] == search_char) return pos;
- }
- } while (++pos < subject.length());
- }
- return -1;
-}
-
-
//---------------------------------------------------------------------
// Single Character Pattern Search Strategy
//---------------------------------------------------------------------
@@ -233,15 +201,26 @@
int index) {
DCHECK_EQ(1, search->pattern_.length());
PatternChar pattern_first_char = search->pattern_[0];
+ int i = index;
if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- return FindFirstCharacter(search->pattern_, subject, index);
+ const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
+ memchr(subject.start() + i,
+ pattern_first_char,
+ subject.length() - i));
+ if (pos == NULL) return -1;
+ return static_cast<int>(pos - subject.start());
} else {
if (sizeof(PatternChar) > sizeof(SubjectChar)) {
if (exceedsOneByte(pattern_first_char)) {
return -1;
}
}
- return FindFirstCharacter(search->pattern_, subject, index);
+ SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
+ int n = subject.length();
+ while (i < n) {
+ if (subject[i++] == search_char) return i - 1;
+ }
+ return -1;
}
}
@@ -275,12 +254,20 @@
Vector<const PatternChar> pattern = search->pattern_;
DCHECK(pattern.length() > 1);
int pattern_length = pattern.length();
+ PatternChar pattern_first_char = pattern[0];
int i = index;
int n = subject.length() - pattern_length;
while (i <= n) {
- i = FindFirstCharacter(pattern, subject, i);
- if (i == -1) return -1;
- i++;
+ if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
+ const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
+ memchr(subject.start() + i,
+ pattern_first_char,
+ n - i + 1));
+ if (pos == NULL) return -1;
+ i = static_cast<int>(pos - subject.start()) + 1;
+ } else {
+ if (subject[i++] != pattern_first_char) continue;
+ }
// Loop extracted to separate function to allow using return to do
// a deeper break.
if (CharCompare(pattern.start() + 1,
@@ -518,11 +505,22 @@
// We know our pattern is at least 2 characters, we cache the first so
// the common case of the first character not matching is faster.
+ PatternChar pattern_first_char = pattern[0];
for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
badness++;
if (badness <= 0) {
- i = FindFirstCharacter(pattern, subject, i);
- if (i == -1) return -1;
+ if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
+ const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
+ memchr(subject.start() + i,
+ pattern_first_char,
+ n - i + 1));
+ if (pos == NULL) {
+ return -1;
+ }
+ i = static_cast<int>(pos - subject.start());
+ } else {
+ if (subject[i] != pattern_first_char) continue;
+ }
int j = 1;
do {
if (pattern[j] != subject[i + j]) {
Index: test/mjsunit/string-indexof-1.js
diff --git a/test/mjsunit/string-indexof-1.js
b/test/mjsunit/string-indexof-1.js
index
366437a1a0a55cd5c8fa75439cbd8aa32faf8750..db3623f7c029cd4e55782fff38bd7cbed4231511
100644
--- a/test/mjsunit/string-indexof-1.js
+++ b/test/mjsunit/string-indexof-1.js
@@ -77,15 +77,6 @@
//single char pattern
assertEquals(4, twoByteString.indexOf("\u0395"));
-// test string with alignment traps
-var alignmentString = "\u1122\u2211\u2222\uFF00\u00FF\u00FF";
-assertEquals(2, alignmentString.indexOf("\u2222"));
-assertEquals(4, alignmentString.indexOf("\u00FF\u00FF"));
-
-var longAlignmentString = "\uFF00" + "\u00FF".repeat(10);
-assertEquals(1,
- longAlignmentString.indexOf("\u00FF".repeat(10)));
-
// Test complex string indexOf algorithms. Only trigger for long strings.
// Long string that isn't a simple repeat of a shorter string.
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.