Reviewers: Mads Ager, Kasper Lund, Description: Avoid the KMP overhead for simple indexOf() operations. Will look into evaluating the best cutoff between a simple search and KMP in the future. This improves some simple operations ~1.5x.
Please review this at http://codereview.chromium.org/2989 Affected files: M src/runtime.cc M src/string.js Index: src/runtime.cc diff --git a/src/runtime.cc b/src/runtime.cc index d1f5e9d253c97472a21b59a941e2779c5da49027..13bcd128804f111e03ba26abf4c7ca79cba7c430 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -911,12 +911,12 @@ static Object* Runtime_StringIndexOf(Arguments args) { CONVERT_CHECKED(String, pat, args[1]); Object* index = args[2]; - int subject_length = sub->length(); - int pattern_length = pat->length(); - sub->TryFlatten(); pat->TryFlatten(); + int subject_length = sub->length(); + int pattern_length = pat->length(); + uint32_t start_index; if (!Array::IndexFromObject(index, &start_index)) return Smi::FromInt(-1); if (pattern_length == 0) return Smi::FromInt(start_index); @@ -934,8 +934,24 @@ static Object* Runtime_StringIndexOf(Arguments args) { return Smi::FromInt(-1); } - // For patterns with a length larger than one character we use the KMP - // algorithm. + // For small searches, the ability to skip the search length is not worth + // the overhead of setting up KMP. TODO get more accurate cutoffs here. + if (subject_length < 100) { + // We know our pattern is at least 2 characters, we cache the first so the + // the common case of the first character not matching is fast. + uint16_t pattern_first_char = pat->Get(0); + for (int i = start_index; i + pattern_length <= subject_length; i++) { + if (sub->Get(i) != pattern_first_char) continue; + + for (int j = 1; j < pattern_length; ++j) { + if (pat->Get(j) != sub->Get(j + i)) break; + if (j == pattern_length - 1) return Smi::FromInt(i); + } + } + return Smi::FromInt(-1); + } + + // For patterns with a length larger length we use the KMP algorithm. // // Compute the 'next' table. int* next_table = NewArray<int>(pattern_length); Index: src/string.js diff --git a/src/string.js b/src/string.js index 1a13d16428a2dbb961578b978566ff44edf49738..d8105fdfc0b88ffaa02a0e8ec110559a4c3b79a4 100644 --- a/src/string.js +++ b/src/string.js @@ -332,6 +332,7 @@ function ApplyReplacementFunction(replace, captures, subject) { // ECMA-262 section 15.5.4.7 %AddProperty($String.prototype, "indexOf", function(searchString /* position */) { // length == 1 var str = ToString(this); + var str_len = str.length; var searchStr = ToString(searchString); var index = 0; if (%_ArgumentsLength() > 1) { @@ -339,8 +340,8 @@ function ApplyReplacementFunction(replace, captures, subject) { index = TO_INTEGER(arg1); } if (index < 0) index = 0; - if (index > str.length) index = str.length; - if (searchStr.length + index > str.length) return -1; + if (index > str_len) index = str_len; + if (searchStr.length + index > str_len) return -1; return %StringIndexOf(str, searchStr, index); }, DONT_ENUM); --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---
