[v8-dev] Avoid the KMP overhead for simple indexOf() operations.

deanm Fri, 19 Sep 2008 05:13:30 -0700

Reviewers: Mads Ager, Kasper Lund,

Description:
Avoid the KMP overhead for simple indexOf() operations.  Will look into
evaluating the best cutoff between a simple search and KMP in the
future.  This improves some simple operations ~1.5x.


Please review this at http://codereview.chromium.org/2989

Affected files:
   M src/runtime.cc
   M src/string.js


Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index  
d1f5e9d253c97472a21b59a941e2779c5da49027..13bcd128804f111e03ba26abf4c7ca79cba7c430
  
100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -911,12 +911,12 @@ static Object* Runtime_StringIndexOf(Arguments args) {
    CONVERT_CHECKED(String, pat, args[1]);
    Object* index = args[2];

-  int subject_length = sub->length();
-  int pattern_length = pat->length();
-
    sub->TryFlatten();
    pat->TryFlatten();

+  int subject_length = sub->length();
+  int pattern_length = pat->length();
+
    uint32_t start_index;
    if (!Array::IndexFromObject(index, &start_index)) return  
Smi::FromInt(-1);
    if (pattern_length == 0) return Smi::FromInt(start_index);
@@ -934,8 +934,24 @@ static Object* Runtime_StringIndexOf(Arguments args) {
      return Smi::FromInt(-1);
    }

-  // For patterns with a length larger than one character we use the KMP
-  // algorithm.
+  // For small searches, the ability to skip the search length is not worth
+  // the overhead of setting up KMP.  TODO get more accurate cutoffs here.
+  if (subject_length < 100) {
+    // We know our pattern is at least 2 characters, we cache the first so  
the
+    // the common case of the first character not matching is fast.
+    uint16_t pattern_first_char = pat->Get(0);
+    for (int i = start_index; i + pattern_length <= subject_length; i++) {
+      if (sub->Get(i) != pattern_first_char) continue;
+
+      for (int j = 1; j < pattern_length; ++j) {
+        if (pat->Get(j) != sub->Get(j + i)) break;
+        if (j == pattern_length - 1) return Smi::FromInt(i);
+      }
+    }
+    return Smi::FromInt(-1);
+  }
+
+  // For patterns with a length larger length we use the KMP algorithm.
    //
    // Compute the 'next' table.
    int* next_table = NewArray<int>(pattern_length);
Index: src/string.js
diff --git a/src/string.js b/src/string.js
index  
1a13d16428a2dbb961578b978566ff44edf49738..d8105fdfc0b88ffaa02a0e8ec110559a4c3b79a4
  
100644
--- a/src/string.js
+++ b/src/string.js
@@ -332,6 +332,7 @@ function ApplyReplacementFunction(replace, captures,  
subject) {
  // ECMA-262 section 15.5.4.7
  %AddProperty($String.prototype, "indexOf", function(searchString /*  
position */) {  // length == 1
    var str = ToString(this);
+  var str_len = str.length;
    var searchStr = ToString(searchString);
    var index = 0;
    if (%_ArgumentsLength() > 1) {
@@ -339,8 +340,8 @@ function ApplyReplacementFunction(replace, captures,  
subject) {
      index = TO_INTEGER(arg1);
    }
    if (index < 0) index = 0;
-  if (index > str.length) index = str.length;
-  if (searchStr.length + index > str.length) return -1;
+  if (index > str_len) index = str_len;
+  if (searchStr.length + index > str_len) return -1;
    return %StringIndexOf(str, searchStr, index);
  }, DONT_ENUM);




--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

[v8-dev] Avoid the KMP overhead for simple indexOf() operations.

Reply via email to