Reviewers: Christian Plesner Hansen,

Message:
Small code review

Description:
Selecting between simple text search and Boyer-Moore now happens by
trying
the simple search and bailing out and continuing with Boyer-Moore if the
simple
version seems too expensive.

Please review this at http://codereview.chromium.org/7139

Affected files:
   M src/runtime.cc


Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index  
cb0c9d160a6028a5fab4ef0786ec247500431b44..80704c265802d4f6e98b039c58959f9e8c07d76a
  
100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -1156,14 +1156,20 @@ static int SimpleIndexOf(Vector<const schar>  
subject,
                           int start_index) {
    int pattern_length = pattern.length();
    int subject_length = subject.length();
+  // Badness is a count of how many extra times the same character
+  // is checked. We compare it to the index counter, so we start
+  // it at the start_index, and give it a little discount to avoid
+  // very early bail-outs.
+  int badness = start_index - pattern_length;
    // We know our pattern is at least 2 characters, we cache the first so
    // the common case of the first character not matching is faster.
    pchar pattern_first_char = pattern[0];
+
    for (int i = start_index, n = subject_length - pattern_length; i <= n;  
i++) {
      if (subject[i] != pattern_first_char) continue;
      int j = 1;
      do {
-      if (pattern[j] != subject[j+i]) {
+      if (pattern[j] != subject[i+j]) {
          break;
        }
        j++;
@@ -1171,6 +1177,10 @@ static int SimpleIndexOf(Vector<const schar> subject,
      if (j == pattern_length) {
        return i;
      }
+    badness += j;
+    if (badness > i) {  // More than one extra character on average.
+      return -(i + 1);  // No matches up to index i+1.
+    }
    }
    return -1;
  }
@@ -1180,16 +1190,12 @@ template <typename schar, typename pchar>
  static int StringMatchStrategy(Vector<const schar> sub,
                                 Vector<const pchar> pat,
                                 int start_index) {
-  int pattern_length = pat.length();
-  ASSERT(pattern_length > 1);
+  ASSERT(pat.length() > 1);

    // For small searches, a complex sort is not worth the setup overhead.
-  int subject_length = sub.length() - start_index;
-  if (subject_length < 100 || pattern_length < 8) {
-    return SimpleIndexOf(sub, pat, start_index);
-  }
-
-  return BoyerMooreIndexOf(sub, pat, start_index);
+  int res = SimpleIndexOf(sub, pat, start_index);
+  if (res >= -1) return res;
+  return BoyerMooreIndexOf(sub, pat, -res);
  }

  // Perform string match of pattern on subject, starting at start index.



--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to