Reviewers: Erik Corry,

Message:
Small review.

Description:
Minor changes, mostly cosmetic, in string search.

Please review this at http://codereview.chromium.org/14505

Affected files:
   M src/runtime.cc


Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index  
24b19047eb564ff6b4c1842ed99dba46bb9112f2..b9f1f48ef2138a4179b5436b4572e70d851e6e49
  
100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -1151,15 +1151,14 @@ static inline int CharOccurence(int char_code) {
    return bad_char_occurence[char_code % kBMAlphabetSize];
  }

-// Restricted simplified Boyer-Moore string matching. Restricts tables to a
-// suffix of long pattern strings and handles only equivalence classes
-// of the full alphabet. This allows us to ensure that tables take only
-// a fixed amount of space.
+// Restricted simplified Boyer-Moore string matching.
+// Uses only the bad-shift table of Boyer-Moore and only uses it
+// for the character compared to the last character of the needle.
  template <typename schar, typename pchar>
-static int BoyerMooreSimplified(Vector<const schar> subject,
-                                Vector<const pchar> pattern,
-                                int start_index,
-                                bool* complete) {
+static int BoyerMooreHorsepool(Vector<const schar> subject,
+                               Vector<const pchar> pattern,
+                               int start_index,
+                               bool* complete) {
    int n = subject.length();
    int m = pattern.length();
    // Only preprocess at most kBMMaxShift last characters of pattern.
@@ -1170,6 +1169,7 @@ static int BoyerMooreSimplified(Vector<const schar>  
subject,
    int badness = -m;  // How bad we are doing without a good-suffix table.
    int idx;  // No matches found prior to this index.
    pchar last_char = pattern[m - 1];
+  int last_char_shift = m - 1 - CharOccurence<schar, pchar>(last_char);
    // Perform search
    for (idx = start_index; idx <= n - m;) {
      int j = m - 1;
@@ -1185,19 +1185,17 @@ static int BoyerMooreSimplified(Vector<const schar>  
subject,
        }
      }
      j--;
-    while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
+    while (j >= 0 && pattern[j] == (subject[idx + j])) j--;
      if (j < 0) {
        *complete = true;
        return idx;
      } else {
-      int bc_occ = CharOccurence<schar, pchar>(c);
-      int shift = bc_occ < j ? j - bc_occ : 1;
-      idx += shift;
+      idx += last_char_shift;
        // Badness increases by the number of characters we have
        // checked, and decreases by the number of characters we
        // can skip by shifting. It's a measure of how we are doing
        // compared to reading each character exactly once.
-      badness += (m - j) - shift;
+      badness += (m - j) - last_char_shift;
        if (badness > 0) {
          *complete = false;
          return idx;
@@ -1237,12 +1235,15 @@ static int BoyerMooreIndexOf(Vector<const schar>  
subject,
        return idx;
      } else if (j < start) {
        // we have matched more than our tables allow us to be smart about.
-      idx += 1;
+      // Fall back on BMH shift.
+      idx += m - 1 - CharOccurence<schar, pchar>(last_char);
      } else {
        int gs_shift = bmgs_buffers.shift(j + 1);       // Good suffix shift.
        int bc_occ = CharOccurence<schar, pchar>(c);
        int shift = j - bc_occ;                         // Bad-char shift.
-      shift = (gs_shift > shift) ? gs_shift : shift;
+      if (gs_shift > shift) {
+        shift = gs_shift;
+      }
        idx += shift;
      }
    } while (idx <= n - m);
@@ -1286,7 +1287,7 @@ static int SimpleIndexOf(Vector<const schar> subject,
      badness++;
      if (badness > 0) {
        *complete = false;
-      return (i);
+      return i;
      }
      if (subject[i] != pattern_first_char) continue;
      int j = 1;
@@ -1357,7 +1358,7 @@ static int StringMatchStrategy(Vector<const schar>  
sub,
    bool complete;
    int idx = SimpleIndexOf(sub, pat, start_index, &complete);
    if (complete) return idx;
-  idx = BoyerMooreSimplified(sub, pat, idx, &complete);
+  idx = BoyerMooreHorsepool(sub, pat, idx, &complete);
    if (complete) return idx;
    return BoyerMooreIndexOf(sub, pat, idx);
  }



--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to