I'm not a fan of using negative return values as a signal.  How about
using a boolean that says whether or not the search was aborted and an
integer containing the number of characters seen instead?

Otherwise lgtm.

On Tue, Oct 14, 2008 at 3:39 PM,  <[EMAIL PROTECTED]> wrote:
> Reviewers: Christian Plesner Hansen,
>
> Message:
> Small code review
>
> Description:
> Selecting between simple text search and Boyer-Moore now happens by
> trying
> the simple search and bailing out and continuing with Boyer-Moore if the
> simple
> version seems too expensive.
>
> Please review this at http://codereview.chromium.org/7139
>
> Affected files:
>  M src/runtime.cc
>
>
> Index: src/runtime.cc
> diff --git a/src/runtime.cc b/src/runtime.cc
> index
> cb0c9d160a6028a5fab4ef0786ec247500431b44..80704c265802d4f6e98b039c58959f9e8c07d76a
> 100644
> --- a/src/runtime.cc
> +++ b/src/runtime.cc
> @@ -1156,14 +1156,20 @@ static int SimpleIndexOf(Vector<const schar>
> subject,
>                          int start_index) {
>   int pattern_length = pattern.length();
>   int subject_length = subject.length();
> +  // Badness is a count of how many extra times the same character
> +  // is checked. We compare it to the index counter, so we start
> +  // it at the start_index, and give it a little discount to avoid
> +  // very early bail-outs.
> +  int badness = start_index - pattern_length;
>   // We know our pattern is at least 2 characters, we cache the first so
>   // the common case of the first character not matching is faster.
>   pchar pattern_first_char = pattern[0];
> +
>   for (int i = start_index, n = subject_length - pattern_length; i <= n;
> i++) {
>     if (subject[i] != pattern_first_char) continue;
>     int j = 1;
>     do {
> -      if (pattern[j] != subject[j+i]) {
> +      if (pattern[j] != subject[i+j]) {
>         break;
>       }
>       j++;
> @@ -1171,6 +1177,10 @@ static int SimpleIndexOf(Vector<const schar> subject,
>     if (j == pattern_length) {
>       return i;
>     }
> +    badness += j;
> +    if (badness > i) {  // More than one extra character on average.
> +      return -(i + 1);  // No matches up to index i+1.
> +    }
>   }
>   return -1;
>  }
> @@ -1180,16 +1190,12 @@ template <typename schar, typename pchar>
>  static int StringMatchStrategy(Vector<const schar> sub,
>                                Vector<const pchar> pat,
>                                int start_index) {
> -  int pattern_length = pat.length();
> -  ASSERT(pattern_length > 1);
> +  ASSERT(pat.length() > 1);
>
>   // For small searches, a complex sort is not worth the setup overhead.
> -  int subject_length = sub.length() - start_index;
> -  if (subject_length < 100 || pattern_length < 8) {
> -    return SimpleIndexOf(sub, pat, start_index);
> -  }
> -
> -  return BoyerMooreIndexOf(sub, pat, start_index);
> +  int res = SimpleIndexOf(sub, pat, start_index);
> +  if (res >= -1) return res;
> +  return BoyerMooreIndexOf(sub, pat, -res);
>  }
>
>  // Perform string match of pattern on subject, starting at start index.
>
>
>

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to