LGTM

On Fri, Oct 17, 2008 at 12:00 PM, <[EMAIL PROTECTED]> wrote:

> Reviewers: olehougaard,
>
> Message:
> Small code review.
>
> Description:
> Changed Boyer-Moore's bad-char table code:
> - Reduce it to half size if the pattern is ASCII, saving on
> initialization
> - If pattern is ASCII and subject is not, any non-ASCII char can cause a
>  full pattern-length shift, even if we haven't indexed the entire
> pattern.
> - Use memset to initialize buffer in the common case where the pattern
> is
>  shorter than the max significant suffix limit.
>
> Please review this at http://codereview.chromium.org/7621
>
> Affected files:
>  M src/runtime.cc
>
>
> Index: src/runtime.cc
> diff --git a/src/runtime.cc b/src/runtime.cc
> index
> bcdd76decaa0f23c795f50853838371d292278c4..bc1152142afade11849fc0dcf0c0ee96fe811ab3
> 100644
> --- a/src/runtime.cc
> +++ b/src/runtime.cc
> @@ -965,7 +965,7 @@ static const int kBMMinPatternLength = 5;
>  // Holds the two buffers used by Boyer-Moore string search's Good Suffix
>  // shift. Only allows the last kBMMaxShift characters of the needle
>  // to be indexed.
> -class BMGoodSuffixBuffers: public AllStatic {
> +class BMGoodSuffixBuffers {
>  public:
>   BMGoodSuffixBuffers() {}
>   inline void init(int needle_length) {
> @@ -1005,12 +1005,18 @@ static void
> BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern,
>   // Run forwards to populate bad_char_table, so that *last* instance
>   // of character equivalence class is the one registered.
>   // Notice: Doesn't include the last character.
> -  for (int i = 0; i < kBMAlphabetSize; i++) {
> -    bad_char_occurence[i] = start - 1;
> +  int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1
> +                                        : kBMAlphabetSize;
> +  if (start == 0) {  // All patterns less than kBMMaxShift in length.
> +    memset(bad_char_occurence, -1, table_size *
> sizeof(*bad_char_occurence));
> +  } else {
> +    for (int i = 0; i < table_size; i++) {
> +      bad_char_occurence[i] = start - 1;
> +    }
>   }
>   for (int i = start; i < pattern.length() - 1; i++) {
>     pchar c = pattern[i];
> -    int bucket = c % kBMAlphabetSize;
> +    int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize;
>     bad_char_occurence[bucket] = i;
>   }
>  }
> @@ -1065,6 +1071,19 @@ static void
> BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern,
>   }
>  }
>
> +template <typename schar, typename pchar>
> +static inline int CharOccurence(int char_code) {
> +  if (sizeof(schar) == 1) {
> +    return bad_char_occurence[char_code];
> +  }
> +  if (sizeof(pchar) == 1) {
> +    if (char_code > String::kMaxAsciiCharCode) {
> +      return -1;
> +    }
> +    return bad_char_occurence[char_code];
> +  }
> +  return bad_char_occurence[char_code % kBMAlphabetSize];
> +}
>
>  // Restricted simplified Boyer-Moore string matching. Restricts tables to
> a
>  // suffix of long pattern strings and handles only equivalence classes
> @@ -1090,7 +1109,7 @@ static int BoyerMooreSimplified(Vector<const schar>
> subject,
>     int j = m - 1;
>     int c;
>     while (last_char != (c = subject[idx + j])) {
> -      int bc_occ = bad_char_occurence[c % kBMAlphabetSize];
> +      int bc_occ = CharOccurence<schar, pchar>(c);
>       int shift = j - bc_occ;
>       idx += shift;
>       badness += 1 - shift;  // at most zero, so badness cannot increase.
> @@ -1105,7 +1124,7 @@ static int BoyerMooreSimplified(Vector<const schar>
> subject,
>       complete = true;
>       return idx;
>     } else {
> -      int bc_occ = bad_char_occurence[c % kBMAlphabetSize];
> +      int bc_occ = CharOccurence<schar, pchar>(c);
>       int shift = bc_occ < j ? j - bc_occ : 1;
>       idx += shift;
>       // Badness increases by the number of characters we have
> @@ -1141,7 +1160,7 @@ static int BoyerMooreIndexOf(Vector<const schar>
> subject,
>     int j = m - 1;
>     schar c;
>     while (last_char != (c = subject[idx + j])) {
> -      int shift = j - bad_char_occurence[c % kBMAlphabetSize];
> +      int shift = j - CharOccurence<schar, pchar>(c);
>       idx += shift;
>       if (idx > n - m) {
>         return -1;
> @@ -1155,7 +1174,7 @@ static int BoyerMooreIndexOf(Vector<const schar>
> subject,
>       idx += 1;
>     } else {
>       int gs_shift = bmgs_buffers.shift(j + 1);       // Good suffix shift.
> -      int bc_occ = bad_char_occurence[c % kBMAlphabetSize];
> +      int bc_occ = CharOccurence<schar, pchar>(c);
>       int shift = j - bc_occ;                         // Bad-char shift.
>       shift = (gs_shift > shift) ? gs_shift : shift;
>       idx += shift;
>
>
>


-- 
-- 
Ole I Hougaard
Google Aarhus, Denmark
+45 8745 9215

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to