LGTM On Fri, Oct 17, 2008 at 12:00 PM, <[EMAIL PROTECTED]> wrote:
> Reviewers: olehougaard, > > Message: > Small code review. > > Description: > Changed Boyer-Moore's bad-char table code: > - Reduce it to half size if the pattern is ASCII, saving on > initialization > - If pattern is ASCII and subject is not, any non-ASCII char can cause a > full pattern-length shift, even if we haven't indexed the entire > pattern. > - Use memset to initialize buffer in the common case where the pattern > is > shorter than the max significant suffix limit. > > Please review this at http://codereview.chromium.org/7621 > > Affected files: > M src/runtime.cc > > > Index: src/runtime.cc > diff --git a/src/runtime.cc b/src/runtime.cc > index > bcdd76decaa0f23c795f50853838371d292278c4..bc1152142afade11849fc0dcf0c0ee96fe811ab3 > 100644 > --- a/src/runtime.cc > +++ b/src/runtime.cc > @@ -965,7 +965,7 @@ static const int kBMMinPatternLength = 5; > // Holds the two buffers used by Boyer-Moore string search's Good Suffix > // shift. Only allows the last kBMMaxShift characters of the needle > // to be indexed. > -class BMGoodSuffixBuffers: public AllStatic { > +class BMGoodSuffixBuffers { > public: > BMGoodSuffixBuffers() {} > inline void init(int needle_length) { > @@ -1005,12 +1005,18 @@ static void > BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern, > // Run forwards to populate bad_char_table, so that *last* instance > // of character equivalence class is the one registered. > // Notice: Doesn't include the last character. > - for (int i = 0; i < kBMAlphabetSize; i++) { > - bad_char_occurence[i] = start - 1; > + int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1 > + : kBMAlphabetSize; > + if (start == 0) { // All patterns less than kBMMaxShift in length. > + memset(bad_char_occurence, -1, table_size * > sizeof(*bad_char_occurence)); > + } else { > + for (int i = 0; i < table_size; i++) { > + bad_char_occurence[i] = start - 1; > + } > } > for (int i = start; i < pattern.length() - 1; i++) { > pchar c = pattern[i]; > - int bucket = c % kBMAlphabetSize; > + int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize; > bad_char_occurence[bucket] = i; > } > } > @@ -1065,6 +1071,19 @@ static void > BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern, > } > } > > +template <typename schar, typename pchar> > +static inline int CharOccurence(int char_code) { > + if (sizeof(schar) == 1) { > + return bad_char_occurence[char_code]; > + } > + if (sizeof(pchar) == 1) { > + if (char_code > String::kMaxAsciiCharCode) { > + return -1; > + } > + return bad_char_occurence[char_code]; > + } > + return bad_char_occurence[char_code % kBMAlphabetSize]; > +} > > // Restricted simplified Boyer-Moore string matching. Restricts tables to > a > // suffix of long pattern strings and handles only equivalence classes > @@ -1090,7 +1109,7 @@ static int BoyerMooreSimplified(Vector<const schar> > subject, > int j = m - 1; > int c; > while (last_char != (c = subject[idx + j])) { > - int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; > + int bc_occ = CharOccurence<schar, pchar>(c); > int shift = j - bc_occ; > idx += shift; > badness += 1 - shift; // at most zero, so badness cannot increase. > @@ -1105,7 +1124,7 @@ static int BoyerMooreSimplified(Vector<const schar> > subject, > complete = true; > return idx; > } else { > - int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; > + int bc_occ = CharOccurence<schar, pchar>(c); > int shift = bc_occ < j ? j - bc_occ : 1; > idx += shift; > // Badness increases by the number of characters we have > @@ -1141,7 +1160,7 @@ static int BoyerMooreIndexOf(Vector<const schar> > subject, > int j = m - 1; > schar c; > while (last_char != (c = subject[idx + j])) { > - int shift = j - bad_char_occurence[c % kBMAlphabetSize]; > + int shift = j - CharOccurence<schar, pchar>(c); > idx += shift; > if (idx > n - m) { > return -1; > @@ -1155,7 +1174,7 @@ static int BoyerMooreIndexOf(Vector<const schar> > subject, > idx += 1; > } else { > int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift. > - int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; > + int bc_occ = CharOccurence<schar, pchar>(c); > int shift = j - bc_occ; // Bad-char shift. > shift = (gs_shift > shift) ? gs_shift : shift; > idx += shift; > > > -- -- Ole I Hougaard Google Aarhus, Denmark +45 8745 9215 --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---
