Author: [EMAIL PROTECTED]
Date: Tue Oct 14 07:25:37 2008
New Revision: 499
Modified:
branches/bleeding_edge/src/runtime.cc
Log:
Test for non-ASCII character in a needle used on an ASCII string is now
handled first
instead of only in the unrelated table-construction of Boyer-Moore.
Modified: branches/bleeding_edge/src/runtime.cc
==============================================================================
--- branches/bleeding_edge/src/runtime.cc (original)
+++ branches/bleeding_edge/src/runtime.cc Tue Oct 14 07:25:37 2008
@@ -996,8 +996,8 @@
// Compute the bad-char table for Boyer-Moore in the static buffer.
// Return false if the pattern contains non-ASCII characters that cannot be
// in the searched string.
-template <typename pchar, bool check_ascii>
-static bool BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern,
+template <typename pchar>
+static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern,
int start) {
// Run forwards to populate bad_char_table, so that *last* instance
// of character equivalence class is the one registered.
@@ -1006,14 +1006,8 @@
bad_char_occurence[i] = start - 1;
}
for (int i = start; i < pattern.length(); i++) {
- uc32 c = pattern[i];
- bad_char_occurence[c % kBMAlphabetSize] = i;
- if (check_ascii &&
- c > String::kMaxAsciiCharCode) {
- return false;
- }
+ bad_char_occurence[pattern[i] % kBMAlphabetSize] = i;
}
- return true;
}
template <typename pchar>
@@ -1081,13 +1075,7 @@
int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
int len = m - start;
- if (sizeof(pchar) > 1 && sizeof(schar) == 1) {
- BoyerMoorePopulateBadCharTable<pchar, true>(pattern, start);
- } else {
- if (!BoyerMoorePopulateBadCharTable<pchar, false>(pattern, start)) {
- return -1;
- }
- }
+ BoyerMoorePopulateBadCharTable(pattern, start);
int badness = 0; // How bad we are doing without a good-suffix table.
int idx; // No matches found prior to this index.
@@ -1201,6 +1189,17 @@
int start_index) {
ASSERT(pat.length() > 1);
+ // We have an ASCII haystack and a non-ASCII needle. Check if there
+ // really is a non-ASCII character in the needle and bail out if there
+ // is.
+ if (sizeof(pchar) > 1 && sizeof(schar) == 1) {
+ for (int i = 0; i < pat.length(); i++) {
+ uc16 c = pat[i];
+ if (c > String::kMaxAsciiCharCode) {
+ return -1;
+ }
+ }
+ }
// For small searches, a complex sort is not worth the setup overhead.
bool complete;
int idx = SimpleIndexOf(sub, pat, start_index, complete);
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---