Revision: 6109
Author: [email protected]
Date: Wed Dec 22 03:31:18 2010
Log: Clean up is-ASCII checks.
Review URL: http://codereview.chromium.org/5963003
http://code.google.com/p/v8/source/detail?r=6109
Modified:
/branches/bleeding_edge/src/heap-inl.h
/branches/bleeding_edge/src/heap.cc
/branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc
/branches/bleeding_edge/src/objects.h
/branches/bleeding_edge/src/string-search.h
/branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc
=======================================
--- /branches/bleeding_edge/src/heap-inl.h Tue Dec 21 05:24:23 2010
+++ /branches/bleeding_edge/src/heap-inl.h Wed Dec 22 03:31:18 2010
@@ -43,15 +43,13 @@
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
PretenureFlag pretenure) {
// Check for ASCII first since this is the common case.
- for (int i = 0; i < str.length(); ++i) {
- if (static_cast<uint8_t>(str[i]) > String::kMaxAsciiCharCodeU) {
- // Non-ASCII and we need to decode.
- return AllocateStringFromUtf8Slow(str, pretenure);
- }
- }
- // If the string is ASCII, we do not need to convert the characters
- // since UTF8 is backwards compatible with ASCII.
- return AllocateStringFromAscii(str, pretenure);
+ if (String::IsAscii(str.start(), str.length())) {
+ // If the string is ASCII, we do not need to convert the characters
+ // since UTF8 is backwards compatible with ASCII.
+ return AllocateStringFromAscii(str, pretenure);
+ }
+ // Non-ASCII and we need to decode.
+ return AllocateStringFromUtf8Slow(str, pretenure);
}
=======================================
--- /branches/bleeding_edge/src/heap.cc Tue Dec 21 05:24:23 2010
+++ /branches/bleeding_edge/src/heap.cc Wed Dec 22 03:31:18 2010
@@ -2549,20 +2549,10 @@
}
// For small strings we check whether the resource contains only
- // ascii characters. If yes, we use a different string map.
- bool is_ascii = true;
- if (length >= static_cast<size_t>(String::kMinNonFlatLength)) {
- is_ascii = false;
- } else {
- const uc16* data = resource->data();
- for (size_t i = 0; i < length; i++) {
- if (data[i] > String::kMaxAsciiCharCode) {
- is_ascii = false;
- break;
- }
- }
- }
-
+ // ASCII characters. If yes, we use a different string map.
+ static const size_t kAsciiCheckLengthLimit = 32;
+ bool is_ascii = length <= kAsciiCheckLengthLimit &&
+ String::IsAscii(resource->data(), length);
Map* map = is_ascii ?
Heap::external_string_with_ascii_data_map() :
Heap::external_string_map();
Object* result;
@@ -3342,11 +3332,8 @@
MaybeObject* Heap::AllocateStringFromTwoByte(Vector<const uc16> string,
PretenureFlag pretenure) {
// Check if the string is an ASCII string.
- int i = 0;
- while (i < string.length() && string[i] <= String::kMaxAsciiCharCode) i++;
-
MaybeObject* maybe_result;
- if (i == string.length()) { // It's an ASCII string.
+ if (String::IsAscii(string.start(), string.length())) {
maybe_result = AllocateRawAsciiString(string.length(), pretenure);
} else { // It's not an ASCII string.
maybe_result = AllocateRawTwoByteString(string.length(), pretenure);
=======================================
--- /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc Tue Dec 7 03:01:02 2010
+++ /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc Wed Dec 22 03:31:18 2010
@@ -211,9 +211,7 @@
// If input is ASCII, don't even bother calling here if the string to
// match contains a non-ascii character.
if (mode_ == ASCII) {
- for (int i = 0; i < str.length(); i++) {
- ASSERT(str[i] <= String::kMaxAsciiCharCodeU);
- }
+ ASSERT(String::IsAscii(str.start(), str.length()));
}
#endif
int byte_length = str.length() * char_size();
=======================================
--- /branches/bleeding_edge/src/objects.h Mon Dec 20 06:20:43 2010
+++ /branches/bleeding_edge/src/objects.h Wed Dec 22 03:31:18 2010
@@ -5245,6 +5245,34 @@
int from,
int to);
+ static inline bool IsAscii(const char* chars, int length) {
+ const char* limit = chars + length;
+#ifdef V8_HOST_CAN_READ_UNALIGNED
+ ASSERT(kMaxAsciiCharCode == 0x7F);
+ const uintptr_t non_ascii_mask = kUintptrAllBitsSet / 0xFF * 0x80;
+ while (chars <= limit - sizeof(uintptr_t)) {
+ if (*reinterpret_cast<const uintptr_t*>(chars) & non_ascii_mask) {
+ return false;
+ }
+ chars += sizeof(uintptr_t);
+ }
+#endif
+ while (chars < limit) {
+ if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) return false;
+ ++chars;
+ }
+ return true;
+ }
+
+ static inline bool IsAscii(const uc16* chars, int length) {
+ const uc16* limit = chars + length;
+ while (chars < limit) {
+ if (*chars > kMaxAsciiCharCodeU) return false;
+ ++chars;
+ }
+ return true;
+ }
+
protected:
class ReadBlockBuffer {
public:
=======================================
--- /branches/bleeding_edge/src/string-search.h Tue Dec 7 03:01:02 2010
+++ /branches/bleeding_edge/src/string-search.h Wed Dec 22 03:31:18 2010
@@ -66,12 +66,7 @@
}
static inline bool IsAsciiString(Vector<const uc16> string) {
- for (int i = 0, n = string.length(); i < n; i++) {
- if (static_cast<unsigned>(string[i]) > String::kMaxAsciiCharCodeU) {
- return false;
- }
- }
- return true;
+ return String::IsAscii(string.start(), string.length());
}
// The following tables are shared by all searches.
=======================================
--- /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc Tue Dec 7 03:01:02 2010
+++ /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc Wed Dec 22 03:31:18 2010
@@ -223,9 +223,7 @@
// If input is ASCII, don't even bother calling here if the string to
// match contains a non-ascii character.
if (mode_ == ASCII) {
- for (int i = 0; i < str.length(); i++) {
- ASSERT(str[i] <= String::kMaxAsciiCharCodeU);
- }
+ ASSERT(String::IsAscii(str.start(), str.length()));
}
#endif
int byte_length = str.length() * char_size();
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev