Reviewers: Mads Ager,

Description:
Clean up is-ASCII checks.

Please review this at http://codereview.chromium.org/5963003/

Affected files:
  M src/heap-inl.h
  M src/heap.cc
  M src/ia32/regexp-macro-assembler-ia32.cc
  M src/objects.h
  M src/string-search.h
  M src/x64/regexp-macro-assembler-x64.cc


Index: src/heap-inl.h
diff --git a/src/heap-inl.h b/src/heap-inl.h
index 62e810fcb1b80b44a2b518c1c8eb460abd68534c..26cf6e0f4751d0c5ea1c839339170ff0433e0bae 100644
--- a/src/heap-inl.h
+++ b/src/heap-inl.h
@@ -43,15 +43,13 @@ int Heap::MaxObjectSizeInPagedSpace() {
 MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
                                           PretenureFlag pretenure) {
   // Check for ASCII first since this is the common case.
-  for (int i = 0; i < str.length(); ++i) {
-    if (static_cast<uint8_t>(str[i]) > String::kMaxAsciiCharCodeU) {
-      // Non-ASCII and we need to decode.
-      return AllocateStringFromUtf8Slow(str, pretenure);
-    }
+  if (String::IsAscii(str.start(), str.length())) {
+    // If the string is ASCII, we do not need to convert the characters
+    // since UTF8 is backwards compatible with ASCII.
+    return AllocateStringFromAscii(str, pretenure);
   }
-  // If the string is ASCII, we do not need to convert the characters
-  // since UTF8 is backwards compatible with ASCII.
-  return AllocateStringFromAscii(str, pretenure);
+  // Non-ASCII and we need to decode.
+  return AllocateStringFromUtf8Slow(str, pretenure);
 }


Index: src/heap.cc
diff --git a/src/heap.cc b/src/heap.cc
index 2f70ef0188dbad295cb44622708e2dec3c4a4b24..4713f69e2f0e622b9e2ec9db99a28d24ef148ce5 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -2549,20 +2549,10 @@ MaybeObject* Heap::AllocateExternalStringFromTwoByte(
   }

   // For small strings we check whether the resource contains only
-  // ascii characters.  If yes, we use a different string map.
-  bool is_ascii = true;
-  if (length >= static_cast<size_t>(String::kMinNonFlatLength)) {
-    is_ascii = false;
-  } else {
-    const uc16* data = resource->data();
-    for (size_t i = 0; i < length; i++) {
-      if (data[i] > String::kMaxAsciiCharCode) {
-        is_ascii = false;
-        break;
-      }
-    }
-  }
-
+  // ASCII characters.  If yes, we use a different string map.
+  static const size_t kAsciiCheckLengthLimit = 32;
+  bool is_ascii = length <= kAsciiCheckLengthLimit &&
+                  String::IsAscii(resource->data(), length);
   Map* map = is_ascii ?
       Heap::external_string_with_ascii_data_map() : Heap::external_string_map();
   Object* result;
@@ -3342,11 +3332,8 @@ MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
 MaybeObject* Heap::AllocateStringFromTwoByte(Vector<const uc16> string,
                                              PretenureFlag pretenure) {
   // Check if the string is an ASCII string.
-  int i = 0;
-  while (i < string.length() && string[i] <= String::kMaxAsciiCharCode) i++;
-
   MaybeObject* maybe_result;
-  if (i == string.length()) {  // It's an ASCII string.
+  if (String::IsAscii(string.start(), string.length())) {
     maybe_result = AllocateRawAsciiString(string.length(), pretenure);
   } else {  // It's not an ASCII string.
     maybe_result = AllocateRawTwoByteString(string.length(), pretenure);
Index: src/ia32/regexp-macro-assembler-ia32.cc
diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc
index d435a707758f06afe9ed87c0a14399221250b639..cc2f30dc1285df19d25d4ef0d746b149c08969c0 100644
--- a/src/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/ia32/regexp-macro-assembler-ia32.cc
@@ -211,9 +211,7 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
   // If input is ASCII, don't even bother calling here if the string to
   // match contains a non-ascii character.
   if (mode_ == ASCII) {
-    for (int i = 0; i < str.length(); i++) {
-      ASSERT(str[i] <= String::kMaxAsciiCharCodeU);
-    }
+    ASSERT(String::IsAscii(str.start(), str.length()));
   }
 #endif
   int byte_length = str.length() * char_size();
Index: src/objects.h
diff --git a/src/objects.h b/src/objects.h
index c5fda7d0385d10d299754204a1c79c9d8b3ef33a..fdebd1f19dde2dc194d641ea4f8561857167204e 100644
--- a/src/objects.h
+++ b/src/objects.h
@@ -5245,6 +5245,34 @@ class String: public HeapObject {
                           int from,
                           int to);

+  // True iff each of the |length| bytes at |chars| is <= kMaxAsciiCharCode.
+  static inline bool IsAscii(const char* chars, int length) {
+    const char* limit = chars + length;
+#ifdef V8_HOST_CAN_READ_UNALIGNED
+    ASSERT(kMaxAsciiCharCode == 0x7F);
+    const uintptr_t non_ascii_mask = kUintptrAllBitsSet / 0xFF * 0x80;
+    // Compare the bytes left: limit - sizeof(word) could precede the buffer.
+    while (limit - chars >= static_cast<int>(sizeof(uintptr_t))) {
+      uintptr_t word = *reinterpret_cast<const uintptr_t*>(chars);
+      if (word & non_ascii_mask) return false;
+      chars += sizeof(uintptr_t);
+    }
+#endif
+    for (; chars < limit; ++chars) {  // Remaining tail, one byte at a time.
+      if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) return false;
+    }
+    return true;
+  }
+
+  // Two-byte variant: true iff no code unit exceeds kMaxAsciiCharCodeU.
+  static inline bool IsAscii(const uc16* chars, int length) {
+    for (const uc16* end = chars + length; chars < end; ++chars) {
+      const uc16 unit = *chars;
+      if (unit > kMaxAsciiCharCodeU) return false;
+    }
+    return true;
+  }
+
  protected:
   class ReadBlockBuffer {
    public:
Index: src/string-search.h
diff --git a/src/string-search.h b/src/string-search.h
index eac84757ecf1d45458133442c5cfc200627cd691..5de3c0951e5d37a1f06d399c4d7919efa5d9019c 100644
--- a/src/string-search.h
+++ b/src/string-search.h
@@ -66,12 +66,7 @@ class StringSearchBase {
   }

   static inline bool IsAsciiString(Vector<const uc16> string) {
-    for (int i = 0, n = string.length(); i < n; i++) {
-      if (static_cast<unsigned>(string[i]) > String::kMaxAsciiCharCodeU) {
-        return false;
-      }
-    }
-    return true;
+    return String::IsAscii(string.start(), string.length());
   }

   // The following tables are shared by all searches.
Index: src/x64/regexp-macro-assembler-x64.cc
diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc
index 2cf85f11655c5da705bbf1f48956ebc30d7c6987..0838ddd4619629daf3f56a7614aa094995b2f1e0 100644
--- a/src/x64/regexp-macro-assembler-x64.cc
+++ b/src/x64/regexp-macro-assembler-x64.cc
@@ -223,9 +223,7 @@ void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
   // If input is ASCII, don't even bother calling here if the string to
   // match contains a non-ascii character.
   if (mode_ == ASCII) {
-    for (int i = 0; i < str.length(); i++) {
-      ASSERT(str[i] <= String::kMaxAsciiCharCodeU);
-    }
+    ASSERT(String::IsAscii(str.start(), str.length()));
   }
 #endif
   int byte_length = str.length() * char_size();


--
v8-dev mailing list
v8-dev@googlegroups.com
http://groups.google.com/group/v8-dev

Reply via email to