Reviewers: Lasse Reichstein,

Description:
Avoid decoding overhead when allocating ascii strings.

The assumption is that most UTF-8 strings allocated are actually pure ASCII,
and that if they are not, we will encounter a non-ASCII char pretty quickly,
so the up-front scan costs little before we fall back to the decoder.


Please review this at http://codereview.chromium.org/6072004/

SVN Base: http://v8.googlecode.com/svn/branches/bleeding_edge/

Affected files:
  M     src/heap-inl.h
  M     src/heap.h
  M     src/heap.cc


Index: src/heap-inl.h
===================================================================
--- src/heap-inl.h      (revision 6096)
+++ src/heap-inl.h      (working copy)
@@ -40,6 +40,21 @@
 }


+MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
+                                          PretenureFlag pretenure) {
+  // Check for ascii first since this is the common case (no decoding needed).
+  for (int i = 0; i < str.length(); ++i) {
+    if (static_cast<uc16>(str[i]) > String::kMaxAsciiCharCode) {
+      // Found a non-ascii char: hand the whole string to the decoding path.
+      return AllocateStringFromUtf8Slow(str, pretenure);
+    }
+  }
+  // The string is pure ascii, so we do not need to convert the characters:
+  // UTF8 is backwards compatible with ascii.
+  return AllocateStringFromAscii(str, pretenure);
+}
+
+
 MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
                                   int chars,
                                   uint32_t hash_field) {
Index: src/heap.cc
===================================================================
--- src/heap.cc (revision 6096)
+++ src/heap.cc (working copy)
@@ -3307,8 +3307,8 @@
 }


-MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
-                                          PretenureFlag pretenure) {
+MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
+                                              PretenureFlag pretenure) {
   // V8 only supports characters in the Basic Multilingual Plane.
   const uc32 kMaxSupportedChar = 0xFFFF;
   // Count the number of characters in the UTF-8 string and check if
@@ -3317,17 +3317,11 @@
       decoder(ScannerConstants::utf8_decoder());
   decoder->Reset(string.start(), string.length());
   int chars = 0;
-  bool is_ascii = true;
   while (decoder->has_more()) {
-    uc32 r = decoder->GetNext();
-    if (r > String::kMaxAsciiCharCode) is_ascii = false;
+    decoder->GetNext();
     chars++;
   }

-  // If the string is ascii, we do not need to convert the characters
-  // since UTF8 is backwards compatible with ascii.
-  if (is_ascii) return AllocateStringFromAscii(string, pretenure);
-
   Object* result;
   { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure);
     if (!maybe_result->ToObject(&result)) return maybe_result;
Index: src/heap.h
===================================================================
--- src/heap.h  (revision 6096)
+++ src/heap.h  (working copy)
@@ -412,9 +412,12 @@
   MUST_USE_RESULT static MaybeObject* AllocateStringFromAscii(
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);
-  MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8(
+  MUST_USE_RESULT static inline MaybeObject* AllocateStringFromUtf8(
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);
+  MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8Slow(
+      Vector<const char> str,
+      PretenureFlag pretenure = NOT_TENURED);
   MUST_USE_RESULT static MaybeObject* AllocateStringFromTwoByte(
       Vector<const uc16> str,
       PretenureFlag pretenure = NOT_TENURED);


--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to