Revision: 6099
Author: [email protected]
Date: Tue Dec 21 05:24:23 2010
Log: Avoid decoding overhead when allocating ASCII strings.

The assumption is that most UTF-8 strings allocated are actually ASCII,
and that if they are not, we will encounter a non-ASCII char pretty
quickly.

Review URL: http://codereview.chromium.org/6072004
http://code.google.com/p/v8/source/detail?r=6099

Modified:
 /branches/bleeding_edge/src/heap-inl.h
 /branches/bleeding_edge/src/heap.cc
 /branches/bleeding_edge/src/heap.h

=======================================
--- /branches/bleeding_edge/src/heap-inl.h      Wed Dec 15 00:07:27 2010
+++ /branches/bleeding_edge/src/heap-inl.h      Tue Dec 21 05:24:23 2010
@@ -38,6 +38,21 @@
 int Heap::MaxObjectSizeInPagedSpace() {
   return Page::kMaxHeapObjectSize;
 }
+
+
+MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
+                                          PretenureFlag pretenure) {
+  // Check for ASCII first since this is the common case.
+  for (int i = 0; i < str.length(); ++i) {
+    if (static_cast<uint8_t>(str[i]) > String::kMaxAsciiCharCodeU) {
+      // Non-ASCII and we need to decode.
+      return AllocateStringFromUtf8Slow(str, pretenure);
+    }
+  }
+  // If the string is ASCII, we do not need to convert the characters
+  // since UTF8 is backwards compatible with ASCII.
+  return AllocateStringFromAscii(str, pretenure);
+}


 MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
=======================================
--- /branches/bleeding_edge/src/heap.cc Tue Dec 21 02:49:40 2010
+++ /branches/bleeding_edge/src/heap.cc Tue Dec 21 05:24:23 2010
@@ -3307,8 +3307,8 @@
 }


-MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
-                                          PretenureFlag pretenure) {
+MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
+                                              PretenureFlag pretenure) {
   // V8 only supports characters in the Basic Multilingual Plane.
   const uc32 kMaxSupportedChar = 0xFFFF;
   // Count the number of characters in the UTF-8 string and check if
@@ -3317,16 +3317,10 @@
       decoder(ScannerConstants::utf8_decoder());
   decoder->Reset(string.start(), string.length());
   int chars = 0;
-  bool is_ascii = true;
   while (decoder->has_more()) {
-    uc32 r = decoder->GetNext();
-    if (r > String::kMaxAsciiCharCode) is_ascii = false;
+    decoder->GetNext();
     chars++;
   }
-
-  // If the string is ascii, we do not need to convert the characters
-  // since UTF8 is backwards compatible with ascii.
-  if (is_ascii) return AllocateStringFromAscii(string, pretenure);

   Object* result;
   { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure);
=======================================
--- /branches/bleeding_edge/src/heap.h  Tue Dec 21 02:49:40 2010
+++ /branches/bleeding_edge/src/heap.h  Tue Dec 21 05:24:23 2010
@@ -412,7 +412,10 @@
   MUST_USE_RESULT static MaybeObject* AllocateStringFromAscii(
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);
-  MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8(
+  MUST_USE_RESULT static inline MaybeObject* AllocateStringFromUtf8(
+      Vector<const char> str,
+      PretenureFlag pretenure = NOT_TENURED);
+  MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8Slow(
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);
   MUST_USE_RESULT static MaybeObject* AllocateStringFromTwoByte(

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to