Revision: 6099 Author: [email protected] Date: Tue Dec 21 05:24:23 2010 Log: Avoid decoding overhead when allocating ASCII strings.
The assumption is that most utf8 strings allocated are actually ascii and that if they are not we will encounter a non-ascii char pretty quickly. Review URL: http://codereview.chromium.org/6072004 http://code.google.com/p/v8/source/detail?r=6099 Modified: /branches/bleeding_edge/src/heap-inl.h /branches/bleeding_edge/src/heap.cc /branches/bleeding_edge/src/heap.h ======================================= --- /branches/bleeding_edge/src/heap-inl.h Wed Dec 15 00:07:27 2010 +++ /branches/bleeding_edge/src/heap-inl.h Tue Dec 21 05:24:23 2010 @@ -38,6 +38,21 @@ int Heap::MaxObjectSizeInPagedSpace() { return Page::kMaxHeapObjectSize; } + + +MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str, + PretenureFlag pretenure) { + // Check for ASCII first since this is the common case. + for (int i = 0; i < str.length(); ++i) { + if (static_cast<uint8_t>(str[i]) > String::kMaxAsciiCharCodeU) { + // Non-ASCII and we need to decode. + return AllocateStringFromUtf8Slow(str, pretenure); + } + } + // If the string is ASCII, we do not need to convert the characters + // since UTF8 is backwards compatible with ASCII. + return AllocateStringFromAscii(str, pretenure); +} MaybeObject* Heap::AllocateSymbol(Vector<const char> str, ======================================= --- /branches/bleeding_edge/src/heap.cc Tue Dec 21 02:49:40 2010 +++ /branches/bleeding_edge/src/heap.cc Tue Dec 21 05:24:23 2010 @@ -3307,8 +3307,8 @@ } -MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string, - PretenureFlag pretenure) { +MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string, + PretenureFlag pretenure) { // V8 only supports characters in the Basic Multilingual Plane. 
const uc32 kMaxSupportedChar = 0xFFFF; // Count the number of characters in the UTF-8 string and check if @@ -3317,16 +3317,10 @@ decoder(ScannerConstants::utf8_decoder()); decoder->Reset(string.start(), string.length()); int chars = 0; - bool is_ascii = true; while (decoder->has_more()) { - uc32 r = decoder->GetNext(); - if (r > String::kMaxAsciiCharCode) is_ascii = false; + decoder->GetNext(); chars++; } - - // If the string is ascii, we do not need to convert the characters - // since UTF8 is backwards compatible with ascii. - if (is_ascii) return AllocateStringFromAscii(string, pretenure); Object* result; { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure); ======================================= --- /branches/bleeding_edge/src/heap.h Tue Dec 21 02:49:40 2010 +++ /branches/bleeding_edge/src/heap.h Tue Dec 21 05:24:23 2010 @@ -412,7 +412,10 @@ MUST_USE_RESULT static MaybeObject* AllocateStringFromAscii( Vector<const char> str, PretenureFlag pretenure = NOT_TENURED); - MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8( + MUST_USE_RESULT static inline MaybeObject* AllocateStringFromUtf8( + Vector<const char> str, + PretenureFlag pretenure = NOT_TENURED); + MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8Slow( Vector<const char> str, PretenureFlag pretenure = NOT_TENURED); MUST_USE_RESULT static MaybeObject* AllocateStringFromTwoByte( -- v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev
