Reviewers: Lasse Reichstein, Description: Avoid decoding overhead when allocating ASCII strings.
The assumption is that most UTF-8 strings allocated are actually ASCII, and that if they are not, we will encounter a non-ASCII character pretty quickly. Please review this at http://codereview.chromium.org/6072004/ SVN Base: http://v8.googlecode.com/svn/branches/bleeding_edge/ Affected files: M src/heap-inl.h M src/heap.h M src/heap.cc Index: src/heap-inl.h =================================================================== --- src/heap-inl.h (revision 6096) +++ src/heap-inl.h (working copy) @@ -40,6 +40,21 @@ } +MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str, + PretenureFlag pretenure) { + // Check for ascii first since this is the common case. + for (int i = 0; i < str.length(); ++i) { + if (static_cast<uc16>(str[i]) > String::kMaxAsciiCharCode) { + // Non-ascii and we need to decode. + return AllocateStringFromUtf8Slow(str, pretenure); + } + } + // If the string is ascii, we do not need to convert the characters + // since UTF8 is backwards compatible with ascii. + return AllocateStringFromAscii(str, pretenure); +} + + MaybeObject* Heap::AllocateSymbol(Vector<const char> str, int chars, uint32_t hash_field) { Index: src/heap.cc =================================================================== --- src/heap.cc (revision 6096) +++ src/heap.cc (working copy) @@ -3307,8 +3307,8 @@ } -MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string, - PretenureFlag pretenure) { +MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string, + PretenureFlag pretenure) { // V8 only supports characters in the Basic Multilingual Plane. 
const uc32 kMaxSupportedChar = 0xFFFF; // Count the number of characters in the UTF-8 string and check if @@ -3317,17 +3317,11 @@ decoder(ScannerConstants::utf8_decoder()); decoder->Reset(string.start(), string.length()); int chars = 0; - bool is_ascii = true; while (decoder->has_more()) { - uc32 r = decoder->GetNext(); - if (r > String::kMaxAsciiCharCode) is_ascii = false; + decoder->GetNext(); chars++; } - // If the string is ascii, we do not need to convert the characters - // since UTF8 is backwards compatible with ascii. - if (is_ascii) return AllocateStringFromAscii(string, pretenure); - Object* result; { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure); if (!maybe_result->ToObject(&result)) return maybe_result; Index: src/heap.h =================================================================== --- src/heap.h (revision 6096) +++ src/heap.h (working copy) @@ -412,9 +412,12 @@ MUST_USE_RESULT static MaybeObject* AllocateStringFromAscii( Vector<const char> str, PretenureFlag pretenure = NOT_TENURED); - MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8( + MUST_USE_RESULT static inline MaybeObject* AllocateStringFromUtf8( Vector<const char> str, PretenureFlag pretenure = NOT_TENURED); + MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8Slow( + Vector<const char> str, + PretenureFlag pretenure = NOT_TENURED); MUST_USE_RESULT static MaybeObject* AllocateStringFromTwoByte( Vector<const uc16> str, PretenureFlag pretenure = NOT_TENURED); -- v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev
