Skip to site navigation (Press enter)

[webkit-changes] [241233] trunk

ysuzuki Fri, 08 Feb 2019 20:41:30 -0800

Title: [241233] trunk

Revision: 241233
Author: [email protected]
Date: 2019-02-08 20:40:22 -0800 (Fri, 08 Feb 2019)

Log Message

[JSC] String.fromCharCode's slow path always generates 16bit string
https://bugs.webkit.org/show_bug.cgi?id=194466


Reviewed by Keith Miller.

JSTests:

* stress/string-from-char-code-slow-path.js: Added.
(shouldBe):
(testWithLength):

Source/JavaScriptCore:

String.fromCharCode(a1) has a fast path, and it is the most frequently used form. String.fromCharCode(a1, a2, ...)
goes to the slow path. However, in the slow path, we always create a 16bit string. A 16bit string takes 2x the memory
and, even worse, taints ropes as 16bit if a 16bit string is included in the given rope. We found that acorn-wtb
creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
a 16bit string. However, only a few of these strings actually need to be 16bit. This patch attempts to produce
8bit strings whenever possible.

It improves non-JIT acorn-wtb's peak and current memory footprint by 6% and 3% respectively.

* runtime/StringConstructor.cpp:
(JSC::stringFromCharCode):

Modified Paths

trunk/JSTests/ChangeLog
trunk/Source/JavaScriptCore/ChangeLog
trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp
trunk/Source/WTF/wtf/text/StringImpl.cpp
trunk/Source/WTF/wtf/text/StringImpl.h
trunk/Source/WTF/wtf/text/WTFString.cpp

Added Paths

trunk/JSTests/stress/string-from-char-code-slow-path.js

Diff

Modified: trunk/JSTests/ChangeLog (241232 => 241233)


--- trunk/JSTests/ChangeLog	2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/JSTests/ChangeLog	2019-02-09 04:40:22 UTC (rev 241233)
@@ -1,3 +1,14 @@
+2019-02-08  Yusuke Suzuki  <[email protected]>
+
+        [JSC] String.fromCharCode's slow path always generates 16bit string
+        https://bugs.webkit.org/show_bug.cgi?id=194466
+
+        Reviewed by Keith Miller.
+
+        * stress/string-from-char-code-slow-path.js: Added.
+        (shouldBe):
+        (testWithLength):
+
 2019-02-08  Saam barati  <[email protected]>
 
         Nodes that rely on being dominated by CheckInBounds should have a child edge to it

Added: trunk/JSTests/stress/string-from-char-code-slow-path.js (0 => 241233)


--- trunk/JSTests/stress/string-from-char-code-slow-path.js	                        (rev 0)
+++ trunk/JSTests/stress/string-from-char-code-slow-path.js	2019-02-09 04:40:22 UTC (rev 241233)
@@ -0,0 +1,26 @@
+function shouldBe(actual, expected) {
+    if (actual !== expected)
+        throw new Error('bad value: ' + actual);
+}
+
+function testWithLength(length, index) {
+    shouldBe(length >= 1, true);
+    var array = [];
+    for (var i = 0; i < length; ++i)
+        array[i] = i & 0xff;
+    array[index] = 0xffef;
+    var string = String.fromCharCode.apply(String, array);
+    shouldBe(string.length, length);
+    for (var i = 0; i < length; ++i) {
+        if (index === i)
+            shouldBe(string[i], String.fromCharCode(0xffef));
+        else
+            shouldBe(string[i], String.fromCharCode(i & 0xff));
+    }
+}
+
+testWithLength(1e4, 1e4 - 1);
+testWithLength(1e4, 1e3);
+testWithLength(1, 0);
+testWithLength(2, 1);
+testWithLength(2, 0);

Modified: trunk/Source/JavaScriptCore/ChangeLog (241232 => 241233)


--- trunk/Source/JavaScriptCore/ChangeLog	2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/JavaScriptCore/ChangeLog	2019-02-09 04:40:22 UTC (rev 241233)
@@ -1,3 +1,22 @@
+2019-02-08  Yusuke Suzuki  <[email protected]>
+
+        [JSC] String.fromCharCode's slow path always generates 16bit string
+        https://bugs.webkit.org/show_bug.cgi?id=194466
+
+        Reviewed by Keith Miller.
+
+        String.fromCharCode(a1) has a fast path and the most frequently used. And String.fromCharCode(a1, a2, ...)
+        goes to the slow path. However, in the slow path, we always create 16bit string. 16bit string takes 2x memory,
+        and even worse, taints ropes 16bit if 16bit string is included in the given rope. We find that acorn-wtb
+        creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
+        16bit string. However, only few strings are actually 16bit strings. This patch attempts to make 8bit string
+        as much as possible.
+
+        It improves non JIT acorn-wtb's peak and current memory footprint by 6% and 3% respectively.
+
+        * runtime/StringConstructor.cpp:
+        (JSC::stringFromCharCode):
+
 2019-02-08  Keith Miller  <[email protected]>
 
         We should only make rope strings when concatenating strings long enough.

Modified: trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp (241232 => 241233)


--- trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp	2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp	2019-02-09 04:40:22 UTC (rev 241233)
@@ -83,13 +83,26 @@
         return JSValue::encode(jsSingleCharacterString(exec, code));
     }
 
-    UChar* buf;
-    auto impl = StringImpl::createUninitialized(length, buf);
+    LChar* buf8Bit;
+    auto impl8Bit = StringImpl::createUninitialized(length, buf8Bit);
     for (unsigned i = 0; i < length; ++i) {
-        buf[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
+        UChar character = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
         RETURN_IF_EXCEPTION(scope, encodedJSValue());
+        if (UNLIKELY(!isLatin1(character))) {
+            UChar* buf16Bit;
+            auto impl16Bit = StringImpl::createUninitialized(length, buf16Bit);
+            StringImpl::copyCharacters(buf16Bit, buf8Bit, i);
+            buf16Bit[i] = character;
+            ++i;
+            for (; i < length; ++i) {
+                buf16Bit[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
+                RETURN_IF_EXCEPTION(scope, encodedJSValue());
+            }
+            RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl16Bit))));
+        }
+        buf8Bit[i] = static_cast<LChar>(character);
     }
-    RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl))));
+    RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl8Bit))));
 }
 
 JSString* JSC_HOST_CALL stringFromCharCode(ExecState* exec, int32_t arg)

Modified: trunk/Source/WTF/wtf/text/StringImpl.cpp (241232 => 241233)


--- trunk/Source/WTF/wtf/text/StringImpl.cpp	2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/WTF/wtf/text/StringImpl.cpp	2019-02-09 04:40:22 UTC (rev 241233)
@@ -289,7 +289,7 @@
     auto string = createUninitializedInternalNonEmpty(length, data);
 
     for (size_t i = 0; i < length; ++i) {
-        if (characters[i] & 0xFF00)
+        if (!isLatin1(characters[i]))
             return create(characters, length);
         data[i] = static_cast<LChar>(characters[i]);
     }
@@ -414,7 +414,7 @@
         if (!(character & ~0x7F))
             data8[i] = toASCIILower(character);
         else {
-            ASSERT(u_tolower(character) <= 0xFF);
+            ASSERT(isLatin1(u_tolower(character)));
             data8[i] = static_cast<LChar>(u_tolower(character));
         }
     }
@@ -459,7 +459,7 @@
                 ++numberSharpSCharacters;
             ASSERT(u_toupper(character) <= 0xFFFF);
             UChar upper = u_toupper(character);
-            if (UNLIKELY(upper > 0xFF)) {
+            if (UNLIKELY(!isLatin1(upper))) {
                 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
                 goto upconvert;
             }
@@ -480,7 +480,7 @@
                 *dest++ = 'S';
                 *dest++ = 'S';
             } else {
-                ASSERT(u_toupper(character) <= 0xFF);
+                ASSERT(isLatin1(u_toupper(character)));
                 *dest++ = static_cast<LChar>(u_toupper(character));
             }
         }
@@ -628,7 +628,7 @@
                 if (isASCII(character))
                     data8[i] = toASCIILower(character);
                 else {
-                    ASSERT(u_foldCase(character, U_FOLD_CASE_DEFAULT) <= 0xFF);
+                    ASSERT(isLatin1(u_foldCase(character, U_FOLD_CASE_DEFAULT)));
                     data8[i] = static_cast<LChar>(u_foldCase(character, U_FOLD_CASE_DEFAULT));
                 }
             }
@@ -1253,12 +1253,12 @@
         return *this;
 
     if (is8Bit()) {
-        if (target > 0xFF) {
+        if (!isLatin1(target)) {
             // Looking for a 16-bit character in an 8-bit string, so we're done.
             return *this;
         }
 
-        if (replacement <= 0xFF) {
+        if (isLatin1(replacement)) {
             LChar* data;
             LChar oldChar = static_cast<LChar>(target);
             LChar newChar = static_cast<LChar>(replacement);

Modified: trunk/Source/WTF/wtf/text/StringImpl.h (241232 => 241233)


--- trunk/Source/WTF/wtf/text/StringImpl.h	2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/WTF/wtf/text/StringImpl.h	2019-02-09 04:40:22 UTC (rev 241233)
@@ -127,6 +127,12 @@
 
 #endif
 
+template<typename CharacterType> inline bool isLatin1(CharacterType character)
+{
+    using UnsignedCharacterType = typename std::make_unsigned<CharacterType>::type;
+    return static_cast<UnsignedCharacterType>(character) <= static_cast<UnsignedCharacterType>(0xFF);
+}
+
 class StringImplShape {
     WTF_MAKE_NONCOPYABLE(StringImplShape);
 public:
@@ -1226,3 +1232,4 @@
 using WTF::StaticStringImpl;
 using WTF::StringImpl;
 using WTF::equal;
+using WTF::isLatin1;

Modified: trunk/Source/WTF/wtf/text/WTFString.cpp (241232 => 241233)


--- trunk/Source/WTF/wtf/text/WTFString.cpp	2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/WTF/wtf/text/WTFString.cpp	2019-02-09 04:40:22 UTC (rev 241233)
@@ -146,7 +146,7 @@
         m_impl = StringImpl::create(&character, 1);
         return;
     }
-    if (character <= 0xFF && is8Bit()) {
+    if (isLatin1(character) && is8Bit()) {
         append(static_cast<LChar>(character));
         return;
     }
@@ -829,7 +829,7 @@
 
     for (unsigned i = 0; i < length; ++i) {
         UChar ch = characters[i];
-        characterBuffer[i] = ch > 0xff ? '?' : ch;
+        characterBuffer[i] = !isLatin1(ch) ? '?' : ch;
     }
 
     return result;

_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes