- Revision
- 241233
- Author
- [email protected]
- Date
- 2019-02-08 20:40:22 -0800 (Fri, 08 Feb 2019)
Log Message
[JSC] String.fromCharCode's slow path always generates 16bit string
https://bugs.webkit.org/show_bug.cgi?id=194466
Reviewed by Keith Miller.
JSTests:
* stress/string-from-char-code-slow-path.js: Added.
(shouldBe):
(testWithLength):
Source/JavaScriptCore:
String.fromCharCode(a1) has a fast path and is the most frequently used form, while String.fromCharCode(a1, a2, ...)
goes to the slow path. However, the slow path always creates a 16-bit string. A 16-bit string takes 2x the memory
and, even worse, forces any rope that includes it to become 16-bit as well. We found that acorn-wtb
creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
16-bit strings. However, only a few of those strings actually need 16 bits. This patch makes the slow path produce
an 8-bit string whenever possible.
It improves non-JIT acorn-wtb's peak and current memory footprints by 6% and 3%, respectively.
* runtime/StringConstructor.cpp:
(JSC::stringFromCharCode):
Modified Paths
Added Paths
Diff
Modified: trunk/JSTests/ChangeLog (241232 => 241233)
--- trunk/JSTests/ChangeLog 2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/JSTests/ChangeLog 2019-02-09 04:40:22 UTC (rev 241233)
@@ -1,3 +1,14 @@
+2019-02-08 Yusuke Suzuki <[email protected]>
+
+ [JSC] String.fromCharCode's slow path always generates 16bit string
+ https://bugs.webkit.org/show_bug.cgi?id=194466
+
+ Reviewed by Keith Miller.
+
+ * stress/string-from-char-code-slow-path.js: Added.
+ (shouldBe):
+ (testWithLength):
+
2019-02-08 Saam barati <[email protected]>
Nodes that rely on being dominated by CheckInBounds should have a child edge to it
Added: trunk/JSTests/stress/string-from-char-code-slow-path.js (0 => 241233)
--- trunk/JSTests/stress/string-from-char-code-slow-path.js (rev 0)
+++ trunk/JSTests/stress/string-from-char-code-slow-path.js 2019-02-09 04:40:22 UTC (rev 241233)
@@ -0,0 +1,26 @@
+function shouldBe(actual, expected) {
+ if (actual !== expected)
+ throw new Error('bad value: ' + actual);
+}
+
+function testWithLength(length, index) {
+ shouldBe(length >= 1, true);
+ var array = [];
+ for (var i = 0; i < length; ++i)
+ array[i] = i & 0xff;
+ array[index] = 0xffef;
+ var string = String.fromCharCode.apply(String, array);
+ shouldBe(string.length, length);
+ for (var i = 0; i < length; ++i) {
+ if (index === i)
+ shouldBe(string[i], String.fromCharCode(0xffef));
+ else
+ shouldBe(string[i], String.fromCharCode(i & 0xff));
+ }
+}
+
+testWithLength(1e4, 1e4 - 1);
+testWithLength(1e4, 1e3);
+testWithLength(1, 0);
+testWithLength(2, 1);
+testWithLength(2, 0);
Modified: trunk/Source/JavaScriptCore/ChangeLog (241232 => 241233)
--- trunk/Source/JavaScriptCore/ChangeLog 2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/JavaScriptCore/ChangeLog 2019-02-09 04:40:22 UTC (rev 241233)
@@ -1,3 +1,22 @@
+2019-02-08 Yusuke Suzuki <[email protected]>
+
+ [JSC] String.fromCharCode's slow path always generates 16bit string
+ https://bugs.webkit.org/show_bug.cgi?id=194466
+
+ Reviewed by Keith Miller.
+
+ String.fromCharCode(a1) has a fast path and is the most frequently used form, while String.fromCharCode(a1, a2, ...)
+ goes to the slow path. However, the slow path always creates a 16-bit string. A 16-bit string takes 2x the memory
+ and, even worse, forces any rope that includes it to become 16-bit as well. We found that acorn-wtb
+ creates very large strings multiple times with String.fromCharCode, and String.fromCharCode always produces
+ 16-bit strings. However, only a few of those strings actually need 16 bits. This patch makes the slow path produce
+ an 8-bit string whenever possible.
+
+ It improves non JIT acorn-wtb's peak and current memory footprint by 6% and 3% respectively.
+
+ * runtime/StringConstructor.cpp:
+ (JSC::stringFromCharCode):
+
2019-02-08 Keith Miller <[email protected]>
We should only make rope strings when concatenating strings long enough.
Modified: trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp (241232 => 241233)
--- trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp 2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/JavaScriptCore/runtime/StringConstructor.cpp 2019-02-09 04:40:22 UTC (rev 241233)
@@ -83,13 +83,26 @@
return JSValue::encode(jsSingleCharacterString(exec, code));
}
- UChar* buf;
- auto impl = StringImpl::createUninitialized(length, buf);
+ LChar* buf8Bit;
+ auto impl8Bit = StringImpl::createUninitialized(length, buf8Bit);
for (unsigned i = 0; i < length; ++i) {
- buf[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
+ UChar character = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
RETURN_IF_EXCEPTION(scope, encodedJSValue());
+ if (UNLIKELY(!isLatin1(character))) {
+ UChar* buf16Bit;
+ auto impl16Bit = StringImpl::createUninitialized(length, buf16Bit);
+ StringImpl::copyCharacters(buf16Bit, buf8Bit, i);
+ buf16Bit[i] = character;
+ ++i;
+ for (; i < length; ++i) {
+ buf16Bit[i] = static_cast<UChar>(exec->uncheckedArgument(i).toUInt32(exec));
+ RETURN_IF_EXCEPTION(scope, encodedJSValue());
+ }
+ RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl16Bit))));
+ }
+ buf8Bit[i] = static_cast<LChar>(character);
}
- RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl))));
+ RELEASE_AND_RETURN(scope, JSValue::encode(jsString(exec, WTFMove(impl8Bit))));
}
JSString* JSC_HOST_CALL stringFromCharCode(ExecState* exec, int32_t arg)
Modified: trunk/Source/WTF/wtf/text/StringImpl.cpp (241232 => 241233)
--- trunk/Source/WTF/wtf/text/StringImpl.cpp 2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/WTF/wtf/text/StringImpl.cpp 2019-02-09 04:40:22 UTC (rev 241233)
@@ -289,7 +289,7 @@
auto string = createUninitializedInternalNonEmpty(length, data);
for (size_t i = 0; i < length; ++i) {
- if (characters[i] & 0xFF00)
+ if (!isLatin1(characters[i]))
return create(characters, length);
data[i] = static_cast<LChar>(characters[i]);
}
@@ -414,7 +414,7 @@
if (!(character & ~0x7F))
data8[i] = toASCIILower(character);
else {
- ASSERT(u_tolower(character) <= 0xFF);
+ ASSERT(isLatin1(u_tolower(character)));
data8[i] = static_cast<LChar>(u_tolower(character));
}
}
@@ -459,7 +459,7 @@
++numberSharpSCharacters;
ASSERT(u_toupper(character) <= 0xFFFF);
UChar upper = u_toupper(character);
- if (UNLIKELY(upper > 0xFF)) {
+ if (UNLIKELY(!isLatin1(upper))) {
// Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
goto upconvert;
}
@@ -480,7 +480,7 @@
*dest++ = 'S';
*dest++ = 'S';
} else {
- ASSERT(u_toupper(character) <= 0xFF);
+ ASSERT(isLatin1(u_toupper(character)));
*dest++ = static_cast<LChar>(u_toupper(character));
}
}
@@ -628,7 +628,7 @@
if (isASCII(character))
data8[i] = toASCIILower(character);
else {
- ASSERT(u_foldCase(character, U_FOLD_CASE_DEFAULT) <= 0xFF);
+ ASSERT(isLatin1(u_foldCase(character, U_FOLD_CASE_DEFAULT)));
data8[i] = static_cast<LChar>(u_foldCase(character, U_FOLD_CASE_DEFAULT));
}
}
@@ -1253,12 +1253,12 @@
return *this;
if (is8Bit()) {
- if (target > 0xFF) {
+ if (!isLatin1(target)) {
// Looking for a 16-bit character in an 8-bit string, so we're done.
return *this;
}
- if (replacement <= 0xFF) {
+ if (isLatin1(replacement)) {
LChar* data;
LChar oldChar = static_cast<LChar>(target);
LChar newChar = static_cast<LChar>(replacement);
Modified: trunk/Source/WTF/wtf/text/StringImpl.h (241232 => 241233)
--- trunk/Source/WTF/wtf/text/StringImpl.h 2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/WTF/wtf/text/StringImpl.h 2019-02-09 04:40:22 UTC (rev 241233)
@@ -127,6 +127,12 @@
#endif
+template<typename CharacterType> inline bool isLatin1(CharacterType character)
+{
+ using UnsignedCharacterType = typename std::make_unsigned<CharacterType>::type;
+ return static_cast<UnsignedCharacterType>(character) <= static_cast<UnsignedCharacterType>(0xFF);
+}
+
class StringImplShape {
WTF_MAKE_NONCOPYABLE(StringImplShape);
public:
@@ -1226,3 +1232,4 @@
using WTF::StaticStringImpl;
using WTF::StringImpl;
using WTF::equal;
+using WTF::isLatin1;
Modified: trunk/Source/WTF/wtf/text/WTFString.cpp (241232 => 241233)
--- trunk/Source/WTF/wtf/text/WTFString.cpp 2019-02-09 04:36:57 UTC (rev 241232)
+++ trunk/Source/WTF/wtf/text/WTFString.cpp 2019-02-09 04:40:22 UTC (rev 241233)
@@ -146,7 +146,7 @@
m_impl = StringImpl::create(&character, 1);
return;
}
- if (character <= 0xFF && is8Bit()) {
+ if (isLatin1(character) && is8Bit()) {
append(static_cast<LChar>(character));
return;
}
@@ -829,7 +829,7 @@
for (unsigned i = 0; i < length; ++i) {
UChar ch = characters[i];
- characterBuffer[i] = ch > 0xff ? '?' : ch;
+ characterBuffer[i] = !isLatin1(ch) ? '?' : ch;
}
return result;