- Revision
- 261041
- Author
- [email protected]
- Date
- 2020-05-01 17:21:51 -0700 (Fri, 01 May 2020)
Log Message
Have a thread local cache for the Wasm LLInt bytecode buffer
https://bugs.webkit.org/show_bug.cgi?id=211317
Reviewed by Filip Pizlo and Mark Lam.
Source/JavaScriptCore:
One of the main things slowing down Wasm compile times is contention
on bmalloc's global heap lock. This patch makes it so that, for the bytecode
instruction buffer, we keep a thread-local cache that retains the capacity the
thread most recently needed to compile. As a result, in the average case,
we only do one malloc at the end of a compile, to memcpy the final result into.
We clear each thread-local cache when the machine thread underlying the
corresponding WasmWorklist automatic thread is destroyed.
This is a 15% speedup in zen garden compile times on a 16-core Mac Pro.
This is a 4-5% speedup in zen garden compile times on a 6-core MBP.
* bytecode/InstructionStream.h:
(JSC::InstructionStreamWriter::setInstructionBuffer):
(JSC::InstructionStreamWriter::finalize):
* wasm/WasmLLIntGenerator.cpp:
(JSC::Wasm::threadSpecificBuffer):
(JSC::Wasm::clearLLIntThreadSpecificCache):
(JSC::Wasm::LLIntGenerator::LLIntGenerator):
(JSC::Wasm::LLIntGenerator::finalize):
* wasm/WasmLLIntGenerator.h:
* wasm/WasmWorklist.cpp:
Source/WTF:
* wtf/Vector.h:
(WTF::Vector::sizeInBytes const):
Modified Paths
Diff
Modified: trunk/Source/JavaScriptCore/ChangeLog (261040 => 261041)
--- trunk/Source/JavaScriptCore/ChangeLog 2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/ChangeLog 2020-05-02 00:21:51 UTC (rev 261041)
@@ -1,3 +1,33 @@
+2020-05-01 Saam Barati <[email protected]>
+
+ Have a thread local cache for the Wasm LLInt bytecode buffer
+ https://bugs.webkit.org/show_bug.cgi?id=211317
+
+ Reviewed by Filip Pizlo and Mark Lam.
+
+ One of the main things slowing down Wasm compile times is contention
+ on bmalloc's global heap lock. This patch makes it so that, for the bytecode
+ instruction buffer, we keep a thread-local cache that retains the capacity the
+ thread most recently needed to compile. As a result, in the average case,
+ we only do one malloc at the end of a compile, to memcpy the final result into.
+
+ We clear each thread-local cache when the machine thread underlying the
+ corresponding WasmWorklist automatic thread is destroyed.
+
+ This is a 15% speedup in zen garden compile times on a 16-core Mac Pro.
+ This is a 4-5% speedup in zen garden compile times on a 6-core MBP.
+
+ * bytecode/InstructionStream.h:
+ (JSC::InstructionStreamWriter::setInstructionBuffer):
+ (JSC::InstructionStreamWriter::finalize):
+ * wasm/WasmLLIntGenerator.cpp:
+ (JSC::Wasm::threadSpecificBuffer):
+ (JSC::Wasm::clearLLIntThreadSpecificCache):
+ (JSC::Wasm::LLIntGenerator::LLIntGenerator):
+ (JSC::Wasm::LLIntGenerator::finalize):
+ * wasm/WasmLLIntGenerator.h:
+ * wasm/WasmWorklist.cpp:
+
2020-05-01 Per Arne Vollan <[email protected]>
[Win] Fix AppleWin build
Modified: trunk/Source/JavaScriptCore/bytecode/InstructionStream.h (261040 => 261041)
--- trunk/Source/JavaScriptCore/bytecode/InstructionStream.h 2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/bytecode/InstructionStream.h 2020-05-02 00:21:51 UTC (rev 261041)
@@ -37,11 +37,11 @@
class InstructionStream {
WTF_MAKE_FAST_ALLOCATED;
- using InstructionBuffer = Vector<uint8_t, 0, UnsafeVectorOverflow, 16, InstructionStreamMalloc>;
-
friend class InstructionStreamWriter;
friend class CachedInstructionStream;
public:
+ using InstructionBuffer = Vector<uint8_t, 0, UnsafeVectorOverflow, 16, InstructionStreamMalloc>;
+
size_t sizeInBytes() const;
using Offset = unsigned;
@@ -191,6 +191,13 @@
: InstructionStream({ })
{ }
+ void setInstructionBuffer(InstructionBuffer&& buffer)
+ {
+ RELEASE_ASSERT(!m_instructions.size());
+ RELEASE_ASSERT(!buffer.size());
+ m_instructions = WTFMove(buffer);
+ }
+
inline MutableRef ref(Offset offset)
{
ASSERT(offset < m_instructions.size());
@@ -261,6 +268,19 @@
return std::unique_ptr<InstructionStream> { new InstructionStream(WTFMove(m_instructions)) };
}
+ std::unique_ptr<InstructionStream> finalize(InstructionBuffer& usedBuffer)
+ {
+ m_finalized = true;
+
+ InstructionBuffer resultBuffer(m_instructions.size());
+ RELEASE_ASSERT(m_instructions.sizeInBytes() == resultBuffer.sizeInBytes());
+ memcpy(resultBuffer.data(), m_instructions.data(), m_instructions.sizeInBytes());
+
+ usedBuffer = WTFMove(m_instructions);
+
+ return std::unique_ptr<InstructionStream> { new InstructionStream(WTFMove(resultBuffer)) };
+ }
+
MutableRef ref()
{
return MutableRef { m_instructions, m_position };
Modified: trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.cpp (261040 => 261041)
--- trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.cpp 2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.cpp 2020-05-02 00:21:51 UTC (rev 261041)
@@ -428,11 +428,46 @@
return llintGenerator.finalize();
}
+
+using Buffer = InstructionStream::InstructionBuffer;
+static ThreadSpecific<Buffer>* threadSpecificBufferPtr;
+
+static ThreadSpecific<Buffer>& threadSpecificBuffer()
+{
+ static std::once_flag flag;
+ std::call_once(
+ flag,
+ [] () {
+ threadSpecificBufferPtr = new ThreadSpecific<Buffer>();
+ });
+ return *threadSpecificBufferPtr;
+}
+
+void clearLLIntThreadSpecificCache()
+{
+ auto& threadSpecific = threadSpecificBuffer();
+ if (threadSpecific.isSet())
+ threadSpecific->clear();
+}
+
LLIntGenerator::LLIntGenerator(const ModuleInformation& info, unsigned functionIndex, const Signature&)
: BytecodeGeneratorBase(makeUnique<FunctionCodeBlock>(functionIndex), 0)
, m_info(info)
, m_functionIndex(functionIndex)
{
+ {
+ auto& threadSpecific = threadSpecificBuffer();
+
+ if (!threadSpecific.isSet()) {
+ void* ptr = static_cast<Buffer*>(threadSpecific);
+ new (ptr) Buffer();
+ }
+
+ Buffer buffer = WTFMove(*threadSpecific);
+ *threadSpecific = Buffer();
+ m_writer.setInstructionBuffer(WTFMove(buffer));
+ }
+
m_codeBlock->m_numVars = numberOfLLIntCalleeSaveRegisters;
m_stackSize = numberOfLLIntCalleeSaveRegisters;
m_maxStackSize = numberOfLLIntCalleeSaveRegisters;
@@ -444,7 +479,15 @@
{
RELEASE_ASSERT(m_codeBlock);
m_codeBlock->m_numCalleeLocals = WTF::roundUpToMultipleOf(stackAlignmentRegisters(), m_maxStackSize);
- m_codeBlock->setInstructions(m_writer.finalize());
+
+ auto& threadSpecific = threadSpecificBuffer();
+ Buffer usedBuffer;
+ m_codeBlock->setInstructions(m_writer.finalize(usedBuffer));
+ size_t oldCapacity = usedBuffer.capacity();
+ usedBuffer.resize(0);
+ RELEASE_ASSERT(usedBuffer.capacity() == oldCapacity);
+ *threadSpecific = WTFMove(usedBuffer);
+
return WTFMove(m_codeBlock);
}
Modified: trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.h (261040 => 261041)
--- trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.h 2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.h 2020-05-02 00:21:51 UTC (rev 261041)
@@ -35,6 +35,8 @@
Expected<std::unique_ptr<FunctionCodeBlock>, String> parseAndCompileBytecode(const uint8_t*, size_t, const Signature&, const ModuleInformation&, uint32_t functionIndex);
+void clearLLIntThreadSpecificCache();
+
} } // namespace JSC::Wasm
#endif // ENABLE(WEBASSEMBLY)
Modified: trunk/Source/JavaScriptCore/wasm/WasmWorklist.cpp (261040 => 261041)
--- trunk/Source/JavaScriptCore/wasm/WasmWorklist.cpp 2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/wasm/WasmWorklist.cpp 2020-05-02 00:21:51 UTC (rev 261041)
@@ -25,6 +25,7 @@
#include "config.h"
#include "WasmWorklist.h"
+#include "WasmLLIntGenerator.h"
#if ENABLE(WEBASSEMBLY)
@@ -116,6 +117,11 @@
return complete(holdLock(*worklist.m_lock));
}
+ void threadIsStopping(const AbstractLocker&) override
+ {
+ clearLLIntThreadSpecificCache();
+ }
+
const char* name() const override
{
return "Wasm Worklist Helper Thread";
Modified: trunk/Source/WTF/ChangeLog (261040 => 261041)
--- trunk/Source/WTF/ChangeLog 2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/WTF/ChangeLog 2020-05-02 00:21:51 UTC (rev 261041)
@@ -1,3 +1,13 @@
+2020-05-01 Saam Barati <[email protected]>
+
+ Have a thread local cache for the Wasm LLInt bytecode buffer
+ https://bugs.webkit.org/show_bug.cgi?id=211317
+
+ Reviewed by Filip Pizlo and Mark Lam.
+
+ * wtf/Vector.h:
+ (WTF::Vector::sizeInBytes const):
+
2020-05-01 Don Olmstead <[email protected]>
[GTK] Add additional exports to support hidden visibility
Modified: trunk/Source/WTF/wtf/Vector.h (261040 => 261041)
--- trunk/Source/WTF/wtf/Vector.h 2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/WTF/wtf/Vector.h 2020-05-02 00:21:51 UTC (rev 261041)
@@ -691,6 +691,7 @@
Vector& operator=(Vector&&);
size_t size() const { return m_size; }
+ size_t sizeInBytes() const { return static_cast<size_t>(m_size) * sizeof(T); }
static ptrdiff_t sizeMemoryOffset() { return OBJECT_OFFSETOF(Vector, m_size); }
size_t capacity() const { return Base::capacity(); }
bool isEmpty() const { return !size(); }