Title: [261041] trunk/Source
Revision
261041
Author
[email protected]
Date
2020-05-01 17:21:51 -0700 (Fri, 01 May 2020)

Log Message

Have a thread local cache for the Wasm LLInt bytecode buffer
https://bugs.webkit.org/show_bug.cgi?id=211317

Reviewed by Filip Pizlo and Mark Lam.

Source/JavaScriptCore:

One of the main things slowing down Wasm compile times is the banging
on bmalloc's global heap lock. This patch makes it so that, for the bytecode
instruction buffer, we keep a thread-local cache that retains the capacity
the thread most recently needed to compile. This makes it so that, in the
average case, we only do one malloc at the end of a compile to memcpy the
final result.

We clear these thread-local caches when the underlying machine thread of
one of the WasmWorklist's automatic threads is destroyed.

This is a 15% speedup in zen garden compile times on a 16-core Mac Pro.
This is a 4-5% speedup in zen garden compile times on a 6-core MBP.

* bytecode/InstructionStream.h:
(JSC::InstructionStreamWriter::setInstructionBuffer):
(JSC::InstructionStreamWriter::finalize):
* wasm/WasmLLIntGenerator.cpp:
(JSC::Wasm::threadSpecificBuffer):
(JSC::Wasm::clearLLIntThreadSpecificCache):
(JSC::Wasm::LLIntGenerator::LLIntGenerator):
(JSC::Wasm::LLIntGenerator::finalize):
* wasm/WasmLLIntGenerator.h:
* wasm/WasmWorklist.cpp:

Source/WTF:

* wtf/Vector.h:
(WTF::Vector::sizeInBytes const):

Modified Paths

Diff

Modified: trunk/Source/JavaScriptCore/ChangeLog (261040 => 261041)


--- trunk/Source/JavaScriptCore/ChangeLog	2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/ChangeLog	2020-05-02 00:21:51 UTC (rev 261041)
@@ -1,3 +1,33 @@
+2020-05-01  Saam Barati  <[email protected]>
+
+        Have a thread local cache for the Wasm LLInt bytecode buffer
+        https://bugs.webkit.org/show_bug.cgi?id=211317
+
+        Reviewed by Filip Pizlo and Mark Lam.
+
+        One of the main things slowing down Wasm compile times is the banging
+        on bmalloc's global heap lock. This patch makes it so for the bytecode
+        instruction buffer, we keep a thread local cache with latest capacity
+        the thread needed to compile. This makes it so that in the average case,
+        we only do one malloc at the end of a compile to memcpy the final result.
+        
+        We clear these thread local caches when the WasmWorklist's automatic threads
+        underlying machine thread is destroyed.
+        
+        This is a 15% speedup in zen garden compile times on a 16-core Mac Pro.
+        This is a 4-5% speedup in zen garden compile times on a 6-core MBP.
+
+        * bytecode/InstructionStream.h:
+        (JSC::InstructionStreamWriter::setInstructionBuffer):
+        (JSC::InstructionStreamWriter::finalize):
+        * wasm/WasmLLIntGenerator.cpp:
+        (JSC::Wasm::threadSpecificBuffer):
+        (JSC::Wasm::clearLLIntThreadSpecificCache):
+        (JSC::Wasm::LLIntGenerator::LLIntGenerator):
+        (JSC::Wasm::LLIntGenerator::finalize):
+        * wasm/WasmLLIntGenerator.h:
+        * wasm/WasmWorklist.cpp:
+
 2020-05-01  Per Arne Vollan  <[email protected]>
 
         [Win] Fix AppleWin build

Modified: trunk/Source/JavaScriptCore/bytecode/InstructionStream.h (261040 => 261041)


--- trunk/Source/JavaScriptCore/bytecode/InstructionStream.h	2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/bytecode/InstructionStream.h	2020-05-02 00:21:51 UTC (rev 261041)
@@ -37,11 +37,11 @@
 class InstructionStream {
     WTF_MAKE_FAST_ALLOCATED;
 
-    using InstructionBuffer = Vector<uint8_t, 0, UnsafeVectorOverflow, 16, InstructionStreamMalloc>;
-
     friend class InstructionStreamWriter;
     friend class CachedInstructionStream;
 public:
+    using InstructionBuffer = Vector<uint8_t, 0, UnsafeVectorOverflow, 16, InstructionStreamMalloc>;
+
     size_t sizeInBytes() const;
 
     using Offset = unsigned;
@@ -191,6 +191,13 @@
         : InstructionStream({ })
     { }
 
+    void setInstructionBuffer(InstructionBuffer&& buffer)
+    {
+        RELEASE_ASSERT(!m_instructions.size());
+        RELEASE_ASSERT(!buffer.size());
+        m_instructions = WTFMove(buffer);
+    }
+
     inline MutableRef ref(Offset offset)
     {
         ASSERT(offset < m_instructions.size());
@@ -261,6 +268,19 @@
         return std::unique_ptr<InstructionStream> { new InstructionStream(WTFMove(m_instructions)) };
     }
 
+    std::unique_ptr<InstructionStream> finalize(InstructionBuffer& usedBuffer)
+    {
+        m_finalized = true;
+
+        InstructionBuffer resultBuffer(m_instructions.size());
+        RELEASE_ASSERT(m_instructions.sizeInBytes() == resultBuffer.sizeInBytes());
+        memcpy(resultBuffer.data(), m_instructions.data(), m_instructions.sizeInBytes());
+
+        usedBuffer = WTFMove(m_instructions);
+
+        return std::unique_ptr<InstructionStream> { new InstructionStream(WTFMove(resultBuffer)) };
+    }
+
     MutableRef ref()
     {
         return MutableRef { m_instructions, m_position };

Modified: trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.cpp (261040 => 261041)


--- trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.cpp	2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.cpp	2020-05-02 00:21:51 UTC (rev 261041)
@@ -428,11 +428,46 @@
     return llintGenerator.finalize();
 }
 
+
+using Buffer = InstructionStream::InstructionBuffer;
+static ThreadSpecific<Buffer>* threadSpecificBufferPtr;
+
+static ThreadSpecific<Buffer>& threadSpecificBuffer()
+{
+    static std::once_flag flag;
+    std::call_once(
+        flag,
+        [] () {
+            threadSpecificBufferPtr = new ThreadSpecific<Buffer>();
+        });
+    return *threadSpecificBufferPtr;
+}
+
+void clearLLIntThreadSpecificCache()
+{
+    auto& threadSpecific = threadSpecificBuffer();
+    if (threadSpecific.isSet())
+        threadSpecific->clear();
+}
+
 LLIntGenerator::LLIntGenerator(const ModuleInformation& info, unsigned functionIndex, const Signature&)
     : BytecodeGeneratorBase(makeUnique<FunctionCodeBlock>(functionIndex), 0)
     , m_info(info)
     , m_functionIndex(functionIndex)
 {
+    {
+        auto& threadSpecific = threadSpecificBuffer();
+
+        if (!threadSpecific.isSet()) {
+            void* ptr = static_cast<Buffer*>(threadSpecific);
+            new (ptr) Buffer();
+        }
+
+        Buffer buffer = WTFMove(*threadSpecific);
+        *threadSpecific = Buffer();
+        m_writer.setInstructionBuffer(WTFMove(buffer));
+    }
+
     m_codeBlock->m_numVars = numberOfLLIntCalleeSaveRegisters;
     m_stackSize = numberOfLLIntCalleeSaveRegisters;
     m_maxStackSize = numberOfLLIntCalleeSaveRegisters;
@@ -444,7 +479,15 @@
 {
     RELEASE_ASSERT(m_codeBlock);
     m_codeBlock->m_numCalleeLocals = WTF::roundUpToMultipleOf(stackAlignmentRegisters(), m_maxStackSize);
-    m_codeBlock->setInstructions(m_writer.finalize());
+
+    auto& threadSpecific = threadSpecificBuffer();
+    Buffer usedBuffer;
+    m_codeBlock->setInstructions(m_writer.finalize(usedBuffer));
+    size_t oldCapacity = usedBuffer.capacity();
+    usedBuffer.resize(0);
+    RELEASE_ASSERT(usedBuffer.capacity() == oldCapacity);
+    *threadSpecific = WTFMove(usedBuffer);
+
     return WTFMove(m_codeBlock);
 }
 

Modified: trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.h (261040 => 261041)


--- trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.h	2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/wasm/WasmLLIntGenerator.h	2020-05-02 00:21:51 UTC (rev 261041)
@@ -35,6 +35,8 @@
 
 Expected<std::unique_ptr<FunctionCodeBlock>, String> parseAndCompileBytecode(const uint8_t*, size_t, const Signature&, const ModuleInformation&, uint32_t functionIndex);
 
+void clearLLIntThreadSpecificCache();
+
 } } // namespace JSC::Wasm
 
 #endif // ENABLE(WEBASSEMBLY)

Modified: trunk/Source/JavaScriptCore/wasm/WasmWorklist.cpp (261040 => 261041)


--- trunk/Source/JavaScriptCore/wasm/WasmWorklist.cpp	2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/JavaScriptCore/wasm/WasmWorklist.cpp	2020-05-02 00:21:51 UTC (rev 261041)
@@ -25,6 +25,7 @@
 
 #include "config.h"
 #include "WasmWorklist.h"
+#include "WasmLLIntGenerator.h"
 
 #if ENABLE(WEBASSEMBLY)
 
@@ -116,6 +117,11 @@
         return complete(holdLock(*worklist.m_lock));
     }
 
+    void threadIsStopping(const AbstractLocker&) override
+    {
+        clearLLIntThreadSpecificCache();
+    }
+
     const char* name() const override
     {
         return "Wasm Worklist Helper Thread";

Modified: trunk/Source/WTF/ChangeLog (261040 => 261041)


--- trunk/Source/WTF/ChangeLog	2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/WTF/ChangeLog	2020-05-02 00:21:51 UTC (rev 261041)
@@ -1,3 +1,13 @@
+2020-05-01  Saam Barati  <[email protected]>
+
+        Have a thread local cache for the Wasm LLInt bytecode buffer
+        https://bugs.webkit.org/show_bug.cgi?id=211317
+
+        Reviewed by Filip Pizlo and Mark Lam.
+
+        * wtf/Vector.h:
+        (WTF::Vector::sizeInBytes const):
+
 2020-05-01  Don Olmstead  <[email protected]>
 
         [GTK] Add additional exports to support hidden visibility

Modified: trunk/Source/WTF/wtf/Vector.h (261040 => 261041)


--- trunk/Source/WTF/wtf/Vector.h	2020-05-01 23:44:47 UTC (rev 261040)
+++ trunk/Source/WTF/wtf/Vector.h	2020-05-02 00:21:51 UTC (rev 261041)
@@ -691,6 +691,7 @@
     Vector& operator=(Vector&&);
 
     size_t size() const { return m_size; }
+    size_t sizeInBytes() const { return static_cast<size_t>(m_size) * sizeof(T); }
     static ptrdiff_t sizeMemoryOffset() { return OBJECT_OFFSETOF(Vector, m_size); }
     size_t capacity() const { return Base::capacity(); }
     bool isEmpty() const { return !size(); }
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to