Title: [199092] trunk/Source/_javascript_Core
Revision
199092
Author
[email protected]
Date
2016-04-05 20:55:11 -0700 (Tue, 05 Apr 2016)

Log Message

JSC SamplingProfiler: Use a thread + sleep loop instead of WTF::WorkQueue for taking samples
https://bugs.webkit.org/show_bug.cgi?id=154017

Reviewed by Geoffrey Garen.

By moving to an explicitly created seperate thread + sample-then-sleep
loop, we can remove a lot of the crufty code around WorkQueue.
We're also getting sample rates that are much closer to what we're
asking the OS for. When the sampling handler was built off of WorkQueue,
we'd often get sample rates much higher than the 1ms we asked for. On Kraken,
we would average about 1.7ms sample rates, even though we'd ask for a 1ms rate.
Now, on Kraken, we're getting about 1.2ms rates. Because we're getting
higher rates, this patch is a performance regression. It's slower because
we're sampling more frequently.

Before this patch, the sampling profiler had the following overhead:
- 10% on Kraken
- 12% on octane
- 15% on AsmBench

With this patch, the sampling profiler has the following overhead:
- 16% on Kraken
- 17% on Octane
- 30% on AsmBench

Comparatively, this new patch has the following overhead over the old sampling profiler:
- 5% on Kraken
- 3.5% on Octane
- 13% slower on AsmBench

* inspector/agents/InspectorScriptProfilerAgent.cpp:
(Inspector::InspectorScriptProfilerAgent::trackingComplete):
* runtime/SamplingProfiler.cpp:
(JSC::SamplingProfiler::SamplingProfiler):
(JSC::SamplingProfiler::~SamplingProfiler):
(JSC::SamplingProfiler::createThreadIfNecessary):
(JSC::SamplingProfiler::timerLoop):
(JSC::SamplingProfiler::takeSample):
(JSC::tryGetBytecodeIndex):
(JSC::SamplingProfiler::shutdown):
(JSC::SamplingProfiler::start):
(JSC::SamplingProfiler::pause):
(JSC::SamplingProfiler::noticeCurrentThreadAsJSCExecutionThread):
(JSC::SamplingProfiler::noticeJSLockAcquisition):
(JSC::SamplingProfiler::noticeVMEntry):
(JSC::SamplingProfiler::clearData):
(JSC::SamplingProfiler::stop): Deleted.
(JSC::SamplingProfiler::dispatchIfNecessary): Deleted.
(JSC::SamplingProfiler::dispatchFunction): Deleted.
* runtime/SamplingProfiler.h:
(JSC::SamplingProfiler::setTimingInterval):
(JSC::SamplingProfiler::setStopWatch):
* runtime/VM.cpp:
(JSC::VM::VM):

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (199091 => 199092)


--- trunk/Source/_javascript_Core/ChangeLog	2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/ChangeLog	2016-04-06 03:55:11 UTC (rev 199092)
@@ -1,3 +1,60 @@
+2016-04-05  Saam barati  <[email protected]>
+
+        JSC SamplingProfiler: Use a thread + sleep loop instead of WTF::WorkQueue for taking samples
+        https://bugs.webkit.org/show_bug.cgi?id=154017
+
+        Reviewed by Geoffrey Garen.
+
+        By moving to an explicitly created seperate thread + sample-then-sleep
+        loop, we can remove a lot of the crufty code around WorkQueue.
+        We're also getting sample rates that are much closer to what we're
+        asking the OS for. When the sampling handler was built off of WorkQueue,
+        we'd often get sample rates much higher than the 1ms we asked for. On Kraken,
+        we would average about 1.7ms sample rates, even though we'd ask for a 1ms rate.
+        Now, on Kraken, we're getting about 1.2ms rates. Because we're getting
+        higher rates, this patch is a performance regression. It's slower because
+        we're sampling more frequently.
+
+        Before this patch, the sampling profiler had the following overhead:
+        - 10% on Kraken
+        - 12% on octane
+        - 15% on AsmBench
+
+        With this patch, the sampling profiler has the following overhead:
+        - 16% on Kraken
+        - 17% on Octane
+        - 30% on AsmBench
+
+        Comparatively, this new patch has the following overhead over the old sampling profiler:
+        - 5% on Kraken
+        - 3.5% on Octane
+        - 13% slower on AsmBench
+
+        * inspector/agents/InspectorScriptProfilerAgent.cpp:
+        (Inspector::InspectorScriptProfilerAgent::trackingComplete):
+        * runtime/SamplingProfiler.cpp:
+        (JSC::SamplingProfiler::SamplingProfiler):
+        (JSC::SamplingProfiler::~SamplingProfiler):
+        (JSC::SamplingProfiler::createThreadIfNecessary):
+        (JSC::SamplingProfiler::timerLoop):
+        (JSC::SamplingProfiler::takeSample):
+        (JSC::tryGetBytecodeIndex):
+        (JSC::SamplingProfiler::shutdown):
+        (JSC::SamplingProfiler::start):
+        (JSC::SamplingProfiler::pause):
+        (JSC::SamplingProfiler::noticeCurrentThreadAsJSCExecutionThread):
+        (JSC::SamplingProfiler::noticeJSLockAcquisition):
+        (JSC::SamplingProfiler::noticeVMEntry):
+        (JSC::SamplingProfiler::clearData):
+        (JSC::SamplingProfiler::stop): Deleted.
+        (JSC::SamplingProfiler::dispatchIfNecessary): Deleted.
+        (JSC::SamplingProfiler::dispatchFunction): Deleted.
+        * runtime/SamplingProfiler.h:
+        (JSC::SamplingProfiler::setTimingInterval):
+        (JSC::SamplingProfiler::setStopWatch):
+        * runtime/VM.cpp:
+        (JSC::VM::VM):
+
 2016-04-05  Commit Queue  <[email protected]>
 
         Unreviewed, rolling out r199073.

Modified: trunk/Source/_javascript_Core/inspector/agents/InspectorScriptProfilerAgent.cpp (199091 => 199092)


--- trunk/Source/_javascript_Core/inspector/agents/InspectorScriptProfilerAgent.cpp	2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/inspector/agents/InspectorScriptProfilerAgent.cpp	2016-04-06 03:55:11 UTC (rev 199092)
@@ -202,7 +202,7 @@
         SamplingProfiler* samplingProfiler = m_environment.scriptDebugServer().vm().samplingProfiler();
         RELEASE_ASSERT(samplingProfiler);
         LockHolder locker(samplingProfiler->getLock());
-        samplingProfiler->stop(locker);
+        samplingProfiler->pause(locker);
         Vector<SamplingProfiler::StackTrace> stackTraces = samplingProfiler->releaseStackTraces(locker);
         Ref<Protocol::ScriptProfiler::Samples> samples = buildSamples(m_environment.scriptDebugServer().vm(), WTFMove(stackTraces));
 

Modified: trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp (199091 => 199092)


--- trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp	2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp	2016-04-06 03:55:11 UTC (rev 199092)
@@ -186,11 +186,10 @@
     : m_vm(vm)
     , m_stopwatch(WTFMove(stopwatch))
     , m_timingInterval(std::chrono::microseconds(1000))
-    , m_timerQueue(WorkQueue::create("jsc.sampling-profiler.queue", WorkQueue::Type::Serial, WorkQueue::QOS::UserInteractive))
+    , m_threadIdentifier(0)
     , m_jscExecutionThread(nullptr)
-    , m_isActive(false)
     , m_isPaused(false)
-    , m_hasDispatchedFunction(false)
+    , m_isShutDown(false)
 {
     if (sReportStats) {
         sNumTotalWalks = 0;
@@ -198,97 +197,124 @@
     }
 
     m_currentFrames.grow(256);
+}
 
-    m_handler = [this] () {
-        LockHolder samplingProfilerLocker(m_lock);
-        if (!m_isActive || !m_jscExecutionThread || m_isPaused) {
-            m_hasDispatchedFunction = false;
-            deref();
-            return;
-        }
+SamplingProfiler::~SamplingProfiler()
+{
+}
 
-        if (m_vm.entryScope) {
-            double nowTime = m_stopwatch->elapsedTime();
+void SamplingProfiler::createThreadIfNecessary(const LockHolder&)
+{
+    ASSERT(m_lock.isLocked());
 
-            LockHolder machineThreadsLocker(m_vm.heap.machineThreads().getLock());
-            LockHolder codeBlockSetLocker(m_vm.heap.codeBlockSet().getLock());
-            LockHolder executableAllocatorLocker(m_vm.executableAllocator.getLock());
+    if (m_threadIdentifier)
+        return;
 
-            bool didSuspend = m_jscExecutionThread->suspend();
-            if (didSuspend) {
-                // While the JSC thread is suspended, we can't do things like malloc because the JSC thread
-                // may be holding the malloc lock.
-                ExecState* callFrame;
-                void* machinePC;
-                bool topFrameIsLLInt = false;
-                void* llintPC;
-                {
-                    MachineThreads::Thread::Registers registers;
-                    m_jscExecutionThread->getRegisters(registers);
-                    callFrame = static_cast<ExecState*>(registers.framePointer());
-                    machinePC = registers.instructionPointer();
-                    llintPC = registers.llintPC();
-                    m_jscExecutionThread->freeRegisters(registers);
-                }
-                // FIXME: Lets have a way of detecting when we're parsing code.
-                // https://bugs.webkit.org/show_bug.cgi?id=152761
-                if (m_vm.executableAllocator.isValidExecutableMemory(executableAllocatorLocker, machinePC)) {
-                    if (m_vm.isExecutingInRegExpJIT) {
-                        // FIXME: We're executing a regexp. Lets gather more intersting data.
-                        // https://bugs.webkit.org/show_bug.cgi?id=152729
-                        callFrame = m_vm.topCallFrame; // We need to do this or else we'd fail our backtrace validation b/c this isn't a JS frame.
-                    }
-                } else if (LLInt::isLLIntPC(machinePC)) {
-                    topFrameIsLLInt = true;
-                    // We're okay to take a normal stack trace when the PC
-                    // is in LLInt code.
-                } else {
-                    // We resort to topCallFrame to see if we can get anything
-                    // useful. We usually get here when we're executing C code.
-                    callFrame = m_vm.topCallFrame;
-                }
+    RefPtr<SamplingProfiler> profiler = this;
+    m_threadIdentifier = createThread("jsc.sampling-profiler.thread", [profiler] {
+        profiler->timerLoop();
+    });
+}
 
-                size_t walkSize;
-                bool wasValidWalk;
-                bool didRunOutOfVectorSpace;
-                {
-                    FrameWalker walker(callFrame, m_vm, codeBlockSetLocker, machineThreadsLocker);
-                    walkSize = walker.walk(m_currentFrames, didRunOutOfVectorSpace);
-                    wasValidWalk = walker.wasValidWalk();
-                }
+void SamplingProfiler::timerLoop()
+{
+    while (true) {
+        std::chrono::microseconds stackTraceProcessingTime = std::chrono::microseconds(0);
+        {
+            LockHolder locker(m_lock);
+            if (UNLIKELY(m_isShutDown))
+                return;
 
-                m_jscExecutionThread->resume();
+            if (!m_isPaused && m_jscExecutionThread)
+                takeSample(locker, stackTraceProcessingTime);
 
-                // We can now use data structures that malloc, and do other interesting things, again.
+            m_lastTime = m_stopwatch->elapsedTime();
+        }
 
-                // FIXME: It'd be interesting to take data about the program's state when
-                // we fail to take a stack trace: https://bugs.webkit.org/show_bug.cgi?id=152758
-                if (wasValidWalk && walkSize) {
-                    if (sReportStats)
-                        sNumTotalStackTraces++;
-                    Vector<UnprocessedStackFrame> stackTrace;
-                    stackTrace.reserveInitialCapacity(walkSize);
-                    for (size_t i = 0; i < walkSize; i++) {
-                        UnprocessedStackFrame frame = m_currentFrames[i];
-                        stackTrace.uncheckedAppend(frame);
-                    }
+        std::this_thread::sleep_for(m_timingInterval - std::min(m_timingInterval, stackTraceProcessingTime));
+    }
+}
 
-                    m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, WTFMove(stackTrace) });
+void SamplingProfiler::takeSample(const LockHolder&, std::chrono::microseconds& stackTraceProcessingTime)
+{
+    ASSERT(m_lock.isLocked());
+    if (m_vm.entryScope) {
+        double nowTime = m_stopwatch->elapsedTime();
 
-                    if (didRunOutOfVectorSpace)
-                        m_currentFrames.grow(m_currentFrames.size() * 1.25);
+        LockHolder machineThreadsLocker(m_vm.heap.machineThreads().getLock());
+        LockHolder codeBlockSetLocker(m_vm.heap.codeBlockSet().getLock());
+        LockHolder executableAllocatorLocker(m_vm.executableAllocator.getLock());
+
+        bool didSuspend = m_jscExecutionThread->suspend();
+        if (didSuspend) {
+            // While the JSC thread is suspended, we can't do things like malloc because the JSC thread
+            // may be holding the malloc lock.
+            ExecState* callFrame;
+            void* machinePC;
+            bool topFrameIsLLInt = false;
+            void* llintPC;
+            {
+                MachineThreads::Thread::Registers registers;
+                m_jscExecutionThread->getRegisters(registers);
+                callFrame = static_cast<ExecState*>(registers.framePointer());
+                machinePC = registers.instructionPointer();
+                llintPC = registers.llintPC();
+                m_jscExecutionThread->freeRegisters(registers);
+            }
+            // FIXME: Lets have a way of detecting when we're parsing code.
+            // https://bugs.webkit.org/show_bug.cgi?id=152761
+            if (m_vm.executableAllocator.isValidExecutableMemory(executableAllocatorLocker, machinePC)) {
+                if (m_vm.isExecutingInRegExpJIT) {
+                    // FIXME: We're executing a regexp. Lets gather more intersting data.
+                    // https://bugs.webkit.org/show_bug.cgi?id=152729
+                    callFrame = m_vm.topCallFrame; // We need to do this or else we'd fail our backtrace validation b/c this isn't a JS frame.
                 }
+            } else if (LLInt::isLLIntPC(machinePC)) {
+                topFrameIsLLInt = true;
+                // We're okay to take a normal stack trace when the PC
+                // is in LLInt code.
+            } else {
+                // We resort to topCallFrame to see if we can get anything
+                // useful. We usually get here when we're executing C code.
+                callFrame = m_vm.topCallFrame;
             }
-        }
 
-        m_lastTime = m_stopwatch->elapsedTime();
+            size_t walkSize;
+            bool wasValidWalk;
+            bool didRunOutOfVectorSpace;
+            {
+                FrameWalker walker(callFrame, m_vm, codeBlockSetLocker, machineThreadsLocker);
+                walkSize = walker.walk(m_currentFrames, didRunOutOfVectorSpace);
+                wasValidWalk = walker.wasValidWalk();
+            }
 
-        dispatchFunction(samplingProfilerLocker);
-    };
-}
+            m_jscExecutionThread->resume();
 
-SamplingProfiler::~SamplingProfiler()
-{
+            auto startTime = std::chrono::steady_clock::now();
+            // We can now use data structures that malloc, and do other interesting things, again.
+
+            // FIXME: It'd be interesting to take data about the program's state when
+            // we fail to take a stack trace: https://bugs.webkit.org/show_bug.cgi?id=152758
+            if (wasValidWalk && walkSize) {
+                if (sReportStats)
+                    sNumTotalStackTraces++;
+                Vector<UnprocessedStackFrame> stackTrace;
+                stackTrace.reserveInitialCapacity(walkSize);
+                for (size_t i = 0; i < walkSize; i++) {
+                    UnprocessedStackFrame frame = m_currentFrames[i];
+                    stackTrace.uncheckedAppend(frame);
+                }
+
+                m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, WTFMove(stackTrace) });
+
+                if (didRunOutOfVectorSpace)
+                    m_currentFrames.grow(m_currentFrames.size() * 1.25);
+            }
+
+            auto endTime = std::chrono::steady_clock::now();
+            stackTraceProcessingTime = std::chrono::duration_cast<std::chrono::microseconds>(endTime - startTime);
+        }
+    }
 }
 
 static ALWAYS_INLINE unsigned tryGetBytecodeIndex(unsigned llintPC, CodeBlock* codeBlock, bool& isValid)
@@ -486,7 +512,8 @@
 
 void SamplingProfiler::shutdown()
 {
-    stop();
+    LockHolder locker(m_lock);
+    m_isShutDown = true;
 }
 
 void SamplingProfiler::start()
@@ -498,26 +525,13 @@
 void SamplingProfiler::start(const LockHolder& locker)
 {
     ASSERT(m_lock.isLocked());
-    m_isActive = true;
-    dispatchIfNecessary(locker);
+    m_isPaused = false;
+    createThreadIfNecessary(locker);
 }
 
-void SamplingProfiler::stop()
+void SamplingProfiler::pause(const LockHolder&)
 {
-    LockHolder locker(m_lock);
-    stop(locker);
-}
-
-void SamplingProfiler::stop(const LockHolder&)
-{
     ASSERT(m_lock.isLocked());
-    m_isActive = false;
-    reportStats();
-}
-
-void SamplingProfiler::pause()
-{
-    LockHolder locker(m_lock);
     m_isPaused = true;
     reportStats();
 }
@@ -534,22 +548,6 @@
     noticeCurrentThreadAsJSCExecutionThread(locker);
 }
 
-void SamplingProfiler::dispatchIfNecessary(const LockHolder& locker)
-{
-    if (m_isActive && !m_hasDispatchedFunction && m_jscExecutionThread && m_vm.entryScope) {
-        ref(); // Matching deref() is inside m_handler when m_handler stops recursing.
-        dispatchFunction(locker);
-    }
-}
-
-void SamplingProfiler::dispatchFunction(const LockHolder&)
-{
-    m_hasDispatchedFunction = true;
-    m_isPaused = false;
-    m_lastTime = m_stopwatch->elapsedTime();
-    m_timerQueue->dispatchAfter(m_timingInterval, m_handler);
-}
-
 void SamplingProfiler::noticeJSLockAcquisition()
 {
     LockHolder locker(m_lock);
@@ -562,7 +560,7 @@
     ASSERT(m_vm.entryScope);
     noticeCurrentThreadAsJSCExecutionThread(locker);
     m_lastTime = m_stopwatch->elapsedTime();
-    dispatchIfNecessary(locker);
+    createThreadIfNecessary(locker);
 }
 
 void SamplingProfiler::clearData(const LockHolder&)

Modified: trunk/Source/_javascript_Core/runtime/SamplingProfiler.h (199091 => 199092)


--- trunk/Source/_javascript_Core/runtime/SamplingProfiler.h	2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/runtime/SamplingProfiler.h	2016-04-06 03:55:11 UTC (rev 199092)
@@ -129,20 +129,19 @@
     void setTimingInterval(std::chrono::microseconds interval) { m_timingInterval = interval; }
     JS_EXPORT_PRIVATE void start();
     void start(const LockHolder&);
-    void stop();
-    void stop(const LockHolder&);
     Vector<StackTrace> releaseStackTraces(const LockHolder&);
     JS_EXPORT_PRIVATE String stackTracesAsJSON();
     JS_EXPORT_PRIVATE void noticeCurrentThreadAsJSCExecutionThread();
     void noticeCurrentThreadAsJSCExecutionThread(const LockHolder&);
     void processUnverifiedStackTraces(); // You should call this only after acquiring the lock.
     void setStopWatch(const LockHolder&, Ref<Stopwatch>&& stopwatch) { m_stopwatch = WTFMove(stopwatch); }
+    void pause(const LockHolder&);
 
 private:
-    void dispatchIfNecessary(const LockHolder&);
-    void dispatchFunction(const LockHolder&);
-    void pause();
     void clearData(const LockHolder&);
+    void createThreadIfNecessary(const LockHolder&);
+    void timerLoop();
+    void takeSample(const LockHolder&, std::chrono::microseconds& stackTraceProcessingTime);
 
     VM& m_vm;
     RefPtr<Stopwatch> m_stopwatch;
@@ -150,13 +149,11 @@
     Vector<UnprocessedStackTrace> m_unprocessedStackTraces;
     std::chrono::microseconds m_timingInterval;
     double m_lastTime;
-    Ref<WorkQueue> m_timerQueue;
-    std::function<void ()> m_handler;
     Lock m_lock;
+    ThreadIdentifier m_threadIdentifier;
     MachineThreads::Thread* m_jscExecutionThread;
-    bool m_isActive;
     bool m_isPaused;
-    bool m_hasDispatchedFunction;
+    bool m_isShutDown;
     HashSet<JSCell*> m_liveCellPointers;
     Vector<UnprocessedStackFrame> m_currentFrames;
 };

Modified: trunk/Source/_javascript_Core/runtime/VM.cpp (199091 => 199092)


--- trunk/Source/_javascript_Core/runtime/VM.cpp	2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/runtime/VM.cpp	2016-04-06 03:55:11 UTC (rev 199092)
@@ -318,7 +318,9 @@
 #if ENABLE(SAMPLING_PROFILER)
     if (Options::useSamplingProfiler()) {
         setShouldBuildPCToCodeOriginMapping();
-        m_samplingProfiler = adoptRef(new SamplingProfiler(*this, Stopwatch::create()));
+        Ref<Stopwatch> stopwatch = Stopwatch::create();
+        stopwatch->start();
+        m_samplingProfiler = adoptRef(new SamplingProfiler(*this, WTFMove(stopwatch)));
         m_samplingProfiler->start();
     }
 #endif // ENABLE(SAMPLING_PROFILER)
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to