Modified: trunk/Source/_javascript_Core/ChangeLog (199091 => 199092)
--- trunk/Source/_javascript_Core/ChangeLog 2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-04-06 03:55:11 UTC (rev 199092)
@@ -1,3 +1,60 @@
+2016-04-05 Saam barati <[email protected]>
+
+ JSC SamplingProfiler: Use a thread + sleep loop instead of WTF::WorkQueue for taking samples
+ https://bugs.webkit.org/show_bug.cgi?id=154017
+
+ Reviewed by Geoffrey Garen.
+
+ By moving to an explicitly created seperate thread + sample-then-sleep
+ loop, we can remove a lot of the crufty code around WorkQueue.
+ We're also getting sample rates that are much closer to what we're
+ asking the OS for. When the sampling handler was built off of WorkQueue,
+ we'd often get sample rates much higher than the 1ms we asked for. On Kraken,
+ we would average about 1.7ms sample rates, even though we'd ask for a 1ms rate.
+ Now, on Kraken, we're getting about 1.2ms rates. Because we're getting
+ higher rates, this patch is a performance regression. It's slower because
+ we're sampling more frequently.
+
+ Before this patch, the sampling profiler had the following overhead:
+ - 10% on Kraken
+ - 12% on octane
+ - 15% on AsmBench
+
+ With this patch, the sampling profiler has the following overhead:
+ - 16% on Kraken
+ - 17% on Octane
+ - 30% on AsmBench
+
+ Comparatively, this new patch has the following overhead over the old sampling profiler:
+ - 5% on Kraken
+ - 3.5% on Octane
+ - 13% slower on AsmBench
+
+ * inspector/agents/InspectorScriptProfilerAgent.cpp:
+ (Inspector::InspectorScriptProfilerAgent::trackingComplete):
+ * runtime/SamplingProfiler.cpp:
+ (JSC::SamplingProfiler::SamplingProfiler):
+ (JSC::SamplingProfiler::~SamplingProfiler):
+ (JSC::SamplingProfiler::createThreadIfNecessary):
+ (JSC::SamplingProfiler::timerLoop):
+ (JSC::SamplingProfiler::takeSample):
+ (JSC::tryGetBytecodeIndex):
+ (JSC::SamplingProfiler::shutdown):
+ (JSC::SamplingProfiler::start):
+ (JSC::SamplingProfiler::pause):
+ (JSC::SamplingProfiler::noticeCurrentThreadAsJSCExecutionThread):
+ (JSC::SamplingProfiler::noticeJSLockAcquisition):
+ (JSC::SamplingProfiler::noticeVMEntry):
+ (JSC::SamplingProfiler::clearData):
+ (JSC::SamplingProfiler::stop): Deleted.
+ (JSC::SamplingProfiler::dispatchIfNecessary): Deleted.
+ (JSC::SamplingProfiler::dispatchFunction): Deleted.
+ * runtime/SamplingProfiler.h:
+ (JSC::SamplingProfiler::setTimingInterval):
+ (JSC::SamplingProfiler::setStopWatch):
+ * runtime/VM.cpp:
+ (JSC::VM::VM):
+
2016-04-05 Commit Queue <[email protected]>
Unreviewed, rolling out r199073.
Modified: trunk/Source/_javascript_Core/inspector/agents/InspectorScriptProfilerAgent.cpp (199091 => 199092)
--- trunk/Source/_javascript_Core/inspector/agents/InspectorScriptProfilerAgent.cpp 2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/inspector/agents/InspectorScriptProfilerAgent.cpp 2016-04-06 03:55:11 UTC (rev 199092)
@@ -202,7 +202,7 @@
SamplingProfiler* samplingProfiler = m_environment.scriptDebugServer().vm().samplingProfiler();
RELEASE_ASSERT(samplingProfiler);
LockHolder locker(samplingProfiler->getLock());
- samplingProfiler->stop(locker);
+ samplingProfiler->pause(locker);
Vector<SamplingProfiler::StackTrace> stackTraces = samplingProfiler->releaseStackTraces(locker);
Ref<Protocol::ScriptProfiler::Samples> samples = buildSamples(m_environment.scriptDebugServer().vm(), WTFMove(stackTraces));
Modified: trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp (199091 => 199092)
--- trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp 2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp 2016-04-06 03:55:11 UTC (rev 199092)
@@ -186,11 +186,10 @@
: m_vm(vm)
, m_stopwatch(WTFMove(stopwatch))
, m_timingInterval(std::chrono::microseconds(1000))
- , m_timerQueue(WorkQueue::create("jsc.sampling-profiler.queue", WorkQueue::Type::Serial, WorkQueue::QOS::UserInteractive))
+ , m_threadIdentifier(0)
, m_jscExecutionThread(nullptr)
- , m_isActive(false)
, m_isPaused(false)
- , m_hasDispatchedFunction(false)
+ , m_isShutDown(false)
{
if (sReportStats) {
sNumTotalWalks = 0;
@@ -198,97 +197,124 @@
}
m_currentFrames.grow(256);
+}
- m_handler = [this] () {
- LockHolder samplingProfilerLocker(m_lock);
- if (!m_isActive || !m_jscExecutionThread || m_isPaused) {
- m_hasDispatchedFunction = false;
- deref();
- return;
- }
+SamplingProfiler::~SamplingProfiler()
+{
+}
- if (m_vm.entryScope) {
- double nowTime = m_stopwatch->elapsedTime();
+void SamplingProfiler::createThreadIfNecessary(const LockHolder&)
+{
+ ASSERT(m_lock.isLocked());
- LockHolder machineThreadsLocker(m_vm.heap.machineThreads().getLock());
- LockHolder codeBlockSetLocker(m_vm.heap.codeBlockSet().getLock());
- LockHolder executableAllocatorLocker(m_vm.executableAllocator.getLock());
+ if (m_threadIdentifier)
+ return;
- bool didSuspend = m_jscExecutionThread->suspend();
- if (didSuspend) {
- // While the JSC thread is suspended, we can't do things like malloc because the JSC thread
- // may be holding the malloc lock.
- ExecState* callFrame;
- void* machinePC;
- bool topFrameIsLLInt = false;
- void* llintPC;
- {
- MachineThreads::Thread::Registers registers;
- m_jscExecutionThread->getRegisters(registers);
- callFrame = static_cast<ExecState*>(registers.framePointer());
- machinePC = registers.instructionPointer();
- llintPC = registers.llintPC();
- m_jscExecutionThread->freeRegisters(registers);
- }
- // FIXME: Lets have a way of detecting when we're parsing code.
- // https://bugs.webkit.org/show_bug.cgi?id=152761
- if (m_vm.executableAllocator.isValidExecutableMemory(executableAllocatorLocker, machinePC)) {
- if (m_vm.isExecutingInRegExpJIT) {
- // FIXME: We're executing a regexp. Lets gather more intersting data.
- // https://bugs.webkit.org/show_bug.cgi?id=152729
- callFrame = m_vm.topCallFrame; // We need to do this or else we'd fail our backtrace validation b/c this isn't a JS frame.
- }
- } else if (LLInt::isLLIntPC(machinePC)) {
- topFrameIsLLInt = true;
- // We're okay to take a normal stack trace when the PC
- // is in LLInt code.
- } else {
- // We resort to topCallFrame to see if we can get anything
- // useful. We usually get here when we're executing C code.
- callFrame = m_vm.topCallFrame;
- }
+ RefPtr<SamplingProfiler> profiler = this;
+ m_threadIdentifier = createThread("jsc.sampling-profiler.thread", [profiler] {
+ profiler->timerLoop();
+ });
+}
- size_t walkSize;
- bool wasValidWalk;
- bool didRunOutOfVectorSpace;
- {
- FrameWalker walker(callFrame, m_vm, codeBlockSetLocker, machineThreadsLocker);
- walkSize = walker.walk(m_currentFrames, didRunOutOfVectorSpace);
- wasValidWalk = walker.wasValidWalk();
- }
+void SamplingProfiler::timerLoop()
+{
+ while (true) {
+ std::chrono::microseconds stackTraceProcessingTime = std::chrono::microseconds(0);
+ {
+ LockHolder locker(m_lock);
+ if (UNLIKELY(m_isShutDown))
+ return;
- m_jscExecutionThread->resume();
+ if (!m_isPaused && m_jscExecutionThread)
+ takeSample(locker, stackTraceProcessingTime);
- // We can now use data structures that malloc, and do other interesting things, again.
+ m_lastTime = m_stopwatch->elapsedTime();
+ }
- // FIXME: It'd be interesting to take data about the program's state when
- // we fail to take a stack trace: https://bugs.webkit.org/show_bug.cgi?id=152758
- if (wasValidWalk && walkSize) {
- if (sReportStats)
- sNumTotalStackTraces++;
- Vector<UnprocessedStackFrame> stackTrace;
- stackTrace.reserveInitialCapacity(walkSize);
- for (size_t i = 0; i < walkSize; i++) {
- UnprocessedStackFrame frame = m_currentFrames[i];
- stackTrace.uncheckedAppend(frame);
- }
+ std::this_thread::sleep_for(m_timingInterval - std::min(m_timingInterval, stackTraceProcessingTime));
+ }
+}
- m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, WTFMove(stackTrace) });
+void SamplingProfiler::takeSample(const LockHolder&, std::chrono::microseconds& stackTraceProcessingTime)
+{
+ ASSERT(m_lock.isLocked());
+ if (m_vm.entryScope) {
+ double nowTime = m_stopwatch->elapsedTime();
- if (didRunOutOfVectorSpace)
- m_currentFrames.grow(m_currentFrames.size() * 1.25);
+ LockHolder machineThreadsLocker(m_vm.heap.machineThreads().getLock());
+ LockHolder codeBlockSetLocker(m_vm.heap.codeBlockSet().getLock());
+ LockHolder executableAllocatorLocker(m_vm.executableAllocator.getLock());
+
+ bool didSuspend = m_jscExecutionThread->suspend();
+ if (didSuspend) {
+ // While the JSC thread is suspended, we can't do things like malloc because the JSC thread
+ // may be holding the malloc lock.
+ ExecState* callFrame;
+ void* machinePC;
+ bool topFrameIsLLInt = false;
+ void* llintPC;
+ {
+ MachineThreads::Thread::Registers registers;
+ m_jscExecutionThread->getRegisters(registers);
+ callFrame = static_cast<ExecState*>(registers.framePointer());
+ machinePC = registers.instructionPointer();
+ llintPC = registers.llintPC();
+ m_jscExecutionThread->freeRegisters(registers);
+ }
+ // FIXME: Lets have a way of detecting when we're parsing code.
+ // https://bugs.webkit.org/show_bug.cgi?id=152761
+ if (m_vm.executableAllocator.isValidExecutableMemory(executableAllocatorLocker, machinePC)) {
+ if (m_vm.isExecutingInRegExpJIT) {
+ // FIXME: We're executing a regexp. Lets gather more intersting data.
+ // https://bugs.webkit.org/show_bug.cgi?id=152729
+ callFrame = m_vm.topCallFrame; // We need to do this or else we'd fail our backtrace validation b/c this isn't a JS frame.
}
+ } else if (LLInt::isLLIntPC(machinePC)) {
+ topFrameIsLLInt = true;
+ // We're okay to take a normal stack trace when the PC
+ // is in LLInt code.
+ } else {
+ // We resort to topCallFrame to see if we can get anything
+ // useful. We usually get here when we're executing C code.
+ callFrame = m_vm.topCallFrame;
}
- }
- m_lastTime = m_stopwatch->elapsedTime();
+ size_t walkSize;
+ bool wasValidWalk;
+ bool didRunOutOfVectorSpace;
+ {
+ FrameWalker walker(callFrame, m_vm, codeBlockSetLocker, machineThreadsLocker);
+ walkSize = walker.walk(m_currentFrames, didRunOutOfVectorSpace);
+ wasValidWalk = walker.wasValidWalk();
+ }
- dispatchFunction(samplingProfilerLocker);
- };
-}
+ m_jscExecutionThread->resume();
-SamplingProfiler::~SamplingProfiler()
-{
+ auto startTime = std::chrono::steady_clock::now();
+ // We can now use data structures that malloc, and do other interesting things, again.
+
+ // FIXME: It'd be interesting to take data about the program's state when
+ // we fail to take a stack trace: https://bugs.webkit.org/show_bug.cgi?id=152758
+ if (wasValidWalk && walkSize) {
+ if (sReportStats)
+ sNumTotalStackTraces++;
+ Vector<UnprocessedStackFrame> stackTrace;
+ stackTrace.reserveInitialCapacity(walkSize);
+ for (size_t i = 0; i < walkSize; i++) {
+ UnprocessedStackFrame frame = m_currentFrames[i];
+ stackTrace.uncheckedAppend(frame);
+ }
+
+ m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, WTFMove(stackTrace) });
+
+ if (didRunOutOfVectorSpace)
+ m_currentFrames.grow(m_currentFrames.size() * 1.25);
+ }
+
+ auto endTime = std::chrono::steady_clock::now();
+ stackTraceProcessingTime = std::chrono::duration_cast<std::chrono::microseconds>(endTime - startTime);
+ }
+ }
}
static ALWAYS_INLINE unsigned tryGetBytecodeIndex(unsigned llintPC, CodeBlock* codeBlock, bool& isValid)
@@ -486,7 +512,8 @@
void SamplingProfiler::shutdown()
{
- stop();
+ LockHolder locker(m_lock);
+ m_isShutDown = true;
}
void SamplingProfiler::start()
@@ -498,26 +525,13 @@
void SamplingProfiler::start(const LockHolder& locker)
{
ASSERT(m_lock.isLocked());
- m_isActive = true;
- dispatchIfNecessary(locker);
+ m_isPaused = false;
+ createThreadIfNecessary(locker);
}
-void SamplingProfiler::stop()
+void SamplingProfiler::pause(const LockHolder&)
{
- LockHolder locker(m_lock);
- stop(locker);
-}
-
-void SamplingProfiler::stop(const LockHolder&)
-{
ASSERT(m_lock.isLocked());
- m_isActive = false;
- reportStats();
-}
-
-void SamplingProfiler::pause()
-{
- LockHolder locker(m_lock);
m_isPaused = true;
reportStats();
}
@@ -534,22 +548,6 @@
noticeCurrentThreadAsJSCExecutionThread(locker);
}
-void SamplingProfiler::dispatchIfNecessary(const LockHolder& locker)
-{
- if (m_isActive && !m_hasDispatchedFunction && m_jscExecutionThread && m_vm.entryScope) {
- ref(); // Matching deref() is inside m_handler when m_handler stops recursing.
- dispatchFunction(locker);
- }
-}
-
-void SamplingProfiler::dispatchFunction(const LockHolder&)
-{
- m_hasDispatchedFunction = true;
- m_isPaused = false;
- m_lastTime = m_stopwatch->elapsedTime();
- m_timerQueue->dispatchAfter(m_timingInterval, m_handler);
-}
-
void SamplingProfiler::noticeJSLockAcquisition()
{
LockHolder locker(m_lock);
@@ -562,7 +560,7 @@
ASSERT(m_vm.entryScope);
noticeCurrentThreadAsJSCExecutionThread(locker);
m_lastTime = m_stopwatch->elapsedTime();
- dispatchIfNecessary(locker);
+ createThreadIfNecessary(locker);
}
void SamplingProfiler::clearData(const LockHolder&)
Modified: trunk/Source/_javascript_Core/runtime/SamplingProfiler.h (199091 => 199092)
--- trunk/Source/_javascript_Core/runtime/SamplingProfiler.h 2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/runtime/SamplingProfiler.h 2016-04-06 03:55:11 UTC (rev 199092)
@@ -129,20 +129,19 @@
void setTimingInterval(std::chrono::microseconds interval) { m_timingInterval = interval; }
JS_EXPORT_PRIVATE void start();
void start(const LockHolder&);
- void stop();
- void stop(const LockHolder&);
Vector<StackTrace> releaseStackTraces(const LockHolder&);
JS_EXPORT_PRIVATE String stackTracesAsJSON();
JS_EXPORT_PRIVATE void noticeCurrentThreadAsJSCExecutionThread();
void noticeCurrentThreadAsJSCExecutionThread(const LockHolder&);
void processUnverifiedStackTraces(); // You should call this only after acquiring the lock.
void setStopWatch(const LockHolder&, Ref<Stopwatch>&& stopwatch) { m_stopwatch = WTFMove(stopwatch); }
+ void pause(const LockHolder&);
private:
- void dispatchIfNecessary(const LockHolder&);
- void dispatchFunction(const LockHolder&);
- void pause();
void clearData(const LockHolder&);
+ void createThreadIfNecessary(const LockHolder&);
+ void timerLoop();
+ void takeSample(const LockHolder&, std::chrono::microseconds& stackTraceProcessingTime);
VM& m_vm;
RefPtr<Stopwatch> m_stopwatch;
@@ -150,13 +149,11 @@
Vector<UnprocessedStackTrace> m_unprocessedStackTraces;
std::chrono::microseconds m_timingInterval;
double m_lastTime;
- Ref<WorkQueue> m_timerQueue;
- std::function<void ()> m_handler;
Lock m_lock;
+ ThreadIdentifier m_threadIdentifier;
MachineThreads::Thread* m_jscExecutionThread;
- bool m_isActive;
bool m_isPaused;
- bool m_hasDispatchedFunction;
+ bool m_isShutDown;
HashSet<JSCell*> m_liveCellPointers;
Vector<UnprocessedStackFrame> m_currentFrames;
};
Modified: trunk/Source/_javascript_Core/runtime/VM.cpp (199091 => 199092)
--- trunk/Source/_javascript_Core/runtime/VM.cpp 2016-04-06 03:23:18 UTC (rev 199091)
+++ trunk/Source/_javascript_Core/runtime/VM.cpp 2016-04-06 03:55:11 UTC (rev 199092)
@@ -318,7 +318,9 @@
#if ENABLE(SAMPLING_PROFILER)
if (Options::useSamplingProfiler()) {
setShouldBuildPCToCodeOriginMapping();
- m_samplingProfiler = adoptRef(new SamplingProfiler(*this, Stopwatch::create()));
+ Ref<Stopwatch> stopwatch = Stopwatch::create();
+ stopwatch->start();
+ m_samplingProfiler = adoptRef(new SamplingProfiler(*this, WTFMove(stopwatch)));
m_samplingProfiler->start();
}
#endif // ENABLE(SAMPLING_PROFILER)