Title: [276786] trunk/Source/_javascript_Core
Revision
276786
Author
[email protected]
Date
2021-04-29 10:42:13 -0700 (Thu, 29 Apr 2021)

Log Message

Sampling profiler should dump a tier breakdown, and add ability to see time spent in C code with sampleCCode=0, and fix bugs with frames having the wrong jitType if they're inlined
https://bugs.webkit.org/show_bug.cgi?id=225116

Reviewed by Yusuke Suzuki.

This patch makes it so we also dump time spent in each tier when dumping top
bytecodes. This can be helpful info when analyzing benchmarks.

This patch also makes it so we know when we're in C/C++ code when we're not
using the sampleCCode=true option. I found some weird performance pathologies
with that option that cause us to not sample code at all. I was seeing ~50
samples taken for ~7 seconds of code running time. It's worth figuring out
what's going on there eventually. But for now, I've made it so that we
recognize that the top frame is C/C++ when using the collectExtraSamplingProfilerData=1
option.

This patch also fixes a bug where we mis-attribute JITTypes for inline
frames. We'd attribute it to whatever the CodeBlock was compiled as, instead
of using the machine frame's JITType.

* jsc.cpp:
(CommandLine::parseArguments):
* runtime/OptionsList.h:
* runtime/SamplingProfiler.cpp:
(JSC::SamplingProfiler::takeSample):
(JSC::SamplingProfiler::processUnverifiedStackTraces):
(JSC::SamplingProfiler::StackFrame::displayName):
(JSC::SamplingProfiler::reportTopBytecodes):

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (276785 => 276786)


--- trunk/Source/_javascript_Core/ChangeLog	2021-04-29 17:16:23 UTC (rev 276785)
+++ trunk/Source/_javascript_Core/ChangeLog	2021-04-29 17:42:13 UTC (rev 276786)
@@ -1,3 +1,34 @@
+2021-04-29  Saam Barati  <[email protected]>
+
+        Sampling profiler should dump a tier breakdown, and add ability to see time spent in C code with sampleCCode=0, and fix bugs with frames having the wrong jitType if they're inlined
+        https://bugs.webkit.org/show_bug.cgi?id=225116
+
+        Reviewed by Yusuke Suzuki.
+
+        This patch makes it so we also dump time spent in each tier when dumping top
+        bytecodes. This can be helpful info when analyzing benchmarks.
+        
+        This patch also makes it so we know when we're in C/C++ code when we're not
+        using the sampleCCode=true option. I found some weird performance pathologies
+        with that option that cause us to not sample code at all. I was seeing ~50
+        samples taken for ~7 seconds of code running time. It's worth figuring out
+        what's going on there eventually. But for now, I've made it so that we
+        recognize that the top frame is C/C++ when using the collectExtraSamplingProfilerData=1
+        option.
+        
+        This patch also fixes a bug where we mis-attribute JITTypes for inline
+        frames. We'd attribute it to whatever the CodeBlock was compiled as, instead
+        of using the machine frame's JITType.
+
+        * jsc.cpp:
+        (CommandLine::parseArguments):
+        * runtime/OptionsList.h:
+        * runtime/SamplingProfiler.cpp:
+        (JSC::SamplingProfiler::takeSample):
+        (JSC::SamplingProfiler::processUnverifiedStackTraces):
+        (JSC::SamplingProfiler::StackFrame::displayName):
+        (JSC::SamplingProfiler::reportTopBytecodes):
+
 2021-04-28  Mark Lam  <[email protected]>
 
         Fix exception assertions in light of the TerminationException.

Modified: trunk/Source/_javascript_Core/jsc.cpp (276785 => 276786)


--- trunk/Source/_javascript_Core/jsc.cpp	2021-04-29 17:16:23 UTC (rev 276785)
+++ trunk/Source/_javascript_Core/jsc.cpp	2021-04-29 17:42:13 UTC (rev 276786)
@@ -3299,7 +3299,7 @@
         }
         if (!strcmp(arg, "--sample")) {
             JSC::Options::useSamplingProfiler() = true;
-            JSC::Options::collectSamplingProfilerDataForJSCShell() = true;
+            JSC::Options::collectExtraSamplingProfilerData() = true;
             m_dumpSamplingProfilerData = true;
             continue;
         }

Modified: trunk/Source/_javascript_Core/runtime/OptionsList.h (276785 => 276786)


--- trunk/Source/_javascript_Core/runtime/OptionsList.h	2021-04-29 17:16:23 UTC (rev 276785)
+++ trunk/Source/_javascript_Core/runtime/OptionsList.h	2021-04-29 17:42:13 UTC (rev 276786)
@@ -361,7 +361,7 @@
     \
     v(Bool, useSamplingProfiler, false, Normal, nullptr) \
     v(Unsigned, sampleInterval, 1000, Normal, "Time between stack traces in microseconds.") \
-    v(Bool, collectSamplingProfilerDataForJSCShell, false, Normal, "This corresponds to the JSC shell's --sample option.") \
+    v(Bool, collectExtraSamplingProfilerData, false, Normal, "This corresponds to the JSC shell's --sample option, or if we're wanting to use the sampling profiler via the Debug menu in the browser.") \
     v(Unsigned, samplingProfilerTopFunctionsCount, 12, Normal, "Number of top functions to report when using the command line interface.") \
     v(Unsigned, samplingProfilerTopBytecodesCount, 40, Normal, "Number of top bytecodes to report when using the command line interface.") \
     v(OptionString, samplingProfilerPath, nullptr, Normal, "The path to the directory to write sampiling profiler output to. This probably will not work with WK2 unless the path is in the sandbox.") \

Modified: trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp (276785 => 276786)


--- trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp	2021-04-29 17:16:23 UTC (rev 276785)
+++ trunk/Source/_javascript_Core/runtime/SamplingProfiler.cpp	2021-04-29 17:42:13 UTC (rev 276786)
@@ -381,6 +381,8 @@
                 llintPC = removeCodePtrTag(MachineContext::llintInstructionPointer(registers));
                 assertIsNotTagged(machinePC);
             }
+
+            bool shouldAppendTopFrameAsCCode = false;
             // FIXME: Lets have a way of detecting when we're parsing code.
             // https://bugs.webkit.org/show_bug.cgi?id=152761
             if (ExecutableAllocator::singleton().isValidExecutableMemory(executableAllocatorLocker, machinePC)) {
@@ -397,6 +399,8 @@
                 // We resort to topCallFrame to see if we can get anything
                 // useful. We usually get here when we're executing C code.
                 callFrame = m_vm.topCallFrame;
+                if (Options::collectExtraSamplingProfilerData() && !Options::sampleCCode())
+                    shouldAppendTopFrameAsCCode = true;
             }
 
             size_t walkSize;
@@ -423,7 +427,9 @@
                 if (sReportStats)
                     sNumTotalStackTraces++;
                 Vector<UnprocessedStackFrame> stackTrace;
-                stackTrace.reserveInitialCapacity(walkSize);
+                stackTrace.reserveInitialCapacity(walkSize + !!shouldAppendTopFrameAsCCode);
+                if (shouldAppendTopFrameAsCCode)
+                    stackTrace.uncheckedAppend(UnprocessedStackFrame { machinePC });
                 for (size_t i = 0; i < walkSize; i++) {
                     UnprocessedStackFrame frame = m_currentFrames[i];
                     stackTrace.uncheckedAppend(frame);
@@ -465,7 +471,7 @@
         StackTrace& stackTrace = m_stackTraces.last();
         stackTrace.timestamp = unprocessedStackTrace.timestamp;
 
-        auto populateCodeLocation = [] (CodeBlock* codeBlock, BytecodeIndex bytecodeIndex, StackFrame::CodeLocation& location) {
+        auto populateCodeLocation = [] (CodeBlock* codeBlock, JITType jitType, BytecodeIndex bytecodeIndex, StackFrame::CodeLocation& location) {
             if (bytecodeIndex.offset() < codeBlock->instructionsSize()) {
                 int divot;
                 int startOffset;
@@ -474,16 +480,15 @@
                     location.lineNumber, location.columnNumber);
                 location.bytecodeIndex = bytecodeIndex;
             }
-            if (Options::collectSamplingProfilerDataForJSCShell()) {
+            if (codeBlock->hasHash())
                 location.codeBlockHash = codeBlock->hash();
-                location.jitType = codeBlock->jitType();
-            }
+            location.jitType = jitType;
         };
 
-        auto appendCodeBlock = [&] (CodeBlock* codeBlock, BytecodeIndex bytecodeIndex) {
+        auto appendCodeBlock = [&] (CodeBlock* codeBlock, JITType jitType, BytecodeIndex bytecodeIndex) {
             stackTrace.frames.append(StackFrame(codeBlock->ownerExecutable()));
             m_liveCellPointers.add(codeBlock->ownerExecutable());
-            populateCodeLocation(codeBlock, bytecodeIndex, stackTrace.frames.last().semanticLocation);
+            populateCodeLocation(codeBlock, jitType, bytecodeIndex, stackTrace.frames.last().semanticLocation);
         };
 
         auto appendEmptyFrame = [&] {
@@ -561,10 +566,10 @@
             origin.walkUpInlineStack([&] (const CodeOrigin& codeOrigin) {
                 machineOrigin = codeOrigin;
                 auto* inlineCallFrame = codeOrigin.inlineCallFrame();
-                appendCodeBlock(inlineCallFrame ? inlineCallFrame->baselineCodeBlock.get() : machineCodeBlock, codeOrigin.bytecodeIndex());
+                appendCodeBlock(inlineCallFrame ? inlineCallFrame->baselineCodeBlock.get() : machineCodeBlock, machineCodeBlock->jitType(), codeOrigin.bytecodeIndex());
             });
 
-            if (Options::collectSamplingProfilerDataForJSCShell()) {
+            if (Options::collectExtraSamplingProfilerData()) {
                 RELEASE_ASSERT(machineOrigin.isSet());
                 RELEASE_ASSERT(!machineOrigin.inlineCallFrame());
 
@@ -600,7 +605,7 @@
 
                     UNUSED_PARAM(bytecodeIndex); // FIXME: do something with this info for the web inspector: https://bugs.webkit.org/show_bug.cgi?id=153455
 
-                    appendCodeBlock(topCodeBlock, bytecodeIndex);
+                    appendCodeBlock(topCodeBlock, topCodeBlock->jitType(), bytecodeIndex);
                     storeCalleeIntoLastFrame(unprocessedStackTrace.frames[0]);
                     startIndex = 1;
                 }
@@ -622,7 +627,7 @@
                 CallSiteIndex callSiteIndex = unprocessedStackFrame.callSiteIndex;
 
                 auto appendCodeBlockNoInlining = [&] {
-                    appendCodeBlock(codeBlock, tryGetBytecodeIndex(callSiteIndex.bits(), codeBlock));
+                    appendCodeBlock(codeBlock, codeBlock->jitType(), tryGetBytecodeIndex(callSiteIndex.bits(), codeBlock));
                 };
 
 #if ENABLE(DFG_JIT)
@@ -630,7 +635,7 @@
                     if (codeBlock->canGetCodeOrigin(callSiteIndex))
                         appendCodeOrigin(codeBlock, codeBlock->codeOrigin(callSiteIndex));
                     else
-                        appendCodeBlock(codeBlock, BytecodeIndex());
+                        appendCodeBlock(codeBlock, codeBlock->jitType(), BytecodeIndex());
                 } else
                     appendCodeBlockNoInlining();
 #else
@@ -764,7 +769,6 @@
     }
 
     switch (frameType) {
-    case FrameType::Unknown:
     case FrameType::C:
 #if HAVE(DLADDR)
         if (frameType == FrameType::C) {
@@ -774,6 +778,8 @@
             WTF::dataLog("couldn't get a name");
         }
 #endif
+        return "(unknown C PC)"_s;
+    case FrameType::Unknown:
         return "(unknown)"_s;
 
     case FrameType::Host:
@@ -1092,6 +1098,32 @@
 
     size_t totalSamples = 0;
     HashMap<String, size_t> bytecodeCounts;
+    HashMap<String, size_t> tierCounts;
+
+    String llint = "LLInt"_s;
+    String baseline = "Baseline"_s;
+    String dfg = "DFG"_s;
+    String ftl = "FTL"_s;
+    String builtin = "js builtin"_s;
+    String wasm = "Wasm"_s;
+    String host = "Host"_s;
+    String cpp = "C/C++"_s;
+    String unknownFrame = "Unknown Frame"_s;
+    String unknownExecutable = "Unknown Executable"_s;
+
+    auto forEachTier = [&] (auto func) {
+        func(llint);
+        func(baseline);
+        func(dfg);
+        func(ftl);
+        func(builtin);
+        func(wasm);
+        func(host);
+        func(cpp);
+        func(unknownFrame);
+        func(unknownExecutable);
+    };
+
     for (StackTrace& stackTrace : m_stackTraces) {
         if (!stackTrace.frames.size())
             continue;
@@ -1127,6 +1159,57 @@
                 machineLocation->second->inferredName().data(), descriptionForLocation(machineLocation->first, WTF::nullopt));
         }
         bytecodeCounts.add(frameDescription, 0).iterator->value++;
+        
+        {
+            String tierName;
+            switch (frame.frameType) {
+            case SamplingProfiler::FrameType::Executable:
+                switch (frame.semanticLocation.jitType) {
+                case JITType::HostCallThunk:
+                    tierName = host;
+                    break;
+                case JITType::InterpreterThunk:
+                    tierName = llint;
+                    break;
+                case JITType::BaselineJIT:
+                    tierName = baseline;
+                    break;
+                case JITType::DFGJIT:
+                    tierName = dfg;
+                    break;
+                case JITType::FTLJIT:
+                    tierName = ftl;
+                    break;
+                default:
+                    tierName = unknownExecutable;
+                    break;
+                }
+
+                if (frame.executable) {
+                    if (auto* executable = jsDynamicCast<FunctionExecutable*>(m_vm, frame.executable)) {
+                        if (executable->isBuiltinFunction())
+                            tierCounts.add(builtin, 0).iterator->value++;
+                    }
+                }
+
+                break;
+            case SamplingProfiler::FrameType::Wasm:
+                tierName = wasm;
+                break;
+            case SamplingProfiler::FrameType::Host:
+                tierName = host;
+                break;
+            case SamplingProfiler::FrameType::C:
+                tierName = cpp;
+                break;
+            case SamplingProfiler::FrameType::Unknown:
+                tierName = unknownFrame;
+                break;
+            }
+
+            tierCounts.add(tierName, 0).iterator->value++;
+        }
+         
         totalSamples++;
     }
 
@@ -1145,7 +1228,27 @@
     };
 
     if (Options::samplingProfilerTopBytecodesCount()) {
-        out.println("\n\nSampling rate: ", m_timingInterval.microseconds(), " microseconds. Total samples: ", totalSamples);
+        out.println("\n\nSampling rate: ", m_timingInterval.microseconds(), " microseconds. Total samples: ", totalSamples, "\n");
+
+        out.println("Tier breakdown:");
+        out.println("-----------------------------------");
+        unsigned maxTierNameLength = 0;
+        forEachTier([&] (String tier) {
+            maxTierNameLength = std::max(maxTierNameLength, tier.length());
+        });
+        auto printTier = [&] (String tier) {
+            size_t count = tierCounts.get(tier);
+            if (!count && (tier == unknownFrame || tier == unknownExecutable))
+                return;
+            out.print(tier, ": ");
+            for (unsigned i = 0; i < maxTierNameLength + 2 - tier.length(); ++i)
+                out.print(" ");
+            out.printf("%6zu ", count);
+            out.println(" (", (static_cast<double>(count) / static_cast<double>(totalSamples)) * 100, "%)");
+        };
+        forEachTier(printTier);
+        out.println("\n");
+
         out.println("Hottest bytecodes as <numSamples   'functionName#hash:JITType:bytecodeIndex'>");
         for (size_t i = 0; i < Options::samplingProfilerTopBytecodesCount(); i++) {
             auto pair = takeMax();
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to