llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Expose heatmap functionality of profile score computation for text section under a new option `--print-heatmap-stats`. This option collects and prints the following stats: - hotness: percentage of samples attributed to the section, - utilization: percentage of executed buckets, - partition score: hotness times utilization, higher is better. Test Plan: updated perf2bolt tests - pre-aggregated-perf.test: pre-aggregated data - bolt-address-translation-yaml.test: pre-aggregated + BOLTed input - perf_test.test: no-LBR perf data --- Full diff: https://github.com/llvm/llvm-project/pull/139194.diff 9 Files Affected: - (modified) bolt/include/bolt/Profile/DataAggregator.h (+6-1) - (modified) bolt/include/bolt/Profile/Heatmap.h (+2) - (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1) - (modified) bolt/lib/Profile/DataAggregator.cpp (+50-27) - (modified) bolt/lib/Profile/Heatmap.cpp (+9) - (modified) bolt/lib/Utils/CommandLineOpts.cpp (+5) - (modified) bolt/test/X86/bolt-address-translation-yaml.test (+2-1) - (modified) bolt/test/X86/pre-aggregated-perf.test (+2-1) - (modified) bolt/test/perf2bolt/perf_test.test (+5-2) ``````````diff diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index d66d198e37d61..ac036fe167eed 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -15,6 +15,7 @@ #define BOLT_PROFILE_DATA_AGGREGATOR_H #include "bolt/Profile/DataReader.h" +#include "bolt/Profile/Heatmap.h" #include "bolt/Profile/YAMLProfileWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -270,8 +271,10 @@ class DataAggregator : public DataReader { /// everything bool hasData() const { return !ParsingBuf.empty(); } + /// Build heat map based on LBR samples. 
+ Expected<Heatmap> buildHeatMap(); /// Print heat map based on LBR samples. - std::error_code printLBRHeatMap(); + void printHeatMap(const Heatmap::SectionStatsMap &, const Heatmap &) const; /// Parse a single perf sample containing a PID associated with a sequence of /// LBR entries. If the PID does not correspond to the binary we are looking @@ -473,6 +476,8 @@ class DataAggregator : public DataReader { void printBranchSamplesDiagnostics() const; void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const; void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const; + void printHeatmapTextStats(const Heatmap &, + const Heatmap::SectionStatsMap &) const; public: /// If perf.data was collected without build ids, the buildid-list may contain diff --git a/bolt/include/bolt/Profile/Heatmap.h b/bolt/include/bolt/Profile/Heatmap.h index c7b3d45fa5cc2..bb073833ec9f7 100644 --- a/bolt/include/bolt/Profile/Heatmap.h +++ b/bolt/include/bolt/Profile/Heatmap.h @@ -88,6 +88,8 @@ class Heatmap { uint64_t Buckets{0}; }; + uint64_t getNumBuckets(StringRef Name) const; + /// Mapping from section name to associated \p SectionStats. Special entries: /// - [total] for total stats, /// - [unmapped] for samples outside any section, if non-zero. 
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 3de945f6a1507..b5a7be53e4189 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -44,6 +44,7 @@ extern llvm::cl::opt<unsigned> HeatmapBlock; extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress; extern llvm::cl::opt<unsigned long long> HeatmapMinAddress; extern llvm::cl::opt<bool> HeatmapPrintMappings; +extern llvm::cl::opt<bool> HeatmapStats; extern llvm::cl::opt<bool> HotData; extern llvm::cl::opt<bool> HotFunctionsAtEnd; extern llvm::cl::opt<bool> HotText; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 11850fab28bb8..b0ad4c69e2334 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -508,21 +508,27 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { errs() << "PERF2BOLT: failed to parse samples\n"; // Special handling for memory events - if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) - return Error::success(); - - if (const std::error_code EC = parseMemEvents()) - errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() - << '\n'; + if (!prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) + if (const std::error_code EC = parseMemEvents()) + errs() << "PERF2BOLT: failed to parse memory events: " << EC.message() + << '\n'; deleteTempFiles(); heatmap: + if (!opts::HeatmapMode && !opts::HeatmapStats) + return Error::success(); + + Expected<Heatmap> HM = buildHeatMap(); + if (!HM) + return HM.takeError(); + Heatmap::SectionStatsMap Stats = HM->computeSectionStats(); if (opts::HeatmapMode) { - if (std::error_code EC = printLBRHeatMap()) - return errorCodeToError(EC); + printHeatMap(Stats, *HM); exit(0); } + // opts::HeatmapStats + printHeatmapTextStats(*HM, Stats); return Error::success(); } @@ -1310,7 +1316,7 @@ bool 
DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) const { (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr); } -std::error_code DataAggregator::printLBRHeatMap() { +Expected<Heatmap> DataAggregator::buildHeatMap() { outs() << "PERF2BOLT: parse branch events...\n"; NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); @@ -1323,15 +1329,12 @@ std::error_code DataAggregator::printLBRHeatMap() { opts::HeatmapMaxAddress, getTextSections(BC)); if (!NumTotalSamples) { - if (opts::BasicAggregation) { - errs() << "HEATMAP-ERROR: no basic event samples detected in profile. " - "Cannot build heatmap."; - } else { - errs() << "HEATMAP-ERROR: no LBR traces detected in profile. " - "Cannot build heatmap. Use -nl for building heatmap from " - "basic events.\n"; - } - exit(1); + if (opts::BasicAggregation) + return createStringError( + "no basic event samples detected in profile. Cannot build heatmap"); + return createStringError( + "no LBR traces detected in profile. Cannot build heatmap. 
Use -nl for " + "building heatmap from basic events"); } outs() << "HEATMAP: building heat map...\n"; @@ -1347,24 +1350,44 @@ std::error_code DataAggregator::printLBRHeatMap() { if (HM.getNumInvalidRanges()) outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n'; - if (!HM.size()) { - errs() << "HEATMAP-ERROR: no valid traces registered\n"; - exit(1); - } + if (!HM.size()) + return createStringError("no valid traces registered"); + return HM; +} +void DataAggregator::printHeatMap(const Heatmap::SectionStatsMap &Stats, + const Heatmap &HM) const { HM.print(opts::OutputFilename); if (opts::OutputFilename == "-") HM.printCDF(opts::OutputFilename); else HM.printCDF(opts::OutputFilename + ".csv"); - Heatmap::SectionStatsMap Stats = HM.computeSectionStats(); if (opts::OutputFilename == "-") HM.printSectionHotness(Stats, opts::OutputFilename); else HM.printSectionHotness(Stats, opts::OutputFilename + "-section-hotness.csv"); +} - return std::error_code(); +void DataAggregator::printHeatmapTextStats( + const Heatmap &HM, const Heatmap::SectionStatsMap &Stats) const { + Heatmap::SectionStatsMap::const_iterator TotalStatsIt = Stats.find("[total]"); + assert(TotalStatsIt != Stats.end() && "Malformed SectionStatsMap"); + Heatmap::SectionStatsMap::const_iterator TextStatsIt = + Stats.find(BC->getMainCodeSectionName()); + if (TextStatsIt == Stats.end()) + return; + + const Heatmap::SectionStats &TextStats = TextStatsIt->second; + const Heatmap::SectionStats &TotalStats = TotalStatsIt->second; + + const float TextHotness = 1. * TextStats.Samples / TotalStats.Samples; + const float TextUtilization = + 1. 
* TextStats.Buckets / HM.getNumBuckets(BC->getMainCodeSectionName()); + const float TextPartitionScore = TextHotness * TextUtilization; + outs() << "HEATMAP: " << BC->getMainCodeSectionName() << " scores: " + << formatv("hotness: {0:f4}, utilization: {1:f4}, partition: {2:f4}\n", + TextHotness, TextUtilization, TextPartitionScore); } void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, @@ -1389,7 +1412,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, const uint64_t TraceTo = NextLBR->From; const BinaryFunction *TraceBF = getBinaryFunctionContainingAddress(TraceFrom); - if (opts::HeatmapMode) { + if (opts::HeatmapMode || opts::HeatmapStats) { FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)]; ++Info.InternCount; } else if (TraceBF && TraceBF->containsAddress(TraceTo)) { @@ -1426,7 +1449,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, } NextLBR = &LBR; - if (opts::HeatmapMode) { + if (opts::HeatmapMode || opts::HeatmapStats) { TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)]; ++Info.TakenCount; continue; @@ -1439,7 +1462,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample, ++Info.TakenCount; Info.MispredCount += LBR.Mispred; } - if (opts::HeatmapMode && !Sample.LBR.empty()) { + if ((opts::HeatmapMode || opts::HeatmapStats) && !Sample.LBR.empty()) { ++BasicSamples[Sample.LBR.front().To]; ++BasicSamples[Sample.LBR.back().From]; } diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp index d3ff74f664046..09e4cdd7f4cd8 100644 --- a/bolt/lib/Profile/Heatmap.cpp +++ b/bolt/lib/Profile/Heatmap.cpp @@ -369,5 +369,14 @@ void Heatmap::printSectionHotness(const StringMap<SectionStats> &Stats, const float UnmappedPct = 100. 
* UnmappedIt->second.Samples / NumTotalCounts; OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n", UnmappedPct); } + +uint64_t Heatmap::getNumBuckets(StringRef Name) const { + auto It = llvm::find_if(TextSections, [Name](const SectionNameAndRange &Sec) { + return Sec.Name == Name; + }); + if (It == TextSections.end()) + return 0; + return getNumBuckets(It->BeginAddress, It->EndAddress); +} } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index ad714371436e0..98ae00b34652a 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -119,6 +119,11 @@ cl::opt<bool> HeatmapPrintMappings( "sections (default false)"), cl::Optional, cl::cat(HeatmapCategory)); +cl::opt<bool> HeatmapStats( + "print-heatmap-stats", + cl::desc("print heatmap statistics without producing the heatmap"), + cl::Optional, cl::cat(HeatmapCategory)); + cl::opt<bool> HotData("hot-data", cl::desc("hot data symbols support (relocation mode)"), cl::cat(BoltCategory)); diff --git a/bolt/test/X86/bolt-address-translation-yaml.test b/bolt/test/X86/bolt-address-translation-yaml.test index a6a212d9c1b38..443702dd6c4fb 100644 --- a/bolt/test/X86/bolt-address-translation-yaml.test +++ b/bolt/test/X86/bolt-address-translation-yaml.test @@ -28,7 +28,7 @@ ORDER-YAML-CHECK-NEXT: calls: [ { off: 0x26, fid: [[#]], cnt: 20 } ] ORDER-YAML-CHECK-NEXT: succ: [ { bid: 5, cnt: 7 } ## Large profile test RUN: perf2bolt %t.out --pa -p %p/Inputs/blarge_new_bat.preagg.txt -w %t.yaml -o %t.fdata \ -RUN: 2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s +RUN: --print-heatmap-stats 2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s RUN: FileCheck --input-file %t.yaml --check-prefix YAML-BAT-CHECK %s ## Check that YAML converted from fdata matches YAML created directly with BAT. 
RUN: llvm-bolt %t.exe -data %t.fdata -w %t.yaml-fdata -o /dev/null \ @@ -46,6 +46,7 @@ WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404 READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries READ-BAT-CHECK: PERF2BOLT: read 79 aggregated LBR entries +READ-BAT-CHECK: HEATMAP: .text scores: hotness: 0.3876, utilization: 0.9167, partition: 0.3553 READ-BAT-CHECK: BOLT-INFO: 5 out of 21 functions in the binary (23.8%) have non-empty execution profile READ-BAT-FDATA-CHECK: BOLT-INFO: 5 out of 16 functions in the binary (31.2%) have non-empty execution profile diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index cf745ca7bf7b6..926b0cd44b8b4 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -11,10 +11,11 @@ REQUIRES: system-linux RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \ -RUN: --show-density \ +RUN: --show-density --print-heatmap-stats \ RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \ RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B +CHECK-P2B: HEATMAP: .text scores: hotness: 1.0000, utilization: 0.0426, partition: 0.0426 CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts. 
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test index 44111de89a4ea..f55e09191c3d6 100644 --- a/bolt/test/perf2bolt/perf_test.test +++ b/bolt/test/perf2bolt/perf_test.test @@ -4,12 +4,15 @@ REQUIRES: system-linux, perf RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t RUN: perf record -Fmax -e cycles:u -o %t2 -- %t -RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s +RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --print-heatmap-stats \ +RUN: 2>&1 | FileCheck %s CHECK-NOT: PERF2BOLT-ERROR CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. +CHECK: HEATMAP: .text scores: hotness: {{.*}}, utilization: {{.*}}, partition: {{.*}} CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts. RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4 -RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s +RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id --print-heatmap-stats \ +RUN: 2>&1 | FileCheck %s `````````` </details> https://github.com/llvm/llvm-project/pull/139194 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits