llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Expose the heatmap's profile score computation for the text section
under a new option `--print-heatmap-stats`.

This option collects and prints the following stats:
- hotness: percentage of samples attributed to the section,
- utilization: percentage of executed buckets,
- partition score: hotness times utilization, higher is better.

Test Plan:
Updated perf2bolt tests:
- pre-aggregated-perf.test: pre-aggregated data
- bolt-address-translation-yaml.test: pre-aggregated + BOLTed input
- perf_test.test: no-LBR perf data


---
Full diff: https://github.com/llvm/llvm-project/pull/139194.diff


9 Files Affected:

- (modified) bolt/include/bolt/Profile/DataAggregator.h (+6-1) 
- (modified) bolt/include/bolt/Profile/Heatmap.h (+2) 
- (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+50-27) 
- (modified) bolt/lib/Profile/Heatmap.cpp (+9) 
- (modified) bolt/lib/Utils/CommandLineOpts.cpp (+5) 
- (modified) bolt/test/X86/bolt-address-translation-yaml.test (+2-1) 
- (modified) bolt/test/X86/pre-aggregated-perf.test (+2-1) 
- (modified) bolt/test/perf2bolt/perf_test.test (+5-2) 


``````````diff
diff --git a/bolt/include/bolt/Profile/DataAggregator.h 
b/bolt/include/bolt/Profile/DataAggregator.h
index d66d198e37d61..ac036fe167eed 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -15,6 +15,7 @@
 #define BOLT_PROFILE_DATA_AGGREGATOR_H
 
 #include "bolt/Profile/DataReader.h"
+#include "bolt/Profile/Heatmap.h"
 #include "bolt/Profile/YAMLProfileWriter.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
@@ -270,8 +271,10 @@ class DataAggregator : public DataReader {
   /// everything
   bool hasData() const { return !ParsingBuf.empty(); }
 
+  /// Build heat map based on LBR samples.
+  Expected<Heatmap> buildHeatMap();
   /// Print heat map based on LBR samples.
-  std::error_code printLBRHeatMap();
+  void printHeatMap(const Heatmap::SectionStatsMap &, const Heatmap &) const;
 
   /// Parse a single perf sample containing a PID associated with a sequence of
   /// LBR entries. If the PID does not correspond to the binary we are looking
@@ -473,6 +476,8 @@ class DataAggregator : public DataReader {
   void printBranchSamplesDiagnostics() const;
   void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
   void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
+  void printHeatmapTextStats(const Heatmap &,
+                             const Heatmap::SectionStatsMap &) const;
 
 public:
   /// If perf.data was collected without build ids, the buildid-list may 
contain
diff --git a/bolt/include/bolt/Profile/Heatmap.h 
b/bolt/include/bolt/Profile/Heatmap.h
index c7b3d45fa5cc2..bb073833ec9f7 100644
--- a/bolt/include/bolt/Profile/Heatmap.h
+++ b/bolt/include/bolt/Profile/Heatmap.h
@@ -88,6 +88,8 @@ class Heatmap {
     uint64_t Buckets{0};
   };
 
+  uint64_t getNumBuckets(StringRef Name) const;
+
   /// Mapping from section name to associated \p SectionStats. Special entries:
   /// - [total] for total stats,
   /// - [unmapped] for samples outside any section, if non-zero.
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h 
b/bolt/include/bolt/Utils/CommandLineOpts.h
index 3de945f6a1507..b5a7be53e4189 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -44,6 +44,7 @@ extern llvm::cl::opt<unsigned> HeatmapBlock;
 extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
 extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
 extern llvm::cl::opt<bool> HeatmapPrintMappings;
+extern llvm::cl::opt<bool> HeatmapStats;
 extern llvm::cl::opt<bool> HotData;
 extern llvm::cl::opt<bool> HotFunctionsAtEnd;
 extern llvm::cl::opt<bool> HotText;
diff --git a/bolt/lib/Profile/DataAggregator.cpp 
b/bolt/lib/Profile/DataAggregator.cpp
index 11850fab28bb8..b0ad4c69e2334 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -508,21 +508,27 @@ Error DataAggregator::preprocessProfile(BinaryContext 
&BC) {
     errs() << "PERF2BOLT: failed to parse samples\n";
 
   // Special handling for memory events
-  if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
-    return Error::success();
-
-  if (const std::error_code EC = parseMemEvents())
-    errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
-           << '\n';
+  if (!prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
+    if (const std::error_code EC = parseMemEvents())
+      errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
+             << '\n';
 
   deleteTempFiles();
 
 heatmap:
+  if (!opts::HeatmapMode && !opts::HeatmapStats)
+    return Error::success();
+
+  Expected<Heatmap> HM = buildHeatMap();
+  if (!HM)
+    return HM.takeError();
+  Heatmap::SectionStatsMap Stats = HM->computeSectionStats();
   if (opts::HeatmapMode) {
-    if (std::error_code EC = printLBRHeatMap())
-      return errorCodeToError(EC);
+    printHeatMap(Stats, *HM);
     exit(0);
   }
+  // opts::HeatmapStats
+  printHeatmapTextStats(*HM, Stats);
 
   return Error::success();
 }
@@ -1310,7 +1316,7 @@ bool DataAggregator::ignoreKernelInterrupt(LBREntry &LBR) 
const {
          (LBR.From >= KernelBaseAddr || LBR.To >= KernelBaseAddr);
 }
 
-std::error_code DataAggregator::printLBRHeatMap() {
+Expected<Heatmap> DataAggregator::buildHeatMap() {
   outs() << "PERF2BOLT: parse branch events...\n";
   NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
                      TimerGroupDesc, opts::TimeAggregator);
@@ -1323,15 +1329,12 @@ std::error_code DataAggregator::printLBRHeatMap() {
              opts::HeatmapMaxAddress, getTextSections(BC));
 
   if (!NumTotalSamples) {
-    if (opts::BasicAggregation) {
-      errs() << "HEATMAP-ERROR: no basic event samples detected in profile. "
-                "Cannot build heatmap.";
-    } else {
-      errs() << "HEATMAP-ERROR: no LBR traces detected in profile. "
-                "Cannot build heatmap. Use -nl for building heatmap from "
-                "basic events.\n";
-    }
-    exit(1);
+    if (opts::BasicAggregation)
+      return createStringError(
+          "no basic event samples detected in profile. Cannot build heatmap");
+    return createStringError(
+        "no LBR traces detected in profile. Cannot build heatmap. Use -nl for "
+        "building heatmap from basic events");
   }
 
   outs() << "HEATMAP: building heat map...\n";
@@ -1347,24 +1350,44 @@ std::error_code DataAggregator::printLBRHeatMap() {
   if (HM.getNumInvalidRanges())
     outs() << "HEATMAP: invalid traces: " << HM.getNumInvalidRanges() << '\n';
 
-  if (!HM.size()) {
-    errs() << "HEATMAP-ERROR: no valid traces registered\n";
-    exit(1);
-  }
+  if (!HM.size())
+    return createStringError("no valid traces registered");
+  return HM;
+}
 
+void DataAggregator::printHeatMap(const Heatmap::SectionStatsMap &Stats,
+                                  const Heatmap &HM) const {
   HM.print(opts::OutputFilename);
   if (opts::OutputFilename == "-")
     HM.printCDF(opts::OutputFilename);
   else
     HM.printCDF(opts::OutputFilename + ".csv");
-  Heatmap::SectionStatsMap Stats = HM.computeSectionStats();
   if (opts::OutputFilename == "-")
     HM.printSectionHotness(Stats, opts::OutputFilename);
   else
     HM.printSectionHotness(Stats,
                            opts::OutputFilename + "-section-hotness.csv");
+}
 
-  return std::error_code();
+void DataAggregator::printHeatmapTextStats(
+    const Heatmap &HM, const Heatmap::SectionStatsMap &Stats) const {
+  Heatmap::SectionStatsMap::const_iterator TotalStatsIt = 
Stats.find("[total]");
+  assert(TotalStatsIt != Stats.end() && "Malformed SectionStatsMap");
+  Heatmap::SectionStatsMap::const_iterator TextStatsIt =
+      Stats.find(BC->getMainCodeSectionName());
+  if (TextStatsIt == Stats.end())
+    return;
+
+  const Heatmap::SectionStats &TextStats = TextStatsIt->second;
+  const Heatmap::SectionStats &TotalStats = TotalStatsIt->second;
+
+  const float TextHotness = 1. * TextStats.Samples / TotalStats.Samples;
+  const float TextUtilization =
+      1. * TextStats.Buckets / HM.getNumBuckets(BC->getMainCodeSectionName());
+  const float TextPartitionScore = TextHotness * TextUtilization;
+  outs() << "HEATMAP: " << BC->getMainCodeSectionName() << " scores: "
+         << formatv("hotness: {0:f4}, utilization: {1:f4}, partition: 
{2:f4}\n",
+                    TextHotness, TextUtilization, TextPartitionScore);
 }
 
 void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
@@ -1389,7 +1412,7 @@ void DataAggregator::parseLBRSample(const 
PerfBranchSample &Sample,
       const uint64_t TraceTo = NextLBR->From;
       const BinaryFunction *TraceBF =
           getBinaryFunctionContainingAddress(TraceFrom);
-      if (opts::HeatmapMode) {
+      if (opts::HeatmapMode || opts::HeatmapStats) {
         FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
         ++Info.InternCount;
       } else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
@@ -1426,7 +1449,7 @@ void DataAggregator::parseLBRSample(const 
PerfBranchSample &Sample,
     }
     NextLBR = &LBR;
 
-    if (opts::HeatmapMode) {
+    if (opts::HeatmapMode || opts::HeatmapStats) {
       TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
       ++Info.TakenCount;
       continue;
@@ -1439,7 +1462,7 @@ void DataAggregator::parseLBRSample(const 
PerfBranchSample &Sample,
     ++Info.TakenCount;
     Info.MispredCount += LBR.Mispred;
   }
-  if (opts::HeatmapMode && !Sample.LBR.empty()) {
+  if ((opts::HeatmapMode || opts::HeatmapStats) && !Sample.LBR.empty()) {
     ++BasicSamples[Sample.LBR.front().To];
     ++BasicSamples[Sample.LBR.back().From];
   }
diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp
index d3ff74f664046..09e4cdd7f4cd8 100644
--- a/bolt/lib/Profile/Heatmap.cpp
+++ b/bolt/lib/Profile/Heatmap.cpp
@@ -369,5 +369,14 @@ void Heatmap::printSectionHotness(const 
StringMap<SectionStats> &Stats,
   const float UnmappedPct = 100. * UnmappedIt->second.Samples / NumTotalCounts;
   OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n", UnmappedPct);
 }
+
+uint64_t Heatmap::getNumBuckets(StringRef Name) const {
+  auto It = llvm::find_if(TextSections, [Name](const SectionNameAndRange &Sec) 
{
+    return Sec.Name == Name;
+  });
+  if (It == TextSections.end())
+    return 0;
+  return getNumBuckets(It->BeginAddress, It->EndAddress);
+}
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp 
b/bolt/lib/Utils/CommandLineOpts.cpp
index ad714371436e0..98ae00b34652a 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -119,6 +119,11 @@ cl::opt<bool> HeatmapPrintMappings(
              "sections (default false)"),
     cl::Optional, cl::cat(HeatmapCategory));
 
+cl::opt<bool> HeatmapStats(
+    "print-heatmap-stats",
+    cl::desc("print heatmap statistics without producing the heatmap"),
+    cl::Optional, cl::cat(HeatmapCategory));
+
 cl::opt<bool> HotData("hot-data",
                       cl::desc("hot data symbols support (relocation mode)"),
                       cl::cat(BoltCategory));
diff --git a/bolt/test/X86/bolt-address-translation-yaml.test 
b/bolt/test/X86/bolt-address-translation-yaml.test
index a6a212d9c1b38..443702dd6c4fb 100644
--- a/bolt/test/X86/bolt-address-translation-yaml.test
+++ b/bolt/test/X86/bolt-address-translation-yaml.test
@@ -28,7 +28,7 @@ ORDER-YAML-CHECK-NEXT: calls: [ { off: 0x26, fid: [[#]], cnt: 
20 } ]
 ORDER-YAML-CHECK-NEXT: succ: [ { bid: 5, cnt: 7 }
 ## Large profile test
 RUN: perf2bolt %t.out --pa -p %p/Inputs/blarge_new_bat.preagg.txt -w %t.yaml 
-o %t.fdata \
-RUN:   2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s
+RUN:   --print-heatmap-stats 2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s
 RUN: FileCheck --input-file %t.yaml --check-prefix YAML-BAT-CHECK %s
 ## Check that YAML converted from fdata matches YAML created directly with BAT.
 RUN: llvm-bolt %t.exe -data %t.fdata -w %t.yaml-fdata -o /dev/null \
@@ -46,6 +46,7 @@ WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404
 READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for 
input file processed by BOLT
 READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries
 READ-BAT-CHECK: PERF2BOLT: read 79 aggregated LBR entries
+READ-BAT-CHECK: HEATMAP: .text scores: hotness: 0.3876, utilization: 0.9167, 
partition: 0.3553
 READ-BAT-CHECK: BOLT-INFO: 5 out of 21 functions in the binary (23.8%) have 
non-empty execution profile
 READ-BAT-FDATA-CHECK: BOLT-INFO: 5 out of 16 functions in the binary (31.2%) 
have non-empty execution profile
 
diff --git a/bolt/test/X86/pre-aggregated-perf.test 
b/bolt/test/X86/pre-aggregated-perf.test
index cf745ca7bf7b6..926b0cd44b8b4 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -11,10 +11,11 @@ REQUIRES: system-linux
 
 RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
 RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
-RUN:   --show-density \
+RUN:   --show-density --print-heatmap-stats \
 RUN:   --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
 RUN:   --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B
 
+CHECK-P2B: HEATMAP: .text scores: hotness: 1.0000, utilization: 0.0426, 
partition: 0.0426
 CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have 
non-empty execution profile
 CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total 
sample counts.
 
diff --git a/bolt/test/perf2bolt/perf_test.test 
b/bolt/test/perf2bolt/perf_test.test
index 44111de89a4ea..f55e09191c3d6 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -4,12 +4,15 @@ REQUIRES: system-linux, perf
 
 RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld 
-Wl,--script=%S/Inputs/perf_test.lds -o %t
 RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
-RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
+RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --print-heatmap-stats \
+RUN:   2>&1 | FileCheck %s
 
 CHECK-NOT: PERF2BOLT-ERROR
 CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary 
is probably not the same binary used during profiling collection.
+CHECK: HEATMAP: .text scores: hotness: {{.*}}, utilization: {{.*}}, partition: 
{{.*}}
 CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total 
sample counts.
 
 RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
-RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s
+RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id --print-heatmap-stats \
+RUN:   2>&1 | FileCheck %s

``````````

</details>


https://github.com/llvm/llvm-project/pull/139194
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to