llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Add a pre-aggregated profile output format (`--profile-format=preagg`)
so perf.data can be pre-parsed/aggregated and used as input with -pa.

Supports branch and basic profiles, emits traces (T/R) and S records.

Currently this covers only the main binary; it can be extended to cover multi-DSO.

Test Plan: Updated perf_test.test, added perf_brstack.test


---
Full diff: https://github.com/llvm/llvm-project/pull/199465.diff


8 Files Affected:

- (modified) bolt/docs/profiles.md (+7) 
- (modified) bolt/include/bolt/Profile/DataAggregator.h (+5-2) 
- (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1-1) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+30-2) 
- (modified) bolt/lib/Utils/CommandLineOpts.cpp (+3-1) 
- (modified) bolt/test/perf2bolt/lit.local.cfg (+8-1) 
- (added) bolt/test/perf2bolt/perf_brstack.test (+12) 
- (modified) bolt/test/perf2bolt/perf_test.test (+4) 


``````````diff
diff --git a/bolt/docs/profiles.md b/bolt/docs/profiles.md
index f208620be85ce..4900c2fb008d0 100644
--- a/bolt/docs/profiles.md
+++ b/bolt/docs/profiles.md
@@ -16,6 +16,8 @@ $ perf2bolt executable \
   -o perf.fdata [-w perf.yaml]
 # the output format for `-o` can be switched with `--profile-format`:
   -o perf.yaml --profile-format=yaml
+# perf.data can also be cached as pre-aggregated trace data:
+  -o perf.preagg --profile-format=preagg
 ```
 
 # Unsymbolized profiles
@@ -153,6 +155,11 @@ Pre-aggregated profiles can be generated by external tools. See
 [ebpf-bolt](https://github.com/aaupov/ebpf-bolt) for a reference
 implementation using eBPF-based collection.
 
+`perf2bolt` can generate a pre-aggregated profile from `perf.data`:
+```
+perf2bolt ./binary -p perf.data -o perf.preagg --profile-format=preagg
+```
+
 # Symbolized profiles
 The profiles accepted by llvm-bolt. fdata is the legacy format, YAML is the rich (metadata-enabled) format.
 
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index c5133e90d07a6..dca24b9c57983 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -555,8 +555,11 @@ class DataAggregator : public DataReader {
   /// Force all subprocesses to stop and cancel aggregation
   void abort();
 
-  /// Dump data structures into a file readable by llvm-bolt
-  std::error_code writeAggregatedFile(StringRef OutputFilename) const;
+  /// Dump data structures into an fdata file readable by llvm-bolt.
+  std::error_code writeFdataFile(StringRef OutputFilename) const;
+
+  /// Dump TraceMap into a pre-aggregated file readable by perf2bolt -pa.
+  std::error_code writePreAggregatedFile(StringRef OutputFilename) const;
 
   /// Dump translated data structures into YAML
   std::error_code writeBATYAML(BinaryContext &BC,
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index dc193477023d7..5a6440034350f 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -101,7 +101,7 @@ extern llvm::cl::opt<bool> UpdateBranchProtection;
 extern llvm::cl::opt<SplitFunctionsStrategy> SplitStrategy;
 
 // The format to use with -o in aggregation mode (perf2bolt)
-enum ProfileFormatKind { PF_Fdata, PF_YAML };
+enum ProfileFormatKind { PF_Fdata, PF_YAML, PF_PreAgg };
 
 extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
 extern llvm::cl::opt<bool> ShowDensity;
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 5b46e8a30729b..b5017ccdcdb7f 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -790,6 +790,13 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
       exit(0);
   }
 
+  if (opts::AggregateOnly &&
+      opts::ProfileFormat == opts::ProfileFormatKind::PF_PreAgg) {
+    if (std::error_code EC = writePreAggregatedFile(opts::OutputFilename))
+      report_error("cannot create output data file", EC);
+    exit(0);
+  }
+
   return Error::success();
 }
 
@@ -801,7 +808,7 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
 
   if (opts::AggregateOnly) {
     if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
-      if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
+      if (std::error_code EC = writeFdataFile(opts::OutputFilename))
         report_error("cannot create output data file", EC);
 
     // BAT YAML is handled by DataAggregator since normal YAML output requires
@@ -2325,7 +2332,28 @@ DataAggregator::getFileNameForBuildID(StringRef FileBuildID) {
 }
 
 std::error_code
-DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
+DataAggregator::writePreAggregatedFile(StringRef OutputFilename) const {
+  std::error_code EC;
+  raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
+  if (EC)
+    return EC;
+
+  for (const auto &[Trace, Info] : Traces) {
+    const bool IsReturn = Returns.find(Trace.Branch) != Returns.end();
+    OS << formatv("{0} {1:x-} {2:x-} {3:x-} {4}\n", IsReturn ? 'R' : 'T',
+                  Trace.Branch, Trace.From, Trace.To, Info.TakenCount);
+  }
+  OS << formatv("E {0:$[,]}\n", EventNames.keys());
+  for (const auto &[PC, Count] : BasicSamples)
+    OS << formatv("S {0:x-} {1}\n", PC, Count);
+
+  outs() << "PERF2BOLT: wrote " << Traces.size() + BasicSamples.size()
+         << " pre-aggregated objects to " << OutputFilename << "\n";
+
+  return std::error_code();
+}
+
+std::error_code DataAggregator::writeFdataFile(StringRef OutputFilename) const {
   std::error_code EC;
   raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
   if (EC)
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index cbd0be4a806ae..36a55d7a9d283 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -277,7 +277,9 @@ cl::opt<ProfileFormatKind> ProfileFormat(
         "format to dump profile output in aggregation mode, default is fdata"),
     cl::init(PF_Fdata),
     cl::values(clEnumValN(PF_Fdata, "fdata", "offset-based plaintext format"),
-               clEnumValN(PF_YAML, "yaml", "dense YAML representation")),
+               clEnumValN(PF_YAML, "yaml", "dense YAML representation"),
+               clEnumValN(PF_PreAgg, "preagg",
+                          "pre-aggregated profile format")),
     cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
 
 cl::opt<std::string> SaveProfile("w",
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
index 0fecf913aa98b..acad9786bd8d3 100644
--- a/bolt/test/perf2bolt/lit.local.cfg
+++ b/bolt/test/perf2bolt/lit.local.cfg
@@ -1,5 +1,12 @@
 import shutil
 import subprocess
 
-if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0:
+cmd = "perf record -e cycles:u -o /dev/null {} -- perf --version"
+cmd_basic = cmd.format("").split()
+cmd_brstack = cmd.format("-j any,u").split()
+
+if shutil.which("perf") is not None:
+  if subprocess.run(cmd_basic, capture_output=True).returncode == 0:
     config.available_features.add("perf")
+  if subprocess.run(cmd_brstack, capture_output=True).returncode == 0:
+    config.available_features.add("perf-brstack")
diff --git a/bolt/test/perf2bolt/perf_brstack.test b/bolt/test/perf2bolt/perf_brstack.test
new file mode 100644
index 0000000000000..e93f440650006
--- /dev/null
+++ b/bolt/test/perf2bolt/perf_brstack.test
@@ -0,0 +1,12 @@
+# Check perf2bolt pre-aggregated profile emission from branch-stack perf data.
+
+REQUIRES: system-linux, perf-brstack
+
+RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t
+RUN: perf record -Fmax -j any,u -e cycles:u -o %t.perf.data -- %t
+RUN: perf2bolt %t -p=%t.perf.data -o %t.fdata -ignore-build-id
+RUN: perf2bolt %t -p=%t.perf.data -o %t.preagg -ignore-build-id --profile-format=preagg
+RUN: perf2bolt %t -pa -p=%t.preagg -o %t.roundtrip.fdata -ignore-build-id
+RUN: sort %t.fdata > %t.fdata.sorted
+RUN: sort %t.roundtrip.fdata > %t.roundtrip.fdata.sorted
+RUN: diff %t.fdata.sorted %t.roundtrip.fdata.sorted
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
index e34ac76632113..03b13398c1055 100644
--- a/bolt/test/perf2bolt/perf_test.test
+++ b/bolt/test/perf2bolt/perf_test.test
@@ -17,6 +17,10 @@ RUN: cmp %t3.multi %t3.comma
 RUN: merge-fdata %t3 %t3 | sort > %t3.x2
 RUN: sort %t3.multi > %t3.multi.x2
 RUN: cmp %t3.x2 %t3.multi.x2
+# Pre-aggregated output: compare perf->preagg->fdata vs perf->fdata
+RUN: perf2bolt %t -p=%t2 -o %t2.pa -ba -ignore-build-id --profile-format=preagg
+RUN: perf2bolt %t -p=%t2.pa -o %t2.pa.fdata -ba -pa
+RUN: cmp %t2.pa.fdata %t3
 
 CHECK-NOT: PERF2BOLT-ERROR
 CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.

``````````

</details>


https://github.com/llvm/llvm-project/pull/199465
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to