llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Add a pre-aggregated profile output format (`--profile-format=preagg`) so perf.data can be pre-parsed/aggregated and used as input with -pa. Supports branch and basic profiles, emits traces (T/R) and S records. Currently only covers main binary, can be extended to cover multi-DSO. Test Plan: Updated perf_test.test, added perf_brstack.test --- Full diff: https://github.com/llvm/llvm-project/pull/199465.diff 8 Files Affected: - (modified) bolt/docs/profiles.md (+7) - (modified) bolt/include/bolt/Profile/DataAggregator.h (+5-2) - (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1-1) - (modified) bolt/lib/Profile/DataAggregator.cpp (+30-2) - (modified) bolt/lib/Utils/CommandLineOpts.cpp (+3-1) - (modified) bolt/test/perf2bolt/lit.local.cfg (+8-1) - (added) bolt/test/perf2bolt/perf_brstack.test (+12) - (modified) bolt/test/perf2bolt/perf_test.test (+4) ``````````diff diff --git a/bolt/docs/profiles.md b/bolt/docs/profiles.md index f208620be85ce..4900c2fb008d0 100644 --- a/bolt/docs/profiles.md +++ b/bolt/docs/profiles.md @@ -16,6 +16,8 @@ $ perf2bolt executable \ -o perf.fdata [-w perf.yaml] # the output format for `-o` can be switched with `--profile-format`: -o perf.yaml --profile-format=yaml +# perf.data can also be cached as pre-aggregated trace data: + -o perf.preagg --profile-format=preagg ``` # Unsymbolized profiles @@ -153,6 +155,11 @@ Pre-aggregated profiles can be generated by external tools. See [ebpf-bolt](https://github.com/aaupov/ebpf-bolt) for a reference implementation using eBPF-based collection. +`perf2bolt` can generate a pre-aggregated profile from `perf.data`: +``` +perf2bolt ./binary -p perf.data -o perf.preagg --profile-format=preagg +``` + # Symbolized profiles The profiles accepted by llvm-bolt. fdata is the legacy format, YAML is the rich (metadata-enabled) format. 
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index c5133e90d07a6..dca24b9c57983 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -555,8 +555,11 @@ class DataAggregator : public DataReader { /// Force all subprocesses to stop and cancel aggregation void abort(); - /// Dump data structures into a file readable by llvm-bolt - std::error_code writeAggregatedFile(StringRef OutputFilename) const; + /// Dump data structures into an fdata file readable by llvm-bolt. + std::error_code writeFdataFile(StringRef OutputFilename) const; + + /// Dump TraceMap into a pre-aggregated file readable by perf2bolt -pa. + std::error_code writePreAggregatedFile(StringRef OutputFilename) const; /// Dump translated data structures into YAML std::error_code writeBATYAML(BinaryContext &BC, diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index dc193477023d7..5a6440034350f 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -101,7 +101,7 @@ extern llvm::cl::opt<bool> UpdateBranchProtection; extern llvm::cl::opt<SplitFunctionsStrategy> SplitStrategy; // The format to use with -o in aggregation mode (perf2bolt) -enum ProfileFormatKind { PF_Fdata, PF_YAML }; +enum ProfileFormatKind { PF_Fdata, PF_YAML, PF_PreAgg }; extern llvm::cl::opt<ProfileFormatKind> ProfileFormat; extern llvm::cl::opt<bool> ShowDensity; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 5b46e8a30729b..b5017ccdcdb7f 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -790,6 +790,13 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { exit(0); } + if (opts::AggregateOnly && + opts::ProfileFormat == opts::ProfileFormatKind::PF_PreAgg) { + if (std::error_code EC = writePreAggregatedFile(opts::OutputFilename)) + 
report_error("cannot create output data file", EC); + exit(0); + } + return Error::success(); } @@ -801,7 +808,7 @@ Error DataAggregator::readProfile(BinaryContext &BC) { if (opts::AggregateOnly) { if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata) - if (std::error_code EC = writeAggregatedFile(opts::OutputFilename)) + if (std::error_code EC = writeFdataFile(opts::OutputFilename)) report_error("cannot create output data file", EC); // BAT YAML is handled by DataAggregator since normal YAML output requires @@ -2325,7 +2332,28 @@ DataAggregator::getFileNameForBuildID(StringRef FileBuildID) { } std::error_code -DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { +DataAggregator::writePreAggregatedFile(StringRef OutputFilename) const { + std::error_code EC; + raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::OF_None); + if (EC) + return EC; + + for (const auto &[Trace, Info] : Traces) { + const bool IsReturn = Returns.find(Trace.Branch) != Returns.end(); + OS << formatv("{0} {1:x-} {2:x-} {3:x-} {4}\n", IsReturn ? 
'R' : 'T', + Trace.Branch, Trace.From, Trace.To, Info.TakenCount); + } + OS << formatv("E {0:$[,]}\n", EventNames.keys()); + for (const auto &[PC, Count] : BasicSamples) + OS << formatv("S {0:x-} {1}\n", PC, Count); + + outs() << "PERF2BOLT: wrote " << Traces.size() + BasicSamples.size() + << " pre-aggregated objects to " << OutputFilename << "\n"; + + return std::error_code(); +} + +std::error_code DataAggregator::writeFdataFile(StringRef OutputFilename) const { std::error_code EC; raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None); if (EC) diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index cbd0be4a806ae..36a55d7a9d283 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -277,7 +277,9 @@ cl::opt<ProfileFormatKind> ProfileFormat( "format to dump profile output in aggregation mode, default is fdata"), cl::init(PF_Fdata), cl::values(clEnumValN(PF_Fdata, "fdata", "offset-based plaintext format"), - clEnumValN(PF_YAML, "yaml", "dense YAML representation")), + clEnumValN(PF_YAML, "yaml", "dense YAML representation"), + clEnumValN(PF_PreAgg, "preagg", + "pre-aggregated profile format")), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); cl::opt<std::string> SaveProfile("w", diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg index 0fecf913aa98b..acad9786bd8d3 100644 --- a/bolt/test/perf2bolt/lit.local.cfg +++ b/bolt/test/perf2bolt/lit.local.cfg @@ -1,5 +1,12 @@ import shutil import subprocess -if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0: +cmd = "perf record -e cycles:u -o /dev/null {} -- perf --version" +cmd_basic = cmd.format("").split() +cmd_brstack = cmd.format("-j any,u").split() + +if shutil.which("perf") is not None: + if subprocess.run(cmd_basic, capture_output=True).returncode == 0: 
config.available_features.add("perf") + if subprocess.run(cmd_brstack, capture_output=True).returncode == 0: + config.available_features.add("perf-brstack") diff --git a/bolt/test/perf2bolt/perf_brstack.test b/bolt/test/perf2bolt/perf_brstack.test new file mode 100644 index 0000000000000..e93f440650006 --- /dev/null +++ b/bolt/test/perf2bolt/perf_brstack.test @@ -0,0 +1,12 @@ +# Check perf2bolt pre-aggregated profile emission from branch-stack perf data. + +REQUIRES: system-linux, perf-brstack + +RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t +RUN: perf record -Fmax -j any,u -e cycles:u -o %t.perf.data -- %t +RUN: perf2bolt %t -p=%t.perf.data -o %t.fdata -ignore-build-id +RUN: perf2bolt %t -p=%t.perf.data -o %t.preagg -ignore-build-id --profile-format=preagg +RUN: perf2bolt %t -pa -p=%t.preagg -o %t.roundtrip.fdata -ignore-build-id +RUN: sort %t.fdata > %t.fdata.sorted +RUN: sort %t.roundtrip.fdata > %t.roundtrip.fdata.sorted +RUN: diff %t.fdata.sorted %t.roundtrip.fdata.sorted diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test index e34ac76632113..03b13398c1055 100644 --- a/bolt/test/perf2bolt/perf_test.test +++ b/bolt/test/perf2bolt/perf_test.test @@ -17,6 +17,10 @@ RUN: cmp %t3.multi %t3.comma RUN: merge-fdata %t3 %t3 | sort > %t3.x2 RUN: sort %t3.multi > %t3.multi.x2 RUN: cmp %t3.x2 %t3.multi.x2 +# Pre-aggregated output: compare perf->preagg->fdata vs perf->fdata +RUN: perf2bolt %t -p=%t2 -o %t2.pa -ba -ignore-build-id --profile-format=preagg +RUN: perf2bolt %t -p=%t2.pa -o %t2.pa.fdata -ba -pa +RUN: cmp %t2.pa.fdata %t3 CHECK-NOT: PERF2BOLT-ERROR CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. 
`````````` </details> https://github.com/llvm/llvm-project/pull/199465 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
