llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Allow -p/-perfdata to accept multiple files as comma-separated and repeated arguments. Process them with `-perfdata-jobs` (default 4). Keep YAML/DataReader inputs single-profile only. Test Plan: Updated pre-aggregated-perf.test and perf_test --- Full diff: https://github.com/llvm/llvm-project/pull/199324.diff 9 Files Affected: - (modified) bolt/include/bolt/Profile/DataAggregator.h (+12-3) - (modified) bolt/include/bolt/Utils/CommandLineOpts.h (+1-1) - (modified) bolt/lib/Profile/DataAggregator.cpp (+49-5) - (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+7) - (modified) bolt/lib/Utils/CommandLineOpts.cpp (+8-9) - (modified) bolt/test/X86/pre-aggregated-perf.test (+5) - (modified) bolt/test/perf2bolt/perf_test.test (+11) - (modified) bolt/tools/driver/llvm-bolt.cpp (+3-2) - (modified) bolt/tools/heatmap/heatmap.cpp (+3-2) ``````````diff diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 5f63087ec7409..c5133e90d07a6 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -51,9 +51,7 @@ class BoltAddressTranslation; /// specified by the user. class DataAggregator : public DataReader { public: - explicit DataAggregator(StringRef Filename) : DataReader(Filename) { - start(); - } + explicit DataAggregator(StringRef Filename) : DataReader(Filename) {} ~DataAggregator(); @@ -69,6 +67,10 @@ class DataAggregator : public DataReader { Error readProfile(BinaryContext &BC) override; + /// Add an additional perf.data or pre-aggregated profile input to be merged + /// into this aggregation job. + void addInputFile(StringRef Filename); + bool mayHaveProfileData(const BinaryFunction &BF) override; /// Set Bolt Address Translation Table when processing samples collected in @@ -147,6 +149,10 @@ class DataAggregator : public DataReader { std::unordered_map<uint64_t, uint64_t> BasicSamples; std::vector<PerfMemSample> MemSamples; + /// Perf.data or pre-aggregated inputs to aggregate and merge into this + /// reader. + std::vector<std::string> InputFilenames; + /// Filter pre-aggregated entries belonging to a DSO with this buildid. /// Set when processing a shared library, empty implies main binary. StringRef FilterBuildID; @@ -394,6 +400,9 @@ class DataAggregator : public DataReader { /// Parse this aggregator's input file. void parseInput(); + /// Merge parsed profile data from another aggregation job. + void mergeFrom(const DataAggregator &Other); + /// Mark binary functions covered by parsed profile data. void markFunctionsWithProfile(); diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index fcea952919f11..dc193477023d7 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -94,7 +94,7 @@ extern llvm::cl::opt<bool> HotText; extern llvm::cl::opt<bool> Hugify; extern llvm::cl::opt<bool> Instrument; extern llvm::cl::opt<std::string> OutputFilename; -extern llvm::cl::opt<std::string> PerfData; +extern llvm::cl::list<std::string> PerfData; extern llvm::cl::opt<bool> PrintCacheMetrics; extern llvm::cl::opt<bool> PrintSections; extern llvm::cl::opt<bool> UpdateBranchProtection; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 33bfda160ae58..701054c325bf0 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include "llvm/Support/Regex.h" @@ -114,6 +115,11 @@ MaxSamples("max-samples", cl::Hidden, cl::cat(AggregatorCategory)); +cl::opt<unsigned> + PerfDataJobs("perfdata-jobs", + cl::desc("number of perf data files to process in parallel"), + cl::init(4), cl::cat(AggregatorCategory), + cl::sub(cl::SubCommand::getAll())); extern cl::opt<opts::ProfileFormatKind> ProfileFormat; extern cl::opt<bool> ProfileWritePseudoProbes; extern cl::opt<std::string> SaveProfile; @@ -169,6 +175,27 @@ std::vector<SectionNameAndRange> getTextSections(const BinaryContext *BC) { DataAggregator::~DataAggregator() { deleteTempFiles(); } +void DataAggregator::addInputFile(StringRef Filename) { + InputFilenames.emplace_back(Filename); +} + +void DataAggregator::mergeFrom(const DataAggregator &Other) { + Traces.insert(Traces.end(), Other.Traces.begin(), Other.Traces.end()); + + for (const auto &[PC, Count] : Other.BasicSamples) + BasicSamples[PC] += Count; + + MemSamples.insert(MemSamples.end(), Other.MemSamples.begin(), + Other.MemSamples.end()); + Returns.insert(Other.Returns.begin(), Other.Returns.end()); + EventNames.insert(Other.EventNames.begin(), Other.EventNames.end()); + + NumTraces += Other.NumTraces; + NumInvalidTraces += Other.NumInvalidTraces; + NumLongRangeTraces += Other.NumLongRangeTraces; + NumTotalSamples += Other.NumTotalSamples; +} + void DataAggregator::markFunctionsWithProfile() { std::unordered_set<uint64_t> Samples; std::unordered_set<BinaryFunction *> Funcs; @@ -228,10 +255,6 @@ void DataAggregator::findPerfExecutable() { void DataAggregator::start() { outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n"; - // Turn on heatmap building if requested by --heatmap flag. - if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences()) - opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional; - // Don't launch perf for pre-aggregated files or when perf input is specified // by the user. if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty()) @@ -707,6 +730,8 @@ void DataAggregator::imputeFallThroughs() { } void DataAggregator::parseInput() { + start(); + if (opts::ReadPreAggregated) parsePreAggregated(); else @@ -714,9 +739,14 @@ void DataAggregator::parseInput() { } Error DataAggregator::preprocessProfile(BinaryContext &BC) { + // Turn on heatmap building if requested by --heatmap flag. + if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences()) + opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional; + this->BC = &BC; if (opts::GeneratePerfTextProfile) { + start(); if (Error E = generatePerfTextData()) { deleteTempFiles(); exit(1); @@ -724,7 +754,21 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { exit(0); } - parseInput(); + SmallVector<DataAggregator *, 1> Aggregators(1, this); + for (StringRef InputFilename : InputFilenames) { + auto *DA = Aggregators.emplace_back(new DataAggregator(InputFilename)); + DA->BC = &BC; + } + + ThreadPoolStrategy SavedStrategy = parallel::strategy; + parallel::strategy = hardware_concurrency(opts::PerfDataJobs); + parallelForEach(Aggregators, [](DataAggregator *DA) { DA->parseInput(); }); + parallel::strategy = SavedStrategy; + + for (DataAggregator *DA : llvm::drop_begin(Aggregators)) { + mergeFrom(*DA); + delete DA; + } markFunctionsWithProfile(); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 4a2d83cbf8706..d47ea41a51780 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -473,6 +473,13 @@ Error RewriteInstance::setProfile(StringRef Filename) { return errorCodeToError(make_error_code(errc::no_such_file_or_directory)); if (ProfileReader) { + if (DataAggregator::checkPerfDataMagic(Filename) && + // Poor man's RTTI + ProfileReader->getReaderName() == StringRef("perf data aggregator")) { + static_cast<DataAggregator *>(ProfileReader.get()) + ->addInputFile(Filename); + return Error::success(); + } // Already exists return make_error<StringError>(Twine("multiple profiles specified: ") + ProfileReader->getFilename() + " and " + diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index 49f9ee7403c50..cbd0be4a806ae 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -243,15 +243,14 @@ OutputFilename("o", cl::Optional, cl::cat(BoltOutputCategory)); -cl::opt<std::string> PerfData("perfdata", cl::desc("<data file>"), cl::Optional, - cl::cat(AggregatorCategory), - cl::sub(cl::SubCommand::getAll())); - -static cl::alias -PerfDataA("p", - cl::desc("alias for -perfdata"), - cl::aliasopt(PerfData), - cl::cat(AggregatorCategory)); +cl::list<std::string> PerfData("perfdata", cl::CommaSeparated, + cl::desc("<data file>"), + cl::cat(AggregatorCategory), + cl::sub(cl::SubCommand::getAll())); + +static cl::alias PerfDataA("p", cl::CommaSeparated, + cl::desc("alias for -perfdata"), + cl::aliasopt(PerfData), cl::cat(AggregatorCategory)); cl::opt<bool> PrintCacheMetrics( "print-cache-metrics", diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index 6951d09db3de6..50a0d6f9d6f71 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -33,8 +33,13 @@ CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% tota RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe --pa -perfdata %p/Inputs/pre-aggregated.txt -perfdata %p/Inputs/pre-aggregated.txt -o %t.multi-perfdata.null | FileCheck %s --check-prefix=CHECK-MULTI +RUN: llvm-bolt %t.exe --pa -p %p/Inputs/pre-aggregated.txt,%p/Inputs/pre-aggregated.txt -o %t.multi-perfdata-comma.null | FileCheck %s --check-prefix=CHECK-MULTI +RUN: not llvm-bolt %t.exe --pa -p %p/Inputs/pre-aggregated.txt -p %t.missing -o %t.multi-perfdata-missing.null 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile +CHECK-MULTI: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile +CHECK-MISSING: {{.*}}missing': No such file or directory. RUN: FileCheck %s -check-prefix=PERF2BOLT --input-file %t RUN: FileCheck %s -check-prefix=NEWFORMAT --input-file %t.new diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test index 984432716992a..e34ac76632113 100644 --- a/bolt/test/perf2bolt/perf_test.test +++ b/bolt/test/perf2bolt/perf_test.test @@ -7,11 +7,22 @@ RUN: perf record -Fmax -e cycles:u -o %t2 -- %t RUN: perf2bolt %t -p=%t2 -o %t3 -ba -ignore-build-id --show-density \ RUN: --heatmap %t.hm 2>&1 | FileCheck %s RUN: FileCheck %s --input-file %t.hm-section-hotness.csv --check-prefix=CHECK-HM +# Multiple perf.data files as input +RUN: perf2bolt %t -p=%t2 -p %t2 -o %t3.multi -ba -ignore-build-id \ +RUN: | FileCheck %s --check-prefix=CHECK-MULTI +RUN: perf2bolt %t -p=%t2,%t2 -o %t3.comma -ba -ignore-build-id \ +RUN: | FileCheck %s --check-prefix=CHECK-MULTI +RUN: cmp %t3.multi %t3.comma +# Check counts are 2x the original +RUN: merge-fdata %t3 %t3 | sort > %t3.x2 +RUN: sort %t3.multi > %t3.multi.x2 +RUN: cmp %t3.x2 %t3.multi.x2 CHECK-NOT: PERF2BOLT-ERROR CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. CHECK: HEATMAP: building heat map CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts. +CHECK-MULTI: BOLT-INFO RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4 diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp index 2cd5151bed49a..cc8e6d70001eb 100644 --- a/bolt/tools/driver/llvm-bolt.cpp +++ b/bolt/tools/driver/llvm-bolt.cpp @@ -234,8 +234,9 @@ int main(int argc, char **argv) { } if (!opts::PerfData.empty()) { - if (Error E = RI.setProfile(opts::PerfData)) - report_error(opts::PerfData, std::move(E)); + for (StringRef Filename : opts::PerfData) + if (Error E = RI.setProfile(Filename)) + report_error(Filename, std::move(E)); } else if (opts::AggregateOnly) { errs() << ToolName << ": missing required -perfdata option.\n"; exit(1); diff --git a/bolt/tools/heatmap/heatmap.cpp b/bolt/tools/heatmap/heatmap.cpp index 17a969e0c8598..682eb60779025 100644 --- a/bolt/tools/heatmap/heatmap.cpp +++ b/bolt/tools/heatmap/heatmap.cpp @@ -121,8 +121,9 @@ int main(int argc, char **argv) { report_error("RewriteInstance", std::move(E)); RewriteInstance &RI = *RIOrErr.get(); - if (Error E = RI.setProfile(opts::PerfData)) - report_error(opts::PerfData, std::move(E)); + for (StringRef Filename : opts::PerfData) + if (Error E = RI.setProfile(Filename)) + report_error(Filename, std::move(E)); if (Error E = RI.run()) report_error(opts::InputFilename, std::move(E)); `````````` </details> https://github.com/llvm/llvm-project/pull/199324 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
