llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Reuse data structures used by perf data reader for pre-aggregated data. Combined with #<!-- -->136531 this allows using pre-aggregated data for heatmap. Test Plan: heatmap-preagg.test --- Full diff: https://github.com/llvm/llvm-project/pull/138798.diff 3 Files Affected: - (modified) bolt/include/bolt/Profile/DataAggregator.h (+1-19) - (modified) bolt/lib/Profile/DataAggregator.cpp (+62-89) - (added) bolt/test/X86/heatmap-preagg.test (+33) ``````````diff diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index c4ee75e7a6da6..d66d198e37d61 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -92,16 +92,6 @@ class DataAggregator : public DataReader { uint64_t Addr; }; - /// Used for parsing specific pre-aggregated input files. - struct AggregatedLBREntry { - enum Type : char { BRANCH = 0, FT, FT_EXTERNAL_ORIGIN, TRACE }; - Location From; - Location To; - uint64_t Count; - uint64_t Mispreds; - Type EntryType; - }; - struct Trace { uint64_t From; uint64_t To; @@ -131,7 +121,6 @@ class DataAggregator : public DataReader { /// and use them later for processing and assigning profile. std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs; std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs; - std::vector<AggregatedLBREntry> AggregatedLBRs; std::unordered_map<uint64_t, uint64_t> BasicSamples; std::vector<PerfMemSample> MemSamples; @@ -416,14 +405,7 @@ class DataAggregator : public DataReader { /// F 41be90 41be90 4 /// B 4b1942 39b57f0 3 0 /// B 4b196f 4b19e0 2 0 - void parsePreAggregated(); - - /// Parse the full output of pre-aggregated LBR samples generated by - /// an external tool. - std::error_code parsePreAggregatedLBRSamples(); - - /// Process parsed pre-aggregated data. - void processPreAggregated(); + std::error_code parsePreAggregated(); /// If \p Address falls into the binary address space based on memory /// mapping info \p MMI, then adjust it for further processing by subtracting diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index aea6c67546ab1..a5ac87ee781b2 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -349,25 +349,29 @@ bool DataAggregator::checkPerfDataMagic(StringRef FileName) { return false; } -void DataAggregator::parsePreAggregated() { - std::string Error; +std::error_code DataAggregator::parsePreAggregated() { + outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; + NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", + TimerGroupName, TimerGroupDesc, opts::TimeAggregator); ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFileOrSTDIN(Filename); - if (std::error_code EC = MB.getError()) { - errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": " - << EC.message() << "\n"; - exit(1); - } + if (std::error_code EC = MB.getError()) + return EC; FileBuf = std::move(*MB); ParsingBuf = FileBuf->getBuffer(); Col = 0; Line = 1; - if (parsePreAggregatedLBRSamples()) { - errs() << "PERF2BOLT: failed to parse samples\n"; - exit(1); + size_t AggregatedLBRs = 0; + while (hasData()) { + if (std::error_code EC = parseAggregatedLBREntry()) + return EC; + ++AggregatedLBRs; } + + outs() << "PERF2BOLT: read " << AggregatedLBRs << " aggregated LBR entries\n"; + return std::error_code(); } void DataAggregator::filterBinaryMMapInfo() { @@ -446,11 +450,6 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, Error DataAggregator::preprocessProfile(BinaryContext &BC) { this->BC = &BC; - if (opts::ReadPreAggregated) { - parsePreAggregated(); - return Error::success(); - } - if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) { outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; processFileBuildID(*FileBuildID); @@ -471,6 +470,12 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { ErrorCallback(ReturnCode, ErrBuf); }; + if (opts::ReadPreAggregated) { + if (std::error_code EC = parsePreAggregated()) + return errorCodeToError(EC); + goto heatmap; + } + if (BC.IsLinuxKernel) { // Current MMap parsing logic does not work with linux kernel. // MMap entries for linux kernel uses PERF_RECORD_MMAP @@ -502,12 +507,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { if (opts::BasicAggregation ? parseBasicEvents() : parseBranchEvents()) errs() << "PERF2BOLT: failed to parse samples\n"; - if (opts::HeatmapMode) { - if (std::error_code EC = printLBRHeatMap()) - return errorCodeToError(EC); - exit(0); - } - // Special handling for memory events if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback)) return Error::success(); @@ -518,6 +517,13 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { deleteTempFiles(); +heatmap: + if (opts::HeatmapMode) { + if (std::error_code EC = printLBRHeatMap()) + return errorCodeToError(EC); + exit(0); + } + return Error::success(); } @@ -554,9 +560,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) { } void DataAggregator::processProfile(BinaryContext &BC) { - if (opts::ReadPreAggregated) - processPreAggregated(); - else if (opts::BasicAggregation) + if (opts::BasicAggregation) processBasicEvents(); else processBranchEvents(); @@ -584,7 +588,6 @@ void DataAggregator::processProfile(BinaryContext &BC) { // Release intermediate storage. clear(BranchLBRs); clear(FallthroughLBRs); - clear(AggregatedLBRs); clear(BasicSamples); clear(MemSamples); } @@ -1213,15 +1216,14 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); if (std::error_code EC = TypeOrErr.getError()) return EC; - auto Type = AggregatedLBREntry::TRACE; - if (LLVM_LIKELY(TypeOrErr.get() == "T")) { - } else if (TypeOrErr.get() == "B") { - Type = AggregatedLBREntry::BRANCH; - } else if (TypeOrErr.get() == "F") { - Type = AggregatedLBREntry::FT; - } else if (TypeOrErr.get() == "f") { - Type = AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - } else { + enum TType { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID }; + auto Type = StringSwitch<TType>(TypeOrErr.get()) + .Case("T", TRACE) + .Case("B", BRANCH) + .Case("F", FT) + .Case("f", FT_EXTERNAL_ORIGIN) + .Default(INVALID); + if (Type == INVALID) { reportError("expected T, B, F or f"); return make_error_code(llvm::errc::io_error); } @@ -1239,7 +1241,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { return EC; ErrorOr<Location> TraceFtEnd = std::error_code(); - if (Type == AggregatedLBREntry::TRACE) { + if (Type == TRACE) { while (checkAndConsumeFS()) { } TraceFtEnd = parseLocationOrOffset(); @@ -1249,13 +1251,12 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { while (checkAndConsumeFS()) { } - ErrorOr<int64_t> Frequency = - parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); + ErrorOr<int64_t> Frequency = parseNumberField(FieldSeparator, Type != BRANCH); if (std::error_code EC = Frequency.getError()) return EC; uint64_t Mispreds = 0; - if (Type == AggregatedLBREntry::BRANCH) { + if (Type == BRANCH) { while (checkAndConsumeFS()) { } ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); @@ -1277,13 +1278,28 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { BF->setHasProfileAvailable(); uint64_t Count = static_cast<uint64_t>(Frequency.get()); - AggregatedLBREntry Entry{From.get(), To.get(), Count, Mispreds, Type}; - AggregatedLBRs.emplace_back(Entry); - if (Type == AggregatedLBREntry::TRACE) { - auto FtType = (FromFunc == ToFunc) ? AggregatedLBREntry::FT - : AggregatedLBREntry::FT_EXTERNAL_ORIGIN; - AggregatedLBREntry TraceFt{To.get(), TraceFtEnd.get(), Count, 0, FtType}; - AggregatedLBRs.emplace_back(TraceFt); + + Trace Trace(From->Offset, To->Offset); + // Taken trace + if (Type == TRACE || Type == BRANCH) { + TakenBranchInfo &Info = BranchLBRs[Trace]; + Info.TakenCount += Count; + Info.MispredCount += Mispreds; + + NumTotalSamples += Count; + } + // Construct fallthrough part of the trace + if (Type == TRACE) { + Trace.From = To->Offset; + Trace.To = TraceFtEnd->Offset; + Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN; + } + // Add fallthrough trace + if (Type != BRANCH) { + FTInfo &Info = FallthroughLBRs[Trace]; + (Type == FT ? Info.InternCount : Info.ExternCount) += Count; + + NumTraces += Count; } return std::error_code(); @@ -1560,7 +1576,6 @@ std::error_code DataAggregator::parseBranchEvents() { printBranchStacksDiagnostics(NumTotalSamples - NumSamples); } } - printBranchSamplesDiagnostics(); return std::error_code(); } @@ -1588,6 +1603,7 @@ void DataAggregator::processBranchEvents() { const TakenBranchInfo &Info = AggrLBR.second; doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); } + printBranchSamplesDiagnostics(); } std::error_code DataAggregator::parseBasicEvents() { @@ -1693,49 +1709,6 @@ void DataAggregator::processMemEvents() { } } -std::error_code DataAggregator::parsePreAggregatedLBRSamples() { - outs() << "PERF2BOLT: parsing pre-aggregated profile...\n"; - NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events", - TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - while (hasData()) - if (std::error_code EC = parseAggregatedLBREntry()) - return EC; - - return std::error_code(); -} - -void DataAggregator::processPreAggregated() { - outs() << "PERF2BOLT: processing pre-aggregated profile...\n"; - NamedRegionTimer T("processAggregated", "Processing aggregated branch events", - TimerGroupName, TimerGroupDesc, opts::TimeAggregator); - - for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) { - switch (AggrEntry.EntryType) { - case AggregatedLBREntry::BRANCH: - case AggregatedLBREntry::TRACE: - doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, - AggrEntry.Mispreds); - NumTotalSamples += AggrEntry.Count; - break; - case AggregatedLBREntry::FT: - case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { - LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT - ? AggrEntry.From.Offset - : 0, - AggrEntry.From.Offset, false}; - LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false}; - doTrace(First, Second, AggrEntry.Count); - NumTraces += AggrEntry.Count; - break; - } - } - } - - outs() << "PERF2BOLT: read " << AggregatedLBRs.size() - << " aggregated LBR entries\n"; - printBranchSamplesDiagnostics(); -} - std::optional<int32_t> DataAggregator::parseCommExecEvent() { size_t LineEnd = ParsingBuf.find_first_of("\n"); if (LineEnd == StringRef::npos) { diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test new file mode 100644 index 0000000000000..00d4d521b1adf --- /dev/null +++ b/bolt/test/X86/heatmap-preagg.test @@ -0,0 +1,33 @@ +## Test heatmap with pre-aggregated profile + +RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe +## Non-BOLTed input binary +RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \ +RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s +RUN: FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv + +## BOLTed input binary +RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \ +RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \ +RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main +RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \ +RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s +RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv + +CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries +CHECK-HEATMAP: HEATMAP: invalid traces: 1 + +CHECK-SEC-HOT: .init, 0x401000, 0x40101b, 16.8545 +CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583 +CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872 +CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000 + +CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries +CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2 + +CHECK-SEC-HOT-BAT: .init, 0x401000, 0x40101b, 17.2888 +CHECK-SEC-HOT-BAT-NEXT: .plt, 0x401020, 0x4010b0, 5.6132 +CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385 +CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000 +CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595 +CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000 `````````` </details> https://github.com/llvm/llvm-project/pull/138798 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits