llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Define a pre-aggregated basic sample format: ``` S <location> <count> ``` Test Plan: update pre-aggregated-perf.test --- Full diff: https://github.com/llvm/llvm-project/pull/140196.diff 5 Files Affected: - (modified) bolt/include/bolt/Profile/DataAggregator.h (+20-14) - (modified) bolt/lib/Profile/DataAggregator.cpp (+49-39) - (added) bolt/test/X86/Inputs/pre-aggregated-basic.txt (+18) - (modified) bolt/test/X86/pre-aggregated-perf.test (+8) - (modified) bolt/test/link_fdata.py (+3-3) ``````````diff diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 6d918134137d5..7d60b4689fb04 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -370,26 +370,25 @@ class DataAggregator : public DataReader { /// memory. /// /// File format syntax: - /// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>] - /// <count> [<mispred_count>] + /// {S|B|F|f|T} <start> [<end>] [<ft_end>] <count> [<mispred_count>] /// - /// B - indicates an aggregated branch - /// F - an aggregated fall-through + /// where <start>, <end>, <ft_end> have the format [<id>:]<offset> + /// + /// S - indicates an aggregated basic sample at <start> + /// B - indicates an aggregated branch from <start> to <end> + /// F - an aggregated fall-through from <start> to <end> /// f - an aggregated fall-through with external origin - used to disambiguate /// between a return hitting a basic block head and a regular internal /// jump to the block - /// T - an aggregated trace: branch with a fall-through (from, to, ft_end) - /// - /// <start_id> - build id of the object containing the start address. We can - /// skip it for the main binary and use "X" for an unknown object. This will - /// save some space and facilitate human parsing. - /// - /// <start_offset> - hex offset from the object base load address (0 for the - /// main executable unless it's PIE) to the start address. + /// T - an aggregated trace: branch from <start> to <end> with a fall-through + /// to <ft_end> /// - /// <end_id>, <end_offset> - same for the end address. + /// <id> - build id of the object containing the address. We can skip it for + /// the main binary and use "X" for an unknown object. This will save some + /// space and facilitate human parsing. /// - /// <ft_end> - same for the fallthrough_end address. + /// <offset> - hex offset from the object base load address (0 for the + /// main executable unless it's PIE) to the address. /// /// <count> - total aggregated count of the branch or a fall-through. /// @@ -397,10 +396,17 @@ class DataAggregator : public DataReader { /// Omitted for fall-throughs. /// /// Example: + /// Basic samples profile: + /// S 41be50 3 + /// + /// Soft-deprecated branch profile with separate branches and fall-throughs: /// F 41be50 41be50 3 /// F 41be90 41be90 4 /// B 4b1942 39b57f0 3 0 /// B 4b196f 4b19e0 2 0 + /// + /// Recommended branch profile with pre-aggregated traces: + /// T 4b196f 4b19e0 4b19ef 2 void parsePreAggregated(); /// Parse the full output of pre-aggregated LBR samples generated by diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index aa681e633c0d8..6e889908003da 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -1216,54 +1216,54 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator); if (std::error_code EC = TypeOrErr.getError()) return EC; - enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID }; + enum AggregatedLBREntry { + TRACE, + SAMPLE, + BRANCH, + FT, + FT_EXTERNAL_ORIGIN, + INVALID + }; auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get()) .Case("T", TRACE) + .Case("S", SAMPLE) .Case("B", BRANCH) .Case("F", FT) .Case("f", FT_EXTERNAL_ORIGIN) .Default(INVALID); if (Type == INVALID) { - reportError("expected T, B, F or f"); + reportError("expected T, S, B, F or f"); return make_error_code(llvm::errc::io_error); } - while (checkAndConsumeFS()) { - } - ErrorOr<Location> From = parseLocationOrOffset(); - if (std::error_code EC = From.getError()) - return EC; + std::optional<Location> Addrs[3]; + int AddrNum = 2; + if (Type == TRACE) + AddrNum = 3; + else if (Type == SAMPLE) + AddrNum = 1; - while (checkAndConsumeFS()) { - } - ErrorOr<Location> To = parseLocationOrOffset(); - if (std::error_code EC = To.getError()) - return EC; + int64_t Counters[2]; + int CounterNum = 1; + if (Type == BRANCH) + CounterNum = 2; - ErrorOr<Location> TraceFtEnd = std::error_code(); - if (Type == AggregatedLBREntry::TRACE) { + for (int I = 0; I < AddrNum; ++I) { while (checkAndConsumeFS()) { } - TraceFtEnd = parseLocationOrOffset(); - if (std::error_code EC = TraceFtEnd.getError()) - return EC; - } - - while (checkAndConsumeFS()) { + if (ErrorOr<Location> Addr = parseLocationOrOffset()) + Addrs[I] = Addr.get(); + else + return Addr.getError(); } - ErrorOr<int64_t> Frequency = - parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH); - if (std::error_code EC = Frequency.getError()) - return EC; - uint64_t Mispreds = 0; - if (Type == AggregatedLBREntry::BRANCH) { + for (int I = 0; I < CounterNum; ++I) { while (checkAndConsumeFS()) { } - ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true); - if (std::error_code EC = MispredsOrErr.getError()) - return EC; - Mispreds = static_cast<uint64_t>(MispredsOrErr.get()); + if (ErrorOr<int64_t> Count = parseNumberField(FieldSeparator, I + 1 == CounterNum)) + Counters[I] = Count.get(); + else + return Count.getError(); } if (!checkAndConsumeNewLine()) { @@ -1271,16 +1271,25 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { return make_error_code(llvm::errc::io_error); } - BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset); - BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset); + const uint64_t FromOffset = Addrs[0]->Offset; + BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset); + if (FromFunc) + FromFunc->setHasProfileAvailable(); - for (BinaryFunction *BF : {FromFunc, ToFunc}) - if (BF) - BF->setHasProfileAvailable(); + if (Type == SAMPLE) { + BasicSamples[FromOffset] += Counters[0]; + return std::error_code(); + } + + const uint64_t ToOffset = Addrs[1]->Offset; + BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset); + if (ToFunc) + ToFunc->setHasProfileAvailable(); - uint64_t Count = static_cast<uint64_t>(Frequency.get()); + int64_t Count = Counters[0]; + int64_t Mispreds = Counters[1]; - Trace Trace(From->Offset, To->Offset); + Trace Trace(FromOffset, ToOffset); // Taken trace if (Type == TRACE || Type == BRANCH) { TakenBranchInfo &Info = BranchLBRs[Trace]; @@ -1291,8 +1300,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { } // Construct fallthrough part of the trace if (Type == TRACE) { - Trace.From = To->Offset; - Trace.To = TraceFtEnd->Offset; + const uint64_t TraceFtEndOffset = Addrs[2]->Offset; + Trace.From = ToOffset; + Trace.To = TraceFtEndOffset; Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN; } // Add fallthrough trace diff --git a/bolt/test/X86/Inputs/pre-aggregated-basic.txt b/bolt/test/X86/Inputs/pre-aggregated-basic.txt new file mode 100644 index 0000000000000..28bcacca70ee1 --- /dev/null +++ b/bolt/test/X86/Inputs/pre-aggregated-basic.txt @@ -0,0 +1,18 @@ +S 4005f0 1 +S 4005f0 1 +S 400610 1 +S 400ad1 2 +S 400b10 1 +S 400bb7 1 +S 400bbc 2 +S 400d90 1 +S 400dae 1 +S 400e00 2 +S 401170 22 +S 401180 58 +S 4011a0 33 +S 4011a9 33 +S 4011ad 58 +S 4011b2 22 +S X:7f36d18d60c0 2 +S X:7f36d18f2ce0 1 diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index c05a06bf74945..63488a9062bdd 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -57,6 +57,14 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \ RUN: --aggregate-only --profile-format=yaml --profile-use-dfs RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT +## Test pre-aggregated basic profile +RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \ +RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR +RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \ +RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS +BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile +BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile + PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2 PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2 PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1 diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py index bcf9a777922d5..6a391c10b9481 100755 --- a/bolt/test/link_fdata.py +++ b/bolt/test/link_fdata.py @@ -36,9 +36,9 @@ fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)") # Pre-aggregated profile: -# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>] -# <count> [<mispred_count>] -preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)") +# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>] +# <loc>: [<id>:]<offset> +preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)") # No-LBR profile: # <is symbol?> <closest elf symbol or DSO name> <relative address> <count> `````````` </details> https://github.com/llvm/llvm-project/pull/140196 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits