llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) <details> <summary>Changes</summary> Record the number of function invocations from external code - code outside the binary, which may include JIT code and DSOs. The purpose of it is to exclude external entry counts from call graph flow conservation analysis. It is known that external entries increase CG flow imbalance. Test Plan: updated shrinkwrapping.test --- Full diff: https://github.com/llvm/llvm-project/pull/141674.diff 9 Files Affected: - (modified) bolt/include/bolt/Core/BinaryFunction.h (+12) - (modified) bolt/include/bolt/Profile/DataReader.h (+3) - (modified) bolt/include/bolt/Profile/ProfileYAMLMapping.h (+2) - (modified) bolt/lib/Core/BinaryFunction.cpp (+2) - (modified) bolt/lib/Profile/DataAggregator.cpp (+1) - (modified) bolt/lib/Profile/DataReader.cpp (+6) - (modified) bolt/lib/Profile/YAMLProfileReader.cpp (+1) - (modified) bolt/lib/Profile/YAMLProfileWriter.cpp (+1) - (modified) bolt/test/X86/shrinkwrapping.test (+2) ``````````diff diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 6f3b5923d3ef4..54187b32968a5 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -388,6 +388,10 @@ class BinaryFunction { /// The profile data for the number of times the function was executed. uint64_t ExecutionCount{COUNT_NO_PROFILE}; + /// Profile data for the number of times this function was entered from + /// external code (DSO, JIT, etc). + uint64_t ExternEntryCount{0}; + /// Profile match ratio. float ProfileMatchRatio{0.0f}; @@ -1864,6 +1868,10 @@ class BinaryFunction { return *this; } + /// Set the profile data for the number of times the function was entered from + /// external code (DSO/JIT). + void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; } + /// Adjust execution count for the function by a given \p Count. The value /// \p Count will be subtracted from the current function count. /// @@ -1891,6 +1899,10 @@ class BinaryFunction { /// Return COUNT_NO_PROFILE if there's no profile info. uint64_t getExecutionCount() const { return ExecutionCount; } + /// Return the profile information about the number of times the function was + /// entered from external code (DSO/JIT). + uint64_t getExternEntryCount() const { return ExternEntryCount; } + /// Return the raw profile information about the number of branch /// executions corresponding to this function. uint64_t getRawSampleCount() const { return RawSampleCount; } diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h index 80031f8f6ef4a..9bc61ec83364f 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h @@ -109,6 +109,9 @@ struct FuncBranchData { /// Total execution count for the function. int64_t ExecutionCount{0}; + /// Total entry count from external code for the function. + uint64_t ExternEntryCount{0}; + /// Indicate if the data was used. bool Used{false}; diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index a8d9a15311d94..41e2bd1651efd 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -206,6 +206,7 @@ struct BinaryFunctionProfile { uint32_t Id{0}; llvm::yaml::Hex64 Hash{0}; uint64_t ExecCount{0}; + uint64_t ExternEntryCount{0}; std::vector<BinaryBasicBlockProfile> Blocks; std::vector<InlineTreeNode> InlineTree; bool Used{false}; @@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> { YamlIO.mapRequired("fid", BFP.Id); YamlIO.mapRequired("hash", BFP.Hash); YamlIO.mapRequired("exec", BFP.ExecCount); + YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0); YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks); YamlIO.mapOptional("blocks", BFP.Blocks, std::vector<bolt::BinaryBasicBlockProfile>()); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 851fa36a6b4b7..68477f778470c 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -476,6 +476,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { OS << "\n Sample Count: " << RawSampleCount; OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f); } + if (ExternEntryCount) + OS << "\n Extern Entry Count: " << ExternEntryCount; if (opts::PrintDynoStats && !getLayout().block_empty()) { OS << '\n'; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 4b7a9fd912869..7d62dadff887a 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2289,6 +2289,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, YamlBF.Id = BF->getFunctionNumber(); YamlBF.Hash = BAT->getBFHash(FuncAddress); YamlBF.ExecCount = BF->getKnownExecutionCount(); + YamlBF.ExternEntryCount = BF->getExternEntryCount(); YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress); const BoltAddressTranslation::BBHashMapTy &BlockMap = BAT->getBBHashMap(FuncAddress); diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index 198f7d8642738..9c9d9ca9ef7dd 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) { } llvm::stable_sort(Data); ExecutionCount += FBD.ExecutionCount; + ExternEntryCount += FBD.ExternEntryCount; for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) { assert(I->To.Name == FBD.Name); auto NewElmt = EntryData.insert(EntryData.end(), *I); @@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) { if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) { setBranchData(Function, FuncData); Function.ExecutionCount = FuncData->ExecutionCount; + Function.ExternEntryCount = FuncData->ExternEntryCount; FuncData->Used = true; } } @@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) { if (fetchProfileForOtherEntryPoints(BF)) { BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); BF.ExecutionCount = FBD->ExecutionCount; + BF.ExternEntryCount = FBD->ExternEntryCount; BF.RawSampleCount = FBD->getNumExecutedBranches(); } return; @@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) { setBranchData(BF, NewBranchData); NewBranchData->Used = true; BF.ExecutionCount = NewBranchData->ExecutionCount; + BF.ExternEntryCount = NewBranchData->ExternEntryCount; BF.ProfileMatchRatio = 1.0f; break; } @@ -1220,6 +1224,8 @@ std::error_code DataReader::parse() { if (BI.To.IsSymbol && BI.To.Offset == 0) { I = GetOrCreateFuncEntry(BI.To.Name); I->second.ExecutionCount += BI.Branches; + if (!BI.From.IsSymbol) + I->second.ExternEntryCount += BI.Branches; } } diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 33ce40ac2eeec..086e47b661e10 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile( uint64_t FunctionExecutionCount = 0; BF.setExecutionCount(YamlBF.ExecCount); + BF.setExternEntryCount(YamlBF.ExternEntryCount); uint64_t FuncRawBranchCount = 0; for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index f1fe45f21a0f6..f4308d6fc1992 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, YamlBF.Hash = BF.getHash(); YamlBF.NumBasicBlocks = BF.size(); YamlBF.ExecCount = BF.getKnownExecutionCount(); + YamlBF.ExternEntryCount = BF.getExternEntryCount(); DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; if (PseudoProbeDecoder && BF.getGUID()) { std::tie(YamlBF.InlineTree, InlineTreeNodeId) = diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test index 8581d7e0c0f7b..521b4561b3ba6 100644 --- a/bolt/test/X86/shrinkwrapping.test +++ b/bolt/test/X86/shrinkwrapping.test @@ -8,6 +8,7 @@ REQUIRES: shell RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \ +RUN: --print-only=main --print-cfg \ RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \ RUN: FileCheck %s --check-prefix=CHECK-BOLT @@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \ RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \ RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT +CHECK-BOLT: Extern Entry Count: 100 CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops CHECK-INPUT: DW_CFA_advance_loc: 2 `````````` </details> https://github.com/llvm/llvm-project/pull/141674 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits