llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Record the number of function invocations from external code - code
outside the binary, which may include JIT code and DSOs.

The purpose is to exclude external entry counts from call graph (CG) flow
conservation analysis, since external entries are known to increase CG
flow imbalance.

Test Plan: updated shrinkwrapping.test


---
Full diff: https://github.com/llvm/llvm-project/pull/141674.diff


9 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryFunction.h (+12) 
- (modified) bolt/include/bolt/Profile/DataReader.h (+3) 
- (modified) bolt/include/bolt/Profile/ProfileYAMLMapping.h (+2) 
- (modified) bolt/lib/Core/BinaryFunction.cpp (+2) 
- (modified) bolt/lib/Profile/DataAggregator.cpp (+1) 
- (modified) bolt/lib/Profile/DataReader.cpp (+6) 
- (modified) bolt/lib/Profile/YAMLProfileReader.cpp (+1) 
- (modified) bolt/lib/Profile/YAMLProfileWriter.cpp (+1) 
- (modified) bolt/test/X86/shrinkwrapping.test (+2) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6f3b5923d3ef4..54187b32968a5 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -388,6 +388,10 @@ class BinaryFunction {
   /// The profile data for the number of times the function was executed.
   uint64_t ExecutionCount{COUNT_NO_PROFILE};
 
+  /// Profile data for the number of times this function was entered from
+  /// external code (DSO, JIT, etc).
+  uint64_t ExternEntryCount{0};
+
   /// Profile match ratio.
   float ProfileMatchRatio{0.0f};
 
@@ -1864,6 +1868,10 @@ class BinaryFunction {
     return *this;
   }
 
+  /// Set the profile data for the number of times the function was entered from
+  /// external code (DSO/JIT).
+  void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; }
+
   /// Adjust execution count for the function by a given \p Count. The value
   /// \p Count will be subtracted from the current function count.
   ///
@@ -1891,6 +1899,10 @@ class BinaryFunction {
   /// Return COUNT_NO_PROFILE if there's no profile info.
   uint64_t getExecutionCount() const { return ExecutionCount; }
 
+  /// Return the profile information about the number of times the function was
+  /// entered from external code (DSO/JIT).
+  uint64_t getExternEntryCount() const { return ExternEntryCount; }
+
   /// Return the raw profile information about the number of branch
   /// executions corresponding to this function.
   uint64_t getRawSampleCount() const { return RawSampleCount; }
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 80031f8f6ef4a..9bc61ec83364f 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -109,6 +109,9 @@ struct FuncBranchData {
   /// Total execution count for the function.
   int64_t ExecutionCount{0};
 
+  /// Total entry count from external code for the function.
+  uint64_t ExternEntryCount{0};
+
   /// Indicate if the data was used.
   bool Used{false};
 
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index a8d9a15311d94..41e2bd1651efd 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
   uint32_t Id{0};
   llvm::yaml::Hex64 Hash{0};
   uint64_t ExecCount{0};
+  uint64_t ExternEntryCount{0};
   std::vector<BinaryBasicBlockProfile> Blocks;
   std::vector<InlineTreeNode> InlineTree;
   bool Used{false};
@@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
     YamlIO.mapRequired("fid", BFP.Id);
     YamlIO.mapRequired("hash", BFP.Hash);
     YamlIO.mapRequired("exec", BFP.ExecCount);
+    YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
     YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
     YamlIO.mapOptional("blocks", BFP.Blocks,
                        std::vector<bolt::BinaryBasicBlockProfile>());
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 851fa36a6b4b7..68477f778470c 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -476,6 +476,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
     OS << "\n  Sample Count: " << RawSampleCount;
     OS << "\n  Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
   }
+  if (ExternEntryCount)
+    OS << "\n  Extern Entry Count: " << ExternEntryCount;
 
   if (opts::PrintDynoStats && !getLayout().block_empty()) {
     OS << '\n';
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4b7a9fd912869..7d62dadff887a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2289,6 +2289,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
       YamlBF.Id = BF->getFunctionNumber();
       YamlBF.Hash = BAT->getBFHash(FuncAddress);
       YamlBF.ExecCount = BF->getKnownExecutionCount();
+      YamlBF.ExternEntryCount = BF->getExternEntryCount();
       YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
       const BoltAddressTranslation::BBHashMapTy &BlockMap =
           BAT->getBBHashMap(FuncAddress);
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index 198f7d8642738..9c9d9ca9ef7dd 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
   }
   llvm::stable_sort(Data);
   ExecutionCount += FBD.ExecutionCount;
+  ExternEntryCount += FBD.ExternEntryCount;
   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
     assert(I->To.Name == FBD.Name);
     auto NewElmt = EntryData.insert(EntryData.end(), *I);
@@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) {
     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
       setBranchData(Function, FuncData);
       Function.ExecutionCount = FuncData->ExecutionCount;
+      Function.ExternEntryCount = FuncData->ExternEntryCount;
       FuncData->Used = true;
     }
   }
@@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
       if (fetchProfileForOtherEntryPoints(BF)) {
         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
         BF.ExecutionCount = FBD->ExecutionCount;
+        BF.ExternEntryCount = FBD->ExternEntryCount;
         BF.RawSampleCount = FBD->getNumExecutedBranches();
       }
       return;
@@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
     setBranchData(BF, NewBranchData);
     NewBranchData->Used = true;
     BF.ExecutionCount = NewBranchData->ExecutionCount;
+    BF.ExternEntryCount = NewBranchData->ExternEntryCount;
     BF.ProfileMatchRatio = 1.0f;
     break;
   }
@@ -1220,6 +1224,8 @@ std::error_code DataReader::parse() {
     if (BI.To.IsSymbol && BI.To.Offset == 0) {
       I = GetOrCreateFuncEntry(BI.To.Name);
       I->second.ExecutionCount += BI.Branches;
+      if (!BI.From.IsSymbol)
+        I->second.ExternEntryCount += BI.Branches;
     }
   }
 
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 33ce40ac2eeec..086e47b661e10 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile(
   uint64_t FunctionExecutionCount = 0;
 
   BF.setExecutionCount(YamlBF.ExecCount);
+  BF.setExternEntryCount(YamlBF.ExternEntryCount);
 
   uint64_t FuncRawBranchCount = 0;
   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f1fe45f21a0f6..f4308d6fc1992 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
   YamlBF.Hash = BF.getHash();
   YamlBF.NumBasicBlocks = BF.size();
   YamlBF.ExecCount = BF.getKnownExecutionCount();
+  YamlBF.ExternEntryCount = BF.getExternEntryCount();
   DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
   if (PseudoProbeDecoder && BF.getGUID()) {
     std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test
index 8581d7e0c0f7b..521b4561b3ba6 100644
--- a/bolt/test/X86/shrinkwrapping.test
+++ b/bolt/test/X86/shrinkwrapping.test
@@ -8,6 +8,7 @@ REQUIRES: shell
 
 RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
 RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
+RUN:   --print-only=main --print-cfg \
 RUN:   --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
 RUN:   FileCheck %s --check-prefix=CHECK-BOLT
 
@@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
 RUN:   `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
 RUN:    tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
 
+CHECK-BOLT: Extern Entry Count: 100
 CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
 
 CHECK-INPUT:  DW_CFA_advance_loc: 2

``````````

</details>


https://github.com/llvm/llvm-project/pull/141674
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to