zequanwu updated this revision to Diff 276526.
zequanwu added a comment.

- Remove the "enable-call-graph-profile" option and enable CGProfilePass by
default, unless `-no-integrated-as` is passed to clang.
- Use `LazyBlockFrequencyInfoPass` instead of `BlockFrequencyInfoWrapperPass`
and check `F.getEntryCount()` before getting `BFI` to reduce cost.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83013/new/

https://reviews.llvm.org/D83013

Files:
  clang/include/clang/Basic/CodeGenOptions.def
  clang/lib/CodeGen/BackendUtil.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  llvm/include/llvm/InitializePasses.h
  llvm/include/llvm/Transforms/IPO.h
  llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
  llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/lib/Transforms/Instrumentation/CGProfile.cpp
  llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
  llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
  llvm/test/Instrumentation/cgprofile.ll
  llvm/test/Other/new-pm-cgprofile.ll
  llvm/test/Other/opt-O2-pipeline.ll
  llvm/test/Other/opt-O3-pipeline.ll
  llvm/test/Other/opt-Os-pipeline.ll

Index: llvm/test/Other/opt-Os-pipeline.ll
===================================================================
--- llvm/test/Other/opt-Os-pipeline.ll
+++ llvm/test/Other/opt-Os-pipeline.ll
@@ -266,6 +266,12 @@
 ; CHECK-NEXT:     Strip Unused Function Prototypes
 ; CHECK-NEXT:     Dead Global Elimination
 ; CHECK-NEXT:     Merge Duplicate Global Constants
+; CHECK-NEXT:     Call Graph Profile
+; CHECK-NEXT:       FunctionPass Manager
+; CHECK-NEXT:         Dominator Tree Construction
+; CHECK-NEXT:         Natural Loop Information
+; CHECK-NEXT:         Lazy Branch Probability Analysis
+; CHECK-NEXT:         Lazy Block Frequency Analysis
 ; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Natural Loop Information
Index: llvm/test/Other/opt-O3-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline.ll
+++ llvm/test/Other/opt-O3-pipeline.ll
@@ -285,6 +285,12 @@
 ; CHECK-NEXT:     Strip Unused Function Prototypes
 ; CHECK-NEXT:     Dead Global Elimination
 ; CHECK-NEXT:     Merge Duplicate Global Constants
+; CHECK-NEXT:     Call Graph Profile
+; CHECK-NEXT:       FunctionPass Manager
+; CHECK-NEXT:         Dominator Tree Construction
+; CHECK-NEXT:         Natural Loop Information
+; CHECK-NEXT:         Lazy Branch Probability Analysis
+; CHECK-NEXT:         Lazy Block Frequency Analysis
 ; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Natural Loop Information
Index: llvm/test/Other/opt-O2-pipeline.ll
===================================================================
--- llvm/test/Other/opt-O2-pipeline.ll
+++ llvm/test/Other/opt-O2-pipeline.ll
@@ -280,6 +280,12 @@
 ; CHECK-NEXT:     Strip Unused Function Prototypes
 ; CHECK-NEXT:     Dead Global Elimination
 ; CHECK-NEXT:     Merge Duplicate Global Constants
+; CHECK-NEXT:     Call Graph Profile
+; CHECK-NEXT:       FunctionPass Manager
+; CHECK-NEXT:         Dominator Tree Construction
+; CHECK-NEXT:         Natural Loop Information
+; CHECK-NEXT:         Lazy Branch Probability Analysis
+; CHECK-NEXT:         Lazy Block Frequency Analysis
 ; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Natural Loop Information
Index: llvm/test/Other/new-pm-cgprofile.ll
===================================================================
--- llvm/test/Other/new-pm-cgprofile.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt -debug-pass-manager -passes='default<O2>' %s 2>&1 |FileCheck %s --check-prefixes=DEFAULT
-; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=0 %s 2>&1 |FileCheck %s --check-prefixes=OFF
-; RUN: opt -debug-pass-manager -passes='default<O2>' -enable-npm-call-graph-profile=1 %s 2>&1 |FileCheck %s --check-prefixes=ON
-;
-; DEFAULT: Running pass: CGProfilePass
-; OFF-NOT: Running pass: CGProfilePass
-; ON: Running pass: CGProfilePass
-
-define void @foo() {
-  ret void
-}
Index: llvm/test/Instrumentation/cgprofile.ll
===================================================================
--- llvm/test/Instrumentation/cgprofile.ll
+++ llvm/test/Instrumentation/cgprofile.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -passes cg-profile -S | FileCheck %s
+; RUN: opt < %s -cg-profile -S | FileCheck %s
 
 declare void @b()
 
Index: llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
@@ -276,6 +276,13 @@
 ; GCN-O1-NEXT:       Warn about non-applied transformations
 ; GCN-O1-NEXT:       Alignment from assumptions
 ; GCN-O1-NEXT:     Strip Unused Function Prototypes
+; GCN-O1-NEXT:     Call Graph Profile
+; GCN-O1-NEXT:       FunctionPass Manager
+; GCN-O1-NEXT:         Dominator Tree Construction
+; GCN-O1-NEXT:         Natural Loop Information
+; GCN-O1-NEXT:         Post-Dominator Tree Construction
+; GCN-O1-NEXT:         Branch Probability Analysis
+; GCN-O1-NEXT:         Block Frequency Analysis
 ; GCN-O1-NEXT:     FunctionPass Manager
 ; GCN-O1-NEXT:       Dominator Tree Construction
 ; GCN-O1-NEXT:       Natural Loop Information
@@ -623,6 +630,13 @@
 ; GCN-O2-NEXT:     Strip Unused Function Prototypes
 ; GCN-O2-NEXT:     Dead Global Elimination
 ; GCN-O2-NEXT:     Merge Duplicate Global Constants
+; GCN-O2-NEXT:     Call Graph Profile
+; GCN-O2-NEXT:       FunctionPass Manager
+; GCN-O2-NEXT:         Dominator Tree Construction
+; GCN-O2-NEXT:         Natural Loop Information
+; GCN-O2-NEXT:         Post-Dominator Tree Construction
+; GCN-O2-NEXT:         Branch Probability Analysis
+; GCN-O2-NEXT:         Block Frequency Analysis
 ; GCN-O2-NEXT:     FunctionPass Manager
 ; GCN-O2-NEXT:       Dominator Tree Construction
 ; GCN-O2-NEXT:       Natural Loop Information
@@ -975,6 +989,13 @@
 ; GCN-O3-NEXT:     Strip Unused Function Prototypes
 ; GCN-O3-NEXT:     Dead Global Elimination
 ; GCN-O3-NEXT:     Merge Duplicate Global Constants
+; GCN-O3-NEXT:     Call Graph Profile
+; GCN-O3-NEXT:       FunctionPass Manager
+; GCN-O3-NEXT:         Dominator Tree Construction
+; GCN-O3-NEXT:         Natural Loop Information
+; GCN-O3-NEXT:         Post-Dominator Tree Construction
+; GCN-O3-NEXT:         Branch Probability Analysis
+; GCN-O3-NEXT:         Block Frequency Analysis
 ; GCN-O3-NEXT:     FunctionPass Manager
 ; GCN-O3-NEXT:       Dominator Tree Construction
 ; GCN-O3-NEXT:       Natural Loop Information
Index: llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -112,6 +112,7 @@
   initializePGOInstrumentationUseLegacyPassPass(Registry);
   initializePGOIndirectCallPromotionLegacyPassPass(Registry);
   initializePGOMemOPSizeOptLegacyPassPass(Registry);
+  initializeCGProfileLegacyPassPass(Registry);
   initializeInstrOrderFileLegacyPassPass(Registry);
   initializeInstrProfilingLegacyPassPass(Registry);
   initializeMemorySanitizerLegacyPassPass(Registry);
Index: llvm/lib/Transforms/Instrumentation/CGProfile.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -10,22 +10,48 @@
 
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Instrumentation.h"
 
 #include <array>
 
 using namespace llvm;
 
-PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
+static bool
+addModuleFlags(Module &M,
+               MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) {
+  if (Counts.empty())
+    return false;
+
+  LLVMContext &Context = M.getContext();
+  MDBuilder MDB(Context);
+  std::vector<Metadata *> Nodes;
+
+  for (auto E : Counts) {
+    Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
+                        ValueAsMetadata::get(E.first.second),
+                        MDB.createConstant(ConstantInt::get(
+                            Type::getInt64Ty(Context), E.second))};
+    Nodes.push_back(MDNode::get(Context, Vals));
+  }
+
+  M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
+  return true;
+}
+
+static bool
+runCGProfilePass(Module &M,
+                 function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+                 function_ref<TargetTransformInfo &(Function &)> GetTTI) {
   MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
-  FunctionAnalysisManager &FAM =
-      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
   InstrProfSymtab Symtab;
   auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
                           Function *CalledF, uint64_t NewCount) {
@@ -35,14 +61,14 @@
     Count = SaturatingAdd(Count, NewCount);
   };
   // Ignore error here.  Indirect calls are ignored if this fails.
-  (void)(bool)Symtab.create(M);
+  (void)(bool) Symtab.create(M);
   for (auto &F : M) {
-    if (F.isDeclaration())
+    if (F.isDeclaration() || !F.getEntryCount())
       continue;
-    auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+    auto &BFI = GetBFI(F);
     if (BFI.getEntryFreq() == 0)
       continue;
-    TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
+    TargetTransformInfo &TTI = GetTTI(F);
     for (auto &BB : F) {
       Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
       if (!BBCount)
@@ -69,28 +95,56 @@
     }
   }
 
-  addModuleFlags(M, Counts);
-
-  return PreservedAnalyses::all();
+  return addModuleFlags(M, Counts);
 }
 
-void CGProfilePass::addModuleFlags(
-    Module &M,
-    MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const {
-  if (Counts.empty())
-    return;
+namespace {
+struct CGProfileLegacyPass final : public ModulePass {
+  static char ID;
+  CGProfileLegacyPass() : ModulePass(ID) {
+    initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
 
-  LLVMContext &Context = M.getContext();
-  MDBuilder MDB(Context);
-  std::vector<Metadata *> Nodes;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LazyBlockFrequencyInfoPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
 
-  for (auto E : Counts) {
-    Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
-                        ValueAsMetadata::get(E.first.second),
-                        MDB.createConstant(ConstantInt::get(
-                            Type::getInt64Ty(Context), E.second))};
-    Nodes.push_back(MDNode::get(Context, Vals));
+  bool runOnModule(Module &M) override {
+    auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
+      return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI();
+    };
+    auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
+      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    };
+
+    return runCGProfilePass(M, GetBFI, GetTTI);
   }
+};
 
-  M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
+} // namespace
+
+char CGProfileLegacyPass::ID = 0;
+
+INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
+                false)
+
+ModulePass *llvm::createCGProfileLegacyPass() {
+  return new CGProfileLegacyPass();
+}
+
+PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
+  FunctionAnalysisManager &FAM =
+      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
+    return FAM.getResult<BlockFrequencyAnalysis>(F);
+  };
+  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+    return FAM.getResult<TargetIRAnalysis>(F);
+  };
+
+  runCGProfilePass(M, GetBFI, GetTTI);
+
+  return PreservedAnalyses::all();
 }
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -195,6 +195,7 @@
     PrepareForThinLTO = EnablePrepareForThinLTO;
     PerformThinLTO = EnablePerformThinLTO;
     DivergentTarget = false;
+    CallGraphProfile = true;
 }
 
 PassManagerBuilder::~PassManagerBuilder() {
@@ -834,6 +835,10 @@
   if (MergeFunctions)
     MPM.add(createMergeFunctionsPass());
 
+  // Add Module flag "CG Profile" based on Block Frequency Information.
+  if (CallGraphProfile)
+    MPM.add(createCGProfileLegacyPass());
+
   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
   // canonicalization pass that enables other optimizations. As a result,
   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -248,10 +248,6 @@
     EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
               cl::desc("Enable control height reduction optimization (CHR)"));
 
-static cl::opt<bool> EnableCallGraphProfile(
-    "enable-npm-call-graph-profile", cl::init(true), cl::Hidden,
-    cl::desc("Enable call graph profile pass for the new PM (default = on)"));
-
 /// Flag to enable inline deferral during PGO.
 static cl::opt<bool>
     EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
@@ -267,7 +263,7 @@
   Coroutines = false;
   LicmMssaOptCap = SetLicmMssaOptCap;
   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
-  CallGraphProfile = EnableCallGraphProfile;
+  CallGraphProfile = true;
 }
 
 extern cl::opt<bool> EnableHotColdSplit;
Index: llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
===================================================================
--- llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
+++ llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
@@ -19,11 +19,6 @@
 class CGProfilePass : public PassInfoMixin<CGProfilePass> {
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-
-private:
-  void addModuleFlags(
-      Module &M,
-      MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const;
 };
 } // end namespace llvm
 
Index: llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -156,6 +156,7 @@
 
   bool DisableTailCalls;
   bool DisableUnrollLoops;
+  bool CallGraphProfile;
   bool SLPVectorize;
   bool LoopVectorize;
   bool LoopsInterleaved;
Index: llvm/include/llvm/Transforms/IPO.h
===================================================================
--- llvm/include/llvm/Transforms/IPO.h
+++ llvm/include/llvm/Transforms/IPO.h
@@ -282,6 +282,8 @@
 ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
                                           raw_ostream *ThinLinkOS = nullptr);
 
+ModulePass *createCGProfileLegacyPass();
+
 } // End llvm namespace
 
 #endif
Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -103,6 +103,7 @@
 void initializeCFIInstrInserterPass(PassRegistry&);
 void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
 void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
+void initializeCGProfileLegacyPassPass(PassRegistry &);
 void initializeCallGraphDOTPrinterPass(PassRegistry&);
 void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
 void initializeCallGraphViewerPass(PassRegistry&);
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -860,7 +860,6 @@
   Opts.RerollLoops = Args.hasArg(OPT_freroll_loops);
 
   Opts.DisableIntegratedAS = Args.hasArg(OPT_fno_integrated_as);
-  Opts.CallGraphProfile = !Opts.DisableIntegratedAS;
   Opts.Autolink = !Args.hasArg(OPT_fno_autolink);
   Opts.SampleProfileFile =
       std::string(Args.getLastArgValue(OPT_fprofile_sample_use_EQ));
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -620,6 +620,7 @@
   PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
   PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
   PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
+  PMBuilder.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
 
   PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
   // Loop interleaving in the loop vectorizer has historically been set to be
@@ -1144,7 +1145,7 @@
   PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
   PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
   PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
-  PTO.CallGraphProfile = CodeGenOpts.CallGraphProfile;
+  PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
   PTO.Coroutines = LangOpts.Coroutines;
 
   PassInstrumentationCallbacks PIC;
@@ -1562,7 +1563,7 @@
   Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
   Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
   Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP;
-  Conf.PTO.CallGraphProfile = CGOpts.CallGraphProfile;
+  Conf.PTO.CallGraphProfile = !CGOpts.DisableIntegratedAS;
 
   // Context sensitive profile.
   if (CGOpts.hasProfileCSIRInstr()) {
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -252,7 +252,6 @@
 CODEGENOPT(VectorizeLoop     , 1, 0) ///< Run loop vectorizer.
 CODEGENOPT(VectorizeSLP      , 1, 0) ///< Run SLP vectorizer.
 CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
-CODEGENOPT(CallGraphProfile  , 1, 0) ///< Run call graph profile.
 
   /// Attempt to use register sized accesses to bit-fields in structures, when
   /// possible.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to