mtrofin updated this revision to Diff 238107.
mtrofin marked 2 inline comments as done.
mtrofin added a comment.

Alternative: expose speedup/size components to more closely align with legacy PM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp

Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -243,20 +243,18 @@
 
 extern cl::opt<bool> FlattenedProfileUsed;
 
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
-  switch (Level) {
-  case PassBuilder::O0:
-  case PassBuilder::O1:
-  case PassBuilder::O2:
-  case PassBuilder::O3:
-    return false;
-
-  case PassBuilder::Os:
-  case PassBuilder::Oz:
-    return true;
-  }
-  llvm_unreachable("Invalid optimization level!");
-}
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {0,
+                                                                           0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {1,
+                                                                           0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {2,
+                                                                           0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {3,
+                                                                           0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {2,
+                                                                           1};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {2,
+                                                                           2};
 
 namespace {
 
@@ -395,7 +393,7 @@
 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
                                                  ThinLTOPhase Phase,
                                                  bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations!");
+  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
   FunctionPassManager FPM(DebugLogging);
 
   // Form SSA out of local memory accesses after breaking apart aggregates into
@@ -406,7 +404,7 @@
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() >= 2) {
     if (EnableGVNHoist)
       FPM.addPass(GVNHoistPass());
 
@@ -418,7 +416,7 @@
   }
 
   // Speculative execution if the target has divergent branches; otherwise nop.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() > 1) {
     FPM.addPass(SpeculativeExecutionPass());
 
     // Optimize based on known information about branches, and cleanup afterward.
@@ -426,11 +424,11 @@
     FPM.addPass(CorrelatedValuePropagationPass());
   }
   FPM.addPass(SimplifyCFGPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     FPM.addPass(AggressiveInstCombinePass());
   FPM.addPass(InstCombinePass());
 
-  if (!isOptimizingForSize(Level))
+  if (!Level.isOptimizingForSize())
     FPM.addPass(LibCallsShrinkWrapPass());
 
   invokePeepholeEPCallbacks(FPM, Level);
@@ -438,11 +436,11 @@
   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
   // using the size value profile. Don't perform this when optimizing for size.
   if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
-      !isOptimizingForSize(Level) && Level > O1)
+      (Level.getSpeedupLevel() > 1 && !Level.isOptimizingForSize()))
     FPM.addPass(PGOMemOPSizeOpt());
 
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
-  if (Level > O1)
+  if (Level.getSpeedupLevel() > 1)
     FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
 
@@ -469,7 +467,7 @@
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Rotate Loop - disable header duplication at -Oz
-  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz));
   // TODO: Investigate promotion cap for O1.
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
   LPM1.addPass(SimpleLoopUnswitchPass());
@@ -486,7 +484,8 @@
   if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
        PGOOpt->Action != PGOOptions::SampleUse) &&
       PTO.LoopUnrolling)
-    LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false,
+    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+                                    /*OnlyWhenForced=*/false,
                                     PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
@@ -509,7 +508,7 @@
   FPM.addPass(SROA());
 
   // Eliminate redundancies.
-  if (Level != O1) {
+  if (Level != OptimizationLevel::O1) {
     // These passes add substantial compile time so skip them at O1.
     FPM.addPass(MergedLoadStoreMotionPass());
     if (RunNewGVN)
@@ -538,7 +537,7 @@
 
   // Re-consider control flow based optimizations after redundancy elimination,
   // redo DCE, etc.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() > 1) {
     FPM.addPass(JumpThreadingPass());
     FPM.addPass(CorrelatedValuePropagationPass());
     FPM.addPass(DSEPass());
@@ -558,7 +557,7 @@
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
-  if (EnableCHR && Level == O3 && PGOOpt &&
+  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
       (PGOOpt->Action == PGOOptions::IRUse ||
        PGOOpt->Action == PGOOptions::SampleUse))
     FPM.addPass(ControlHeightReductionPass());
@@ -571,13 +570,13 @@
                                     bool RunProfileGen, bool IsCS,
                                     std::string ProfileFile,
                                     std::string ProfileRemappingFile) {
-  assert(Level != O0 && "Not expecting O0 here!");
+  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
   // Generally running simplification passes and the inliner with an high
   // threshold results in smaller executables, but there may be cases where
   // the size grows, so let's be conservative here and skip this simplification
   // at -Os/Oz. We will not do this  inline for context sensistive PGO (when
   // IsCS is true).
-  if (!isOptimizingForSize(Level) && !IsCS) {
+  if (!Level.isOptimizingForSize() && !IsCS) {
     InlineParams IP;
 
     IP.DefaultThreshold = PreInlineThreshold;
@@ -662,10 +661,7 @@
 
 static InlineParams
 getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
-  auto O3 = PassBuilder::O3;
-  unsigned OptLevel = Level > O3 ? 2 : Level;
-  unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
-  return getInlineParams(OptLevel, SizeLevel);
+  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
 }
 
 ModulePassManager
@@ -711,7 +707,7 @@
   EarlyFPM.addPass(SROA());
   EarlyFPM.addPass(EarlyCSEPass());
   EarlyFPM.addPass(LowerExpectIntrinsicPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     EarlyFPM.addPass(CallSiteSplittingPass());
 
   // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
@@ -830,7 +826,7 @@
 
   // When at O3 add argument promotion to the pass pipeline.
   // FIXME: It isn't at all clear why this should be limited to O3.
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     MainCGPipeline.addPass(ArgumentPromotionPass());
 
   // Lastly, add the core function simplification pipeline nested inside the
@@ -974,11 +970,11 @@
   // across the loop nests.
   // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
   if (EnableUnrollAndJam && PTO.LoopUnrolling) {
-    OptimizePM.addPass(LoopUnrollAndJamPass(Level));
+    OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
   }
-  OptimizePM.addPass(LoopUnrollPass(
-      LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
-                        PTO.ForgetAllSCEVInLoopUnroll)));
+  OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(
+      Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+      PTO.ForgetAllSCEVInLoopUnroll)));
   OptimizePM.addPass(WarnMissedTransformationsPass());
   OptimizePM.addPass(InstCombinePass());
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
@@ -1040,7 +1036,8 @@
 ModulePassManager
 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                            bool DebugLogging, bool LTOPreLink) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
 
   ModulePassManager MPM(DebugLogging);
 
@@ -1067,7 +1064,8 @@
 ModulePassManager
 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                                 bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
 
   ModulePassManager MPM(DebugLogging);
 
@@ -1128,7 +1126,7 @@
     MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
   }
 
-  if (Level == O0)
+  if (Level == OptimizationLevel::O0)
     return MPM;
 
   // Force any function attributes we want the rest of the pipeline to observe.
@@ -1147,10 +1145,11 @@
 ModulePassManager
 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
                                             bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  assert(Level != OptimizationLevel::O0 &&
+         "Must request optimizations for the default pipeline!");
   // FIXME: We should use a customized pre-link pipeline!
   return buildPerModuleDefaultPipeline(Level, DebugLogging,
-                                       /* LTOPreLink */true);
+                                       /* LTOPreLink */ true);
 }
 
 ModulePassManager
@@ -1158,7 +1157,7 @@
                                      ModuleSummaryIndex *ExportSummary) {
   ModulePassManager MPM(DebugLogging);
 
-  if (Level == O0) {
+  if (Level == OptimizationLevel::O0) {
     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
     // metadata and intrinsics.
     MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
@@ -1187,7 +1186,7 @@
   // libraries and other oracles.
   MPM.addPass(InferFunctionAttrsPass());
 
-  if (Level > 1) {
+  if (Level.getSpeedupLevel() > 1) {
     FunctionPassManager EarlyFPM(DebugLogging);
     EarlyFPM.addPass(CallSiteSplittingPass());
     MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
@@ -1225,7 +1224,7 @@
   MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
 
   // Stop here at -O1.
-  if (Level == 1) {
+  if (Level == OptimizationLevel::O1) {
     // The LowerTypeTestsPass needs to run to lower type metadata and the
     // type.test intrinsics. The pass does nothing if CFI is disabled.
     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
@@ -1250,7 +1249,7 @@
   // function pointers.  When this happens, we often have to resolve varargs
   // calls, etc, so let instcombine do this.
   FunctionPassManager PeepholeFPM(DebugLogging);
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
     PeepholeFPM.addPass(AggressiveInstCombinePass());
   PeepholeFPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(PeepholeFPM, Level);
@@ -1886,13 +1885,13 @@
     assert(Matches.size() == 3 && "Must capture two matched strings!");
 
     OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
-                              .Case("O0", O0)
-                              .Case("O1", O1)
-                              .Case("O2", O2)
-                              .Case("O3", O3)
-                              .Case("Os", Os)
-                              .Case("Oz", Oz);
-    if (L == O0) {
+                              .Case("O0", OptimizationLevel::O0)
+                              .Case("O1", OptimizationLevel::O1)
+                              .Case("O2", OptimizationLevel::O2)
+                              .Case("O3", OptimizationLevel::O3)
+                              .Case("Os", OptimizationLevel::Os)
+                              .Case("Oz", OptimizationLevel::Oz);
+    if (L == OptimizationLevel::O0) {
       // Add instrumentation PGO passes -- at O0 we can still do PGO.
       if (PGOOpt && Matches[1] != "thinlto" &&
           (PGOOpt->Action == PGOOptions::IRInstr ||
@@ -1909,8 +1908,10 @@
     // This is consistent with old pass manager invoked via opt, but
     // inconsistent with clang. Clang doesn't enable loop vectorization
     // but does enable slp vectorization at Oz.
-    PTO.LoopVectorization = L > O1 && L < Oz;
-    PTO.SLPVectorization = L > O1 && L < Oz;
+    PTO.LoopVectorization =
+        L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
+    PTO.SLPVectorization =
+        L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz;
 
     if (Matches[1] == "default") {
       MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
Index: llvm/lib/LTO/LTOBackend.cpp
===================================================================
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -203,16 +203,16 @@
   default:
     llvm_unreachable("Invalid optimization level");
   case 0:
-    OL = PassBuilder::O0;
+    OL = PassBuilder::OptimizationLevel::O0;
     break;
   case 1:
-    OL = PassBuilder::O1;
+    OL = PassBuilder::OptimizationLevel::O1;
     break;
   case 2:
-    OL = PassBuilder::O2;
+    OL = PassBuilder::OptimizationLevel::O2;
     break;
   case 3:
-    OL = PassBuilder::O3;
+    OL = PassBuilder::OptimizationLevel::O3;
     break;
   }
 
Index: llvm/include/llvm/Passes/PassBuilder.h
===================================================================
--- llvm/include/llvm/Passes/PassBuilder.h
+++ llvm/include/llvm/Passes/PassBuilder.h
@@ -143,11 +143,26 @@
   ///
   /// This enumerates the LLVM-provided high-level optimization levels. Each
   /// level has a specific goal and rationale.
-  enum OptimizationLevel {
+  class OptimizationLevel final {
+    unsigned SpeedLevel = 2;
+    unsigned SizeLevel = 0;
+    OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
+        : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
+      // Check that only valid combinations are passed.
+      assert((0 <= SpeedLevel && SpeedLevel <= 3) &&
+             "Optimization level for speed should be 0, 1, 2, or 3");
+      assert((0 <= SizeLevel && SizeLevel <= 2) &&
+             "Optimization level for size should be 0, 1, or 2");
+      assert((SizeLevel == 0 || SpeedLevel == 2) &&
+             "Optimize for size should be encoded with speedup level == 2");
+    }
+
+  public:
+    OptimizationLevel() = default;
     /// Disable as many optimizations as possible. This doesn't completely
     /// disable the optimizer in all cases, for example always_inline functions
     /// can be required to be inlined for correctness.
-    O0,
+    static const OptimizationLevel O0;
 
     /// Optimize quickly without destroying debuggability.
     ///
@@ -161,10 +176,9 @@
     ///
     /// As an example, complex loop transformations such as versioning,
     /// vectorization, or fusion don't make sense here due to the degree to
-    /// which the executed code differs from the source code, and the compile time
-    /// cost.
-    O1,
-
+    /// which the executed code differs from the source code, and the compile
+    /// time cost.
+    static const OptimizationLevel O1;
     /// Optimize for fast execution as much as possible without triggering
     /// significant incremental compile time or code size growth.
     ///
@@ -181,8 +195,7 @@
     ///
     /// This is expected to be a good default optimization level for the vast
     /// majority of users.
-    O2,
-
+    static const OptimizationLevel O2;
     /// Optimize for fast execution as much as possible.
     ///
     /// This mode is significantly more aggressive in trading off compile time
@@ -197,8 +210,7 @@
     /// order to make even significantly slower compile times at least scale
     /// reasonably. This does not preclude very substantial constant factor
     /// costs though.
-    O3,
-
+    static const OptimizationLevel O3;
     /// Similar to \c O2 but tries to optimize for small code size instead of
     /// fast execution without triggering significant incremental execution
     /// time slowdowns.
@@ -209,8 +221,7 @@
     /// A consequence of the different core goal is that this should in general
     /// produce substantially smaller executables that still run in
     /// a reasonable amount of time.
-    Os,
-
+    static const OptimizationLevel Os;
     /// A very specialized mode that will optimize for code size at any and all
     /// costs.
     ///
@@ -218,7 +229,24 @@
     /// any effort taken to reduce the size is worth it regardless of the
     /// execution time impact. You should expect this level to produce rather
     /// slow, but very small, code.
-    Oz
+    static const OptimizationLevel Oz;
+
+    bool isOptimizingForSpeed() const {
+      return SizeLevel == 0 && SpeedLevel > 0;
+    }
+
+    bool isOptimizingForSize() const { return SizeLevel > 0; }
+
+    bool operator==(const OptimizationLevel &Other) const {
+      return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
+    }
+    bool operator!=(const OptimizationLevel &Other) const {
+      return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
+    }
+
+    unsigned getSpeedupLevel() const { return SpeedLevel; }
+
+    unsigned getSizeLevel() const { return SizeLevel; }
   };
 
   explicit PassBuilder(TargetMachine *TM = nullptr,
Index: clang/lib/CodeGen/BackendUtil.cpp
===================================================================
--- clang/lib/CodeGen/BackendUtil.cpp
+++ clang/lib/CodeGen/BackendUtil.cpp
@@ -924,7 +924,7 @@
     llvm_unreachable("Invalid optimization level!");
 
   case 1:
-    return PassBuilder::O1;
+    return PassBuilder::OptimizationLevel::O1;
 
   case 2:
     switch (Opts.OptimizeSize) {
@@ -932,17 +932,17 @@
       llvm_unreachable("Invalid optimization level for size!");
 
     case 0:
-      return PassBuilder::O2;
+      return PassBuilder::OptimizationLevel::O2;
 
     case 1:
-      return PassBuilder::Os;
+      return PassBuilder::OptimizationLevel::Os;
 
     case 2:
-      return PassBuilder::Oz;
+      return PassBuilder::OptimizationLevel::Oz;
     }
 
   case 3:
-    return PassBuilder::O3;
+    return PassBuilder::OptimizationLevel::O3;
   }
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to