[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-16 Thread Mircea Trofin via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7acfda633f13: [llvm] Make new pass manager's 
OptimizationLevel a class (authored by mtrofin).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
  llvm/test/Transforms/LoopUnroll/opt-levels.ll
  llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll

Index: llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll
===
--- /dev/null
+++ llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz
+
+; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
+; the behavior, we artificially disable unrolling for anything but O3 by setting
+; the default threshold to 0.
+
+; O3: for.inner.1
+; O2-NOT: for.inner.1
+; Os-NOT: for.inner.1
+; Oz-NOT: for.inner.1
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmpJ = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmpJ
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5
+  %add = add i32 %0, %sum
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
Index: llvm/test/Transforms/LoopUnroll/opt-levels.ll
===
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/opt-levels.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz
+
+; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
+; the behavior, we artificially disable unrolling for anything but O3 by setting
+; the default threshold to 0.
+
+; O3: loop2.preheader
+; O2-NOT: loop2.preheader
+; Os-NOT: loop2.preheader
+; Oz-NOT: loop2.preheader
+
+define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind {
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  %offset1 = getelementptr i32, i32* %addr1, i32 %iv1
+  store i32 %iv1, i32* %offset1, align 4
+  br label %loop2.header
+
+loop2.header:
+  %e = icmp uge i32 %iter, 1
+  br i1 %e, label %loop2, label %exit2
+
+loop2:
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %offset2 = getelementptr i32, i32* %addr2, i32 %iv2
+  store i32 %iv2, i32* %offset2, align 4
+  %inc2 = add i32 %iv2, 1
+  %exitcnd2 = icmp uge i32 %inc2, %iter
+  br i1 %exitcnd2, label %exit2, label %loop2
+
+e

[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-15 Thread Teresa Johnson via Phabricator via cfe-commits
tejohnson accepted this revision.
tejohnson added a comment.
This revision is now accepted and ready to land.

LGTM

One thing to consider changing/removing in the summary is this comment:
"For example, (enum) "Level > 1" captures not only O2 and O3, but also Os, and Oz."
since that is actually correct (Os/Oz should be included in Level>1 as they are 
O2).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-15 Thread Mircea Trofin via Phabricator via cfe-commits
mtrofin updated this revision to Diff 238352.
mtrofin marked 4 inline comments as done.
mtrofin added a comment.
Herald added a subscriber: zzheng.

Incorporated feedback:

- tests
- updated patch description


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
  llvm/test/Transforms/LoopUnroll/opt-levels.ll
  llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll

Index: llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll
===
--- /dev/null
+++ llvm/test/Transforms/LoopUnrollAndJam/opt-levels.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -enable-npm-unroll-and-jam -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz
+
+; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
+; the behavior, we artificially disable unrolling for anything but O3 by setting
+; the default threshold to 0.
+
+; O3: for.inner.1
+; O2-NOT: for.inner.1
+; Os-NOT: for.inner.1
+; Oz-NOT: for.inner.1
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmpJ = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmpJ
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5
+  %add = add i32 %0, %sum
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
Index: llvm/test/Transforms/LoopUnroll/opt-levels.ll
===
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/opt-levels.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O2
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=O3
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Os
+; RUN: opt < %s -S -passes="default" -unroll-runtime=true -unroll-threshold-default=0 -unroll-threshold-aggressive=300 | FileCheck %s -check-prefix=Oz
+
+; Check that Os and Oz are optimized like O2, not like O3. To easily highlight
+; the behavior, we artificially disable unrolling for anything but O3 by setting
+; the default threshold to 0.
+
+; O3: loop2.preheader
+; O2-NOT: loop2.preheader
+; Os-NOT: loop2.preheader
+; Oz-NOT: loop2.preheader
+
+define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind {
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  %offset1 = getelementptr i32, i32* %addr1, i32 %iv1
+  store i32 %iv1, i32* %offset1, align 4
+  br label %loop2.header
+
+loop2.header:
+  %e = icmp uge i32 %iter, 1
+  br i1 %e, label %loop2, label %exit2
+
+loop2:
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %offset2 = getelementptr i32, i32* %addr2, i32 %iv2
+  store i32 %iv2, i32* %offset2, align 4
+  %inc2 = add i32 %iv2, 1
+  %exitcnd2 = icmp uge i32 %inc2, %iter
+  br i1 %exitcnd2, label %ex

[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-14 Thread Teresa Johnson via Phabricator via cfe-commits
tejohnson added a comment.

Overall I like this approach.




Comment at: llvm/lib/Passes/PassBuilder.cpp:246
 
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
-  switch (Level) {
-  case PassBuilder::O0:
-  case PassBuilder::O1:
-  case PassBuilder::O2:
-  case PassBuilder::O3:
-return false;
-
-  case PassBuilder::Os:
-  case PassBuilder::Oz:
-return true;
-  }
-  llvm_unreachable("Invalid optimization level!");
-}
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {0,
+   0};

Nit, it would be good to document the constant parameters, e.g. 
{/*SpeedLevel*/0, /*SizeLevel*/0}



Comment at: llvm/lib/Passes/PassBuilder.cpp:407
   // Hoisting of scalars and load expressions.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() >= 2) {
 if (EnableGVNHoist)

Nit, all similar checks below are Level.getSpeedupLevel() > 1, make this one 
consistent.



Comment at: llvm/lib/Passes/PassBuilder.cpp:487
   PTO.LoopUnrolling)
-LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false,
+LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+/*OnlyWhenForced=*/false,

This results in a behavior change, right? I.e. we used to inadvertently get 
the O3 threshold for full unrolling with Oz/Os but no longer will. If so, make 
sure you note this in the patch summary. Also add a test.



Comment at: llvm/lib/Passes/PassBuilder.cpp:973
   if (EnableUnrollAndJam && PTO.LoopUnrolling) {
-OptimizePM.addPass(LoopUnrollAndJamPass(Level));
+OptimizePM.addPass(LoopUnrollAndJamPass(Level.getSpeedupLevel()));
   }

This one and the loop unrolling pass below will also get a change in behavior 
for Os/Oz, correct? That seems reasonable, but needs to be noted in summary and 
tested.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-14 Thread Mircea Trofin via Phabricator via cfe-commits
mtrofin updated this revision to Diff 238107.
mtrofin marked 2 inline comments as done.
mtrofin added a comment.

Alternative: expose speedup/size components to more closely align with legacy PM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp

Index: llvm/lib/Passes/PassBuilder.cpp
===
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -243,20 +243,18 @@
 
 extern cl::opt FlattenedProfileUsed;
 
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
-  switch (Level) {
-  case PassBuilder::O0:
-  case PassBuilder::O1:
-  case PassBuilder::O2:
-  case PassBuilder::O3:
-return false;
-
-  case PassBuilder::Os:
-  case PassBuilder::Oz:
-return true;
-  }
-  llvm_unreachable("Invalid optimization level!");
-}
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {0,
+   0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {1,
+   0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {2,
+   0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {3,
+   0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {2,
+   1};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {2,
+   2};
 
 namespace {
 
@@ -395,7 +393,7 @@
 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
  ThinLTOPhase Phase,
  bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations!");
+  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
   FunctionPassManager FPM(DebugLogging);
 
   // Form SSA out of local memory accesses after breaking apart aggregates into
@@ -406,7 +404,7 @@
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() >= 2) {
 if (EnableGVNHoist)
   FPM.addPass(GVNHoistPass());
 
@@ -418,7 +416,7 @@
   }
 
   // Speculative execution if the target has divergent branches; otherwise nop.
-  if (Level > O1) {
+  if (Level.getSpeedupLevel() > 1) {
 FPM.addPass(SpeculativeExecutionPass());
 
 // Optimize based on known information about branches, and cleanup afterward.
@@ -426,11 +424,11 @@
 FPM.addPass(CorrelatedValuePropagationPass());
   }
   FPM.addPass(SimplifyCFGPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
 FPM.addPass(AggressiveInstCombinePass());
   FPM.addPass(InstCombinePass());
 
-  if (!isOptimizingForSize(Level))
+  if (!Level.isOptimizingForSize())
 FPM.addPass(LibCallsShrinkWrapPass());
 
   invokePeepholeEPCallbacks(FPM, Level);
@@ -438,11 +436,11 @@
   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
   // using the size value profile. Don't perform this when optimizing for size.
   if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
-  !isOptimizingForSize(Level) && Level > O1)
+  (Level.getSpeedupLevel() > 1 && !Level.isOptimizingForSize()))
 FPM.addPass(PGOMemOPSizeOpt());
 
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
-  if (Level > O1)
+  if (Level.getSpeedupLevel() > 1)
 FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
 
@@ -469,7 +467,7 @@
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Rotate Loop - disable header duplication at -Oz
-  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz));
   // TODO: Investigate promotion cap for O1.
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
   LPM1.addPass(SimpleLoopUnswitchPass());
@@ -486,7 +484,8 @@
   if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse) &&
   PTO.LoopUnrolling)
-LPM2.addPass(LoopFullUnrollPass(Level, /*OnlyWhenForced=*/false,
+LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+/*OnlyWhenForced=*/false,
 PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
@@ -509,7 +508,7 @@
   FPM.addP

[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-10 Thread Teresa Johnson via Phabricator via cfe-commits
tejohnson added a comment.

I just have a few high level comments from looking through it just now. The 
summary needs a fix since Os/Oz are in fact O2, so OptLevel > 1 was not doing 
the wrong thing.




Comment at: llvm/include/llvm/Passes/PassBuilder.h:224
+
+bool isOptimizingForSpeed() const { return Level > 0 && Level < 4; }
+bool isOptimizingForSize() const { return Level == 4 || Level == 5; }

Can you add a comment as to why Os and Oz are considered as optimizing for 
speed? I know this is for compatibility with the current code, but would be 
good to document (and consider changing in the future).



Comment at: llvm/include/llvm/Passes/PassBuilder.h:225
+bool isOptimizingForSpeed() const { return Level > 0 && Level < 4; }
+bool isOptimizingForSize() const { return Level == 4 || Level == 5; }
+bool isO2Or3() const { return Level == 2 || Level == 3; }

This one is a little confusing to read, since at this point there is no 
correlation between the values 4 and 5, and the Os and Oz static variables. 
Consider making some constexpr values for each level, used in the methods here 
and in the static variable initializations?



Comment at: llvm/include/llvm/Passes/PassBuilder.h:226
+bool isOptimizingForSize() const { return Level == 4 || Level == 5; }
+bool isO2Or3() const { return Level == 2 || Level == 3; }
+bool operator==(const OptimizationLevel &Other) const {

Since (as discussed off-patch), in the old PM Os and Oz are also opt level 2, 
this should presumably return true for those as well. That should obviate the 
need for many places in the patch where you are currently checking isO2Or3 || 
isOptimizingForSize, and you can just check isO2Or3.



Comment at: llvm/include/llvm/Passes/PassBuilder.h:274
+  /// This is an interface that can be used to populate a \c
+  /// CGSCCAnalysisManager with all registered CGSCC analyses. Callers can 
still
+  /// manually register any additional analyses. Callers can also pre-register

There are a lot of formatting changes throughout the patch that are unrelated 
to your changes - it seems like you might have clang formatted the whole files? 
Can you only include the changes related to the patch here, it's harder to 
review with lots of spurious diffs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-10 Thread Mircea Trofin via Phabricator via cfe-commits
mtrofin updated this revision to Diff 237464.
mtrofin added a comment.

Speedup level is actually '2' for Os and Oz


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp

Index: llvm/lib/Passes/PassBuilder.cpp
===
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -192,10 +192,9 @@
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run Partial inlinining pass"));
 
-static cl::opt
-RunNewGVN("enable-npm-newgvn", cl::init(false),
-  cl::Hidden, cl::ZeroOrMore,
-  cl::desc("Run NewGVN instead of GVN"));
+static cl::opt RunNewGVN("enable-npm-newgvn", cl::init(false), cl::Hidden,
+   cl::ZeroOrMore,
+   cl::desc("Run NewGVN instead of GVN"));
 
 static cl::opt EnableGVNHoist(
 "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
@@ -238,20 +237,12 @@
 
 extern cl::opt FlattenedProfileUsed;
 
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
-  switch (Level) {
-  case PassBuilder::O0:
-  case PassBuilder::O1:
-  case PassBuilder::O2:
-  case PassBuilder::O3:
-return false;
-
-  case PassBuilder::Os:
-  case PassBuilder::Oz:
-return true;
-  }
-  llvm_unreachable("Invalid optimization level!");
-}
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {1};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {2};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {3};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {4};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {5};
 
 namespace {
 
@@ -386,11 +377,9 @@
 C(LAM);
 }
 
-FunctionPassManager
-PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations!");
+FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(
+OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
+  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
   FunctionPassManager FPM(DebugLogging);
 
   // Form SSA out of local memory accesses after breaking apart aggregates into
@@ -401,7 +390,7 @@
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  if (Level > O1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
 if (EnableGVNHoist)
   FPM.addPass(GVNHoistPass());
 
@@ -413,31 +402,31 @@
   }
 
   // Speculative execution if the target has divergent branches; otherwise nop.
-  if (Level > O1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
 FPM.addPass(SpeculativeExecutionPass());
 
-// Optimize based on known information about branches, and cleanup afterward.
+// Optimize based on known information about branches, and cleanup
+// afterward.
 FPM.addPass(JumpThreadingPass());
 FPM.addPass(CorrelatedValuePropagationPass());
   }
   FPM.addPass(SimplifyCFGPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
 FPM.addPass(AggressiveInstCombinePass());
   FPM.addPass(InstCombinePass());
 
-  if (!isOptimizingForSize(Level))
+  if (!Level.isOptimizingForSize())
 FPM.addPass(LibCallsShrinkWrapPass());
 
   invokePeepholeEPCallbacks(FPM, Level);
 
   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
   // using the size value profile. Don't perform this when optimizing for size.
-  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
-  !isOptimizingForSize(Level) && Level > O1)
+  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && Level.isO2Or3())
 FPM.addPass(PGOMemOPSizeOpt());
 
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
-  if (Level > O1)
+  if (Level.isO2Or3() || Level.isOptimizingForSize())
 FPM.addPass(TailCallElimPass());
   FPM.addPass(SimplifyCFGPass());
 
@@ -464,7 +453,7 @@
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Rotate Loop - disable header duplication at -Oz
-  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz));
   // TODO: Investigate promotion cap for O1.
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
   LPM1.addPass(SimpleLoopUnswitchPass());
@@ -481,7 +470,8 @@
   if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
PGOOpt->Action != PGOOptions::SampleUse) 

[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-10 Thread Mircea Trofin via Phabricator via cfe-commits
mtrofin added a comment.

Another example where there is a discrepancy with the old pass manager: in the 
old pass manager (PassManagerBuilder::addFunctionSimplificationPasses):

  if (OptLevel > 1) {
  if (EnableGVNHoist)
MPM.add(createGVNHoistPass());

(before this change, new pass manager):

  if (Level > O1) {
 if (EnableGVNHoist)
   FPM.addPass(GVNHoistPass());

Which really means "O2-3, and Os and Oz". I currently left it backwards 
compatible - since I'm not sure the added support for gvn hoisting for Os/z was 
intentional.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72547/new/

https://reviews.llvm.org/D72547



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72547: [llvm] Make new pass manager's OptimizationLevel a class

2020-01-10 Thread Mircea Trofin via Phabricator via cfe-commits
mtrofin created this revision.
Herald added subscribers: llvm-commits, cfe-commits, dang, dexonsmith, 
steven_wu, hiraditya, mehdi_amini.
Herald added projects: clang, LLVM.
mtrofin added reviewers: tejohnson, davidxl.
mtrofin added a comment.

Another example where there is a discrepancy with the old pass manager: in the 
old pass manager (PassManagerBuilder::addFunctionSimplificationPasses):

  if (OptLevel > 1) {
  if (EnableGVNHoist)
MPM.add(createGVNHoistPass());

(before this change, new pass manager):

  if (Level > O1) {
 if (EnableGVNHoist)
   FPM.addPass(GVNHoistPass());

Which really means "O2-3, and Os and Oz". I currently left it backwards 
compatible - since I'm not sure the added support for gvn hoisting for Os/z was 
intentional.


The old pass manager separated speed optimization and size optimization
levels into two unsigned values. Coalescing both in an enum in the new
pass manager may lead to unintentional casts and comparisons. For example,
(enum) "Level > 1" captures not only O2 and O3, but also Os, and Oz.

In particular, taking a look at how the loop unroll passes were constructed
previously, the Os/Oz are now (==new pass manager) treated just like O3,
likely unintentionally.

This change disallows raw comparisons between optimization levels, to
avoid such unintended effects.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72547

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/Passes/PassBuilder.h
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Passes/PassBuilder.cpp

Index: llvm/lib/Passes/PassBuilder.cpp
===
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -192,10 +192,9 @@
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run Partial inlinining pass"));
 
-static cl::opt
-RunNewGVN("enable-npm-newgvn", cl::init(false),
-  cl::Hidden, cl::ZeroOrMore,
-  cl::desc("Run NewGVN instead of GVN"));
+static cl::opt RunNewGVN("enable-npm-newgvn", cl::init(false), cl::Hidden,
+   cl::ZeroOrMore,
+   cl::desc("Run NewGVN instead of GVN"));
 
 static cl::opt EnableGVNHoist(
 "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
@@ -238,20 +237,12 @@
 
 extern cl::opt FlattenedProfileUsed;
 
-static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
-  switch (Level) {
-  case PassBuilder::O0:
-  case PassBuilder::O1:
-  case PassBuilder::O2:
-  case PassBuilder::O3:
-return false;
-
-  case PassBuilder::Os:
-  case PassBuilder::Oz:
-return true;
-  }
-  llvm_unreachable("Invalid optimization level!");
-}
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {0};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {1};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {2};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {3};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {4};
+const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {5};
 
 namespace {
 
@@ -386,11 +377,9 @@
 C(LAM);
 }
 
-FunctionPassManager
-PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- ThinLTOPhase Phase,
- bool DebugLogging) {
-  assert(Level != O0 && "Must request optimizations!");
+FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(
+OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging) {
+  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
   FunctionPassManager FPM(DebugLogging);
 
   // Form SSA out of local memory accesses after breaking apart aggregates into
@@ -401,7 +390,7 @@
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  if (Level > O1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
 if (EnableGVNHoist)
   FPM.addPass(GVNHoistPass());
 
@@ -413,31 +402,31 @@
   }
 
   // Speculative execution if the target has divergent branches; otherwise nop.
-  if (Level > O1) {
+  if (Level.isO2Or3() || Level.isOptimizingForSize()) {
 FPM.addPass(SpeculativeExecutionPass());
 
-// Optimize based on known information about branches, and cleanup afterward.
+// Optimize based on known information about branches, and cleanup
+// afterward.
 FPM.addPass(JumpThreadingPass());
 FPM.addPass(CorrelatedValuePropagationPass());
   }
   FPM.addPass(SimplifyCFGPass());
-  if (Level == O3)
+  if (Level == OptimizationLevel::O3)
 FPM.addPass(Aggressive