echristo updated this revision to Diff 261928.
echristo edited the summary of this revision.
echristo added a comment.
Herald added a subscriber: zzheng.

Add a test case that runs opt, and add a command-line option to opt so we can enable it.

  rG LLVM Github Monorepo



Index: llvm/tools/opt/NewPMDriver.cpp
--- llvm/tools/opt/NewPMDriver.cpp
+++ llvm/tools/opt/NewPMDriver.cpp
@@ -100,6 +100,11 @@
              "the OptimizerLast extension point into default pipelines"),
+// Individual pipeline tuning options.
+static cl::opt<bool> DisableLoopUnrolling(
+    "new-pm-disable-loop-unrolling",
+    cl::desc("Disable loop unrolling in all relevant passes"), cl::init(false));
 extern cl::opt<PGOKind> PGOKindFlag;
 extern cl::opt<std::string> ProfileFile;
 extern cl::opt<CSPGOKind> CSPGOKindFlag;
@@ -260,6 +265,10 @@
   PipelineTuningOptions PTO;
+  // LoopUnrolling defaults to true and DisableLoopUnrolling is initialized to
+  // false above, so we can assign unconditionally without checking whether
+  // the option was passed.
+  PTO.LoopUnrolling = !DisableLoopUnrolling;
   PTO.Coroutines = Coroutines;
   PassBuilder PB(TM, PTO, P, &PIC);
   registerEPCallbacks(PB, VerifyEachPass, DebugPM);
Index: llvm/test/Transforms/LoopUnroll/FullUnroll.ll
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/FullUnroll.ll
@@ -0,0 +1,81 @@
+; RUN: opt -passes='default<O1>' -disable-verify --mtriple x86_64-pc-linux-gnu -new-pm-disable-loop-unrolling=true \
+; RUN: -S -o - %s | FileCheck %s
+; We don't end up deleting the loop, but we remove everything inside of it so checking for any
+; reasonable instruction from the original loop will work.
+; CHECK-NOT: br i1
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+$_Z6Helperv = comdat any
+; Function Attrs: noinline optnone uwtable
+define dso_local void @_Z3Runv() #0 {
+  call void @_Z6Helperv()
+  ret void
+; Function Attrs: noinline nounwind optnone uwtable
+define linkonce_odr dso_local void @_Z6Helperv() #1 comdat {
+  %nodes = alloca [5 x i32*], align 16
+  %num_active = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 5, i32* %num_active, align 4
+  br label %while.cond
+while.cond:                                       ; preds = %for.end, %entry
+  %0 = load i32, i32* %num_active, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %while.body, label %while.end
+while.body:                                       ; preds = %while.cond
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+for.cond:                                         ; preds = %for.inc, %while.body
+  %1 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %1, 5
+  br i1 %cmp, label %for.body, label %for.end
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %2 to i64
+  %arrayidx = getelementptr inbounds [5 x i32*], [5 x i32*]* %nodes, i64 0, i64 %idxprom
+  %3 = load i32*, i32** %arrayidx, align 8
+  %tobool1 = icmp ne i32* %3, null
+  br i1 %tobool1, label %if.then, label %if.end
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %num_active, align 4
+  %dec = add nsw i32 %4, -1
+  store i32 %dec, i32* %num_active, align 4
+  br label %if.end
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+for.inc:                                          ; preds = %if.end
+  %5 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+for.end:                                          ; preds = %for.cond
+  br label %while.cond
+while.end:                                        ; preds = %while.cond
+  ret void
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 11.0.0 ( 3ccd454c102b069d2230a18cfe16b84a5f005fc8)"}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.unroll.full"}
Index: llvm/lib/Passes/PassBuilder.cpp
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -503,12 +503,13 @@
   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
   // because it changes IR to makes profile annotation in back compile
-  // inaccurate.
-  if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
-       PGOOpt->Action != PGOOptions::SampleUse) &&
-      PTO.LoopUnrolling)
+  // inaccurate. The normal unroller doesn't pay attention to forced full unroll
+  // attributes, so we need to make sure the full unroll pass is still allowed
+  // to honor them.
+  if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+      PGOOpt->Action != PGOOptions::SampleUse)
-                                    /*OnlyWhenForced=*/false,
+                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
   for (auto &C : LoopOptimizerEndEPCallbacks)
Index: clang/test/Misc/loop-opt-setup.c
--- clang/test/Misc/loop-opt-setup.c
+++ clang/test/Misc/loop-opt-setup.c
@@ -1,5 +1,5 @@
-// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
-// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s
+// RUN: %clang -O1 -fexperimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-NEWPM
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -fno-unroll-loops -S -o - %s -emit-llvm | FileCheck %s -check-prefix=CHECK-OLDPM
 extern int a[16];
 int b = 0;
 int foo(void) {
@@ -8,5 +8,34 @@
     a[i] = b += 2;
   return b;
+// Check br i1 to make sure that the loop is fully unrolled
 // CHECK-NOT: br i1
+inline void Helper() {
+  const int *nodes[5];
+  int num_active = 5;
+  while (num_active) {
+#pragma clang loop unroll(full)
+    for (int i = 0; i < 5; ++i) {
+      if (nodes[i]) {
+        --num_active;
+      }
+    }
+  }
+void Run() {
+  Helper();
+// Check br i1 to make sure the loop is gone; there will still be a label branch for the infinite loop.
+// CHECK-NEWPM-NOT: br i1
+// The old pass manager doesn't remove the loop so check for 5 load i32*.
+// CHECK-OLDPM: Helper
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
+// CHECK-OLDPM: load i32*
cfe-commits mailing list

Reply via email to