https://github.com/Meinersbur created 
https://github.com/llvm/llvm-project/pull/154999

The emitted code tests whether the current tile should execute the remainder 
iterations by checking whether the logical iteration number is the one after 
the floor iterations that execute the non-remainder iterations. There are two 
counts of floor iterations: the number of complete (non-remainder) floor 
iterations (simply the rounded-down division of the tripcount by the tile 
size), and that number plus an additional floor iteration for the remainder 
iterations. The code used the wrong one (the count that includes the remainder 
iteration), which caused the condition to never match.

>From 6dc21da545dc46323eea681f640a95204a4d39f7 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Fri, 22 Aug 2025 19:54:03 +0200
Subject: [PATCH 1/2] [OpenMPIRBuilder] Fix tripcount not a multiple of tile
 size

---
 .../test/OpenMP/irbuilder_unroll_partial_factor_for.c | 10 +++++-----
 .../irbuilder_unroll_partial_heuristic_constant_for.c |  8 ++++----
 .../irbuilder_unroll_partial_heuristic_runtime_for.c  | 10 +++++-----
 .../OpenMP/irbuilder_unroll_unroll_partial_factor.c   |  8 ++++----
 .../irbuilder_unroll_unroll_partial_heuristic.c       |  8 ++++----
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp             | 11 ++++++-----
 6 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
index 8780d51de8a36..79d1c6bb524c9 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
@@ -79,7 +79,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, 
float *c, float *d)
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
-// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
 // CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:
@@ -152,11 +152,11 @@ void unroll_partial_heuristic_for(int n, float *a, float 
*b, float *c, float *d)
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull 
[[META6]], !align [[META7]]
 // CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 // CHECK-NEXT:    store i32 [[TMP6]], ptr [[DOTSTOP]], align 4
 // CHECK-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4
@@ -178,7 +178,7 @@ void unroll_partial_heuristic_for(int n, float *a, float 
*b, float *c, float *d)
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP13]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -198,7 +198,7 @@ void unroll_partial_heuristic_for(int n, float *a, float 
*b, float *c, float *d)
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git 
a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
index 728f67ccf2843..3183aa7bbca74 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
@@ -85,7 +85,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
-// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
 // CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:
@@ -176,7 +176,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    store i32 128, ptr [[DOTSTOP]], align 4
@@ -199,7 +199,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -219,7 +219,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
index f41f1fe5ce719..91e56bfb08894 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
@@ -87,7 +87,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, 
float *b, float *c, f
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
-// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
 // CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:
@@ -178,11 +178,11 @@ void unroll_partial_heuristic_runtime_for(int n, float 
*a, float *b, float *c, f
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull 
[[META6]], !align [[META7]]
 // CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 // CHECK-NEXT:    store i32 [[TMP6]], ptr [[DOTSTOP]], align 4
 // CHECK-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4
@@ -204,7 +204,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, 
float *b, float *c, f
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP13]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -224,7 +224,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, 
float *b, float *c, f
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c 
b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
index 3c2407851e5a5..0855ac3c6c1f4 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
@@ -75,7 +75,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, 
float *d) {
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
-// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
 // CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:
@@ -148,7 +148,7 @@ void unroll_partial_factor_for(float *a, float *b, float 
*c, float *d) {
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    store i32 2, ptr [[DOTSTOP]], align 4
@@ -171,7 +171,7 @@ void unroll_partial_factor_for(float *a, float *b, float 
*c, float *d) {
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -191,7 +191,7 @@ void unroll_partial_factor_for(float *a, float *b, float 
*c, float *d) {
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c 
b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
index a03bd47ca8b8f..5e6123259c917 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
@@ -59,7 +59,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], 
[[OMP_FLOOR0_TRIPCOUNT]]
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
-// CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[TMP3]]
 // CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:
@@ -129,7 +129,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META7:![0-9]+]], !align [[META8:![0-9]+]]
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    store i32 2, ptr [[DOTSTOP]], align 4
@@ -152,7 +152,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META7]], !align [[META8]]
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -172,7 +172,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META7]], !align [[META8]]
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 989bcf45e0006..606f2e03821a3 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5582,13 +5582,13 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, 
ArrayRef<CanonicalLoopInfo *> Loops,
   // Compute the trip counts of the floor loops.
   Builder.SetCurrentDebugLocation(DL);
   Builder.restoreIP(OutermostLoop->getPreheaderIP());
-  SmallVector<Value *, 4> FloorCount, FloorRems;
+  SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems;
   for (int i = 0; i < NumLoops; ++i) {
     Value *TileSize = TileSizes[i];
     Value *OrigTripCount = OrigTripCounts[i];
     Type *IVType = OrigTripCount->getType();
 
-    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
+    Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, 
TileSize);
     Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
 
     // 0 if tripcount divides the tilesize, 1 otherwise.
@@ -5602,11 +5602,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, 
ArrayRef<CanonicalLoopInfo *> Loops,
         Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
 
     FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
-    FloorTripCount =
-        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
+    Value *FloorTripCount =
+        Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
                           "omp_floor" + Twine(i) + ".tripcount", true);
 
     // Remember some values for later use.
+    FloorCompleteCount.push_back(FloorCompleteTripCount);
     FloorCount.push_back(FloorTripCount);
     FloorRems.push_back(FloorTripRem);
   }
@@ -5661,7 +5662,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, 
ArrayRef<CanonicalLoopInfo *> Loops,
     Value *TileSize = TileSizes[i];
 
     Value *FloorIsEpilogue =
-        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
+        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
     Value *TileTripCount =
         Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
 

>From 7e6de456dd28e934f62de4e7c95a30df600a4dfc Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Fri, 22 Aug 2025 19:58:53 +0200
Subject: [PATCH 2/2] Reduce change noise

---
 clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c   | 8 ++++----
 .../irbuilder_unroll_partial_heuristic_constant_for.c     | 6 +++---
 .../irbuilder_unroll_partial_heuristic_runtime_for.c      | 8 ++++----
 .../test/OpenMP/irbuilder_unroll_unroll_partial_factor.c  | 6 +++---
 .../OpenMP/irbuilder_unroll_unroll_partial_heuristic.c    | 6 +++---
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
index 79d1c6bb524c9..a9514e1e7d145 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
@@ -152,11 +152,11 @@ void unroll_partial_heuristic_for(int n, float *a, float 
*b, float *c, float *d)
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull 
[[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
 // CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 // CHECK-NEXT:    store i32 [[TMP6]], ptr [[DOTSTOP]], align 4
 // CHECK-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4
@@ -178,7 +178,7 @@ void unroll_partial_heuristic_for(int n, float *a, float 
*b, float *c, float *d)
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP13]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -198,7 +198,7 @@ void unroll_partial_heuristic_for(int n, float *a, float 
*b, float *c, float *d)
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git 
a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
index 3183aa7bbca74..8ca000a05792f 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
@@ -176,7 +176,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    store i32 128, ptr [[DOTSTOP]], align 4
@@ -199,7 +199,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -219,7 +219,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
index 91e56bfb08894..5fbcf8f2d030c 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
@@ -178,11 +178,11 @@ void unroll_partial_heuristic_runtime_for(int n, float 
*a, float *b, float *c, f
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 1
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull 
[[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
 // CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 // CHECK-NEXT:    store i32 [[TMP6]], ptr [[DOTSTOP]], align 4
 // CHECK-NEXT:    store i32 1, ptr [[DOTSTEP]], align 4
@@ -204,7 +204,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, 
float *b, float *c, f
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP13]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -224,7 +224,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, 
float *b, float *c, f
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c 
b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
index 0855ac3c6c1f4..9a28c0c1bf713 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
@@ -148,7 +148,7 @@ void unroll_partial_factor_for(float *a, float *b, float 
*c, float *d) {
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    store i32 2, ptr [[DOTSTOP]], align 4
@@ -171,7 +171,7 @@ void unroll_partial_factor_for(float *a, float *b, float 
*c, float *d) {
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -191,7 +191,7 @@ void unroll_partial_factor_for(float *a, float *b, float 
*c, float *d) {
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META6]], !align [[META7]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c 
b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
index 5e6123259c917..24d42d265d6a6 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
@@ -129,7 +129,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull 
[[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTSTART]], align 4
 // CHECK-NEXT:    store i32 2, ptr [[DOTSTOP]], align 4
@@ -152,7 +152,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, 
[[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, 
!nonnull [[META7]], !align [[META8]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[COND]], ptr [[TMP10]], align 4
 // CHECK-NEXT:    ret void
 //
@@ -172,7 +172,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 1, [[TMP3]]
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, 
!nonnull [[META7]], !align [[META8]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[TMP4]], align 4
 // CHECK-NEXT:    ret void
 //

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to