Author: Michael Kruse
Date: 2025-08-26T11:30:42+02:00
New Revision: e9a27a3a7515eb9854588700a589912ee851c4e3

URL: 
https://github.com/llvm/llvm-project/commit/e9a27a3a7515eb9854588700a589912ee851c4e3
DIFF: 
https://github.com/llvm/llvm-project/commit/e9a27a3a7515eb9854588700a589912ee851c4e3.diff

LOG: [OpenMPIRBuilder] Fix tripcount not a multiple of tile size (#154999)

The emitted code tests whether the current tile should executing the
remainder iterations by checking the logical iteration number is the one
after the floor iterations that execute the non-remainder iterations.
There are two counts of how many iterations there are: Those of
non-remainder iterations (simply rounded-down division of tripcount and
tile size), and those including an additional floor iteration for the
remainder iterations. The code was used the wrong one that caused the
condition to never match.

Added: 
    

Modified: 
    clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
    clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
    clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
    clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
    clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
index 8780d51de8a36..a9514e1e7d145 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
@@ -79,7 +79,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, 
float *c, float *d)
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
-// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
 // CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:

diff  --git 
a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
index 728f67ccf2843..8ca000a05792f 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
@@ -85,7 +85,7 @@ void unroll_partial_heuristic_constant_for(float *a, float 
*b, float *c, float *
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
-// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
 // CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:

diff  --git 
a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c 
b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
index f41f1fe5ce719..5fbcf8f2d030c 100644
--- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
+++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
@@ -87,7 +87,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, 
float *b, float *c, f
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
-// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
 // CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:

diff  --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c 
b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
index 3c2407851e5a5..9a28c0c1bf713 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
@@ -75,7 +75,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, 
float *d) {
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
 // CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
-// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
 // CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:

diff  --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c 
b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
index a03bd47ca8b8f..24d42d265d6a6 100644
--- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
+++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
@@ -59,7 +59,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, 
float *c, float *d) {
 // CHECK-NEXT:    [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], 
[[OMP_FLOOR0_TRIPCOUNT]]
 // CHECK-NEXT:    br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], 
label [[OMP_FLOOR0_EXIT:%.*]]
 // CHECK:       omp_floor0.body:
-// CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], 
[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[TMP3]]
 // CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8
 // CHECK-NEXT:    br label [[OMP_TILE0_PREHEADER:%.*]]
 // CHECK:       omp_tile0.preheader:

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 50ab206e2db8e..c0e956840f989 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5581,13 +5581,13 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, 
ArrayRef<CanonicalLoopInfo *> Loops,
   // Compute the trip counts of the floor loops.
   Builder.SetCurrentDebugLocation(DL);
   Builder.restoreIP(OutermostLoop->getPreheaderIP());
-  SmallVector<Value *, 4> FloorCount, FloorRems;
+  SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems;
   for (int i = 0; i < NumLoops; ++i) {
     Value *TileSize = TileSizes[i];
     Value *OrigTripCount = OrigTripCounts[i];
     Type *IVType = OrigTripCount->getType();
 
-    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
+    Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, 
TileSize);
     Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
 
     // 0 if tripcount divides the tilesize, 1 otherwise.
@@ -5601,11 +5601,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, 
ArrayRef<CanonicalLoopInfo *> Loops,
         Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
 
     FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
-    FloorTripCount =
-        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
+    Value *FloorTripCount =
+        Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
                           "omp_floor" + Twine(i) + ".tripcount", true);
 
     // Remember some values for later use.
+    FloorCompleteCount.push_back(FloorCompleteTripCount);
     FloorCount.push_back(FloorTripCount);
     FloorRems.push_back(FloorTripRem);
   }
@@ -5660,7 +5661,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, 
ArrayRef<CanonicalLoopInfo *> Loops,
     Value *TileSize = TileSizes[i];
 
     Value *FloorIsEpilogue =
-        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
+        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
     Value *TileTripCount =
         Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
 


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to