Meinersbur created this revision.
Meinersbur added reviewers: kiranchandramohan, ftynse, peixin, jdoerfert, 
clementval, Leporacanthicus, kiranktp, arnamoy10, bryanpkc, Chuanfeng, 
AMDChirag, anchu-rajendran, SouraVX, fghanim, jdenny, MatsPetersson, ABataev.
Herald added subscribers: zzheng, guansong, yaxunl.
Meinersbur requested review of this revision.
Herald added a subscriber: sstefan1.
Herald added a project: clang.

When the step size does not evenly divide the range's end, round up to ensure
that the last multiple of the step size before reaching the upper bound is
reached. For instance, the trip count of

  for (int i = 0; i < 7; i+=5)

is two (i=0 and i=5), not (7-0)/5 == 1.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D118542

Files:
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/irbuilder_for_unsigned.c
  clang/test/OpenMP/irbuilder_for_unsigned_down.c
  clang/test/OpenMP/irbuilder_unroll_full.c
  clang/test/OpenMP/irbuilder_unroll_heuristic.c
  clang/test/OpenMP/irbuilder_unroll_partial_factor.c
  clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
  clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c
  clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
  clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
  clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
  clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c

Index: clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
+++ clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c
@@ -154,7 +154,10 @@
 // CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP8]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -162,8 +165,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
+++ clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c
@@ -173,7 +173,10 @@
 // CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP8]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -181,8 +184,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
+++ clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c
@@ -206,7 +206,10 @@
 // CHECK-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]]
 // CHECK-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP11]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -214,8 +217,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP12]], align 4
+// CHECK-NEXT:    %[[TMP13:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP13]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
+++ clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c
@@ -201,7 +201,10 @@
 // CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP8]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -209,8 +212,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
+++ clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c
@@ -180,7 +180,10 @@
 // CHECK-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]]
 // CHECK-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP11]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP11]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -188,8 +191,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP12:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP12]], align 4
+// CHECK-NEXT:    %[[TMP13:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP13]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_partial_factor.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_partial_factor.c
+++ clang/test/OpenMP/irbuilder_unroll_partial_factor.c
@@ -111,7 +111,10 @@
 // CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP8]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -119,8 +122,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_heuristic.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_heuristic.c
+++ clang/test/OpenMP/irbuilder_unroll_heuristic.c
@@ -112,7 +112,10 @@
 // CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP8]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -120,8 +123,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_full.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_full.c
+++ clang/test/OpenMP/irbuilder_unroll_full.c
@@ -111,7 +111,10 @@
 // CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP8]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -119,8 +122,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c
===================================================================
--- clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c
+++ clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c
@@ -111,7 +111,10 @@
 // CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub i32 %[[TMP8]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -119,8 +122,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
Index: clang/test/OpenMP/irbuilder_for_unsigned_down.c
===================================================================
--- clang/test/OpenMP/irbuilder_for_unsigned_down.c
+++ clang/test/OpenMP/irbuilder_for_unsigned_down.c
@@ -1,25 +1,23 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
-// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
 #define HEADER
 
-// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic(
+// CHECK-LABEL: define {{.*}}@workshareloop_unsigned(
 // CHECK-NEXT:  [[ENTRY:.*]]:
 // CHECK-NEXT:    %[[A_ADDR:.+]] = alloca float*, align 8
-// CHECK-NEXT:    %[[B_ADDR:.+]] = alloca float*, align 8
-// CHECK-NEXT:    %[[C_ADDR:.+]] = alloca float*, align 8
-// CHECK-NEXT:    %[[D_ADDR:.+]] = alloca float*, align 8
 // CHECK-NEXT:    %[[I:.+]] = alloca i32, align 4
 // CHECK-NEXT:    %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
 // CHECK-NEXT:    %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
 // CHECK-NEXT:    %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
+// CHECK-NEXT:    %[[P_LASTITER:.+]] = alloca i32, align 4
+// CHECK-NEXT:    %[[P_LOWERBOUND:.+]] = alloca i32, align 4
+// CHECK-NEXT:    %[[P_UPPERBOUND:.+]] = alloca i32, align 4
+// CHECK-NEXT:    %[[P_STRIDE:.+]] = alloca i32, align 4
 // CHECK-NEXT:    store float* %[[A:.+]], float** %[[A_ADDR]], align 8
-// CHECK-NEXT:    store float* %[[B:.+]], float** %[[B_ADDR]], align 8
-// CHECK-NEXT:    store float* %[[C:.+]], float** %[[C_ADDR]], align 8
-// CHECK-NEXT:    store float* %[[D:.+]], float** %[[D_ADDR]], align 8
-// CHECK-NEXT:    store i32 0, i32* %[[I]], align 4
+// CHECK-NEXT:    store i32 32000000, i32* %[[I]], align 4
 // CHECK-NEXT:    %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
 // CHECK-NEXT:    store i32* %[[I]], i32** %[[TMP0]], align 8
 // CHECK-NEXT:    %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
@@ -30,6 +28,16 @@
 // CHECK-NEXT:    br label %[[OMP_LOOP_PREHEADER:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[OMP_LOOP_PREHEADER]]:
+// CHECK-NEXT:    store i32 0, i32* %[[P_LOWERBOUND]], align 4
+// CHECK-NEXT:    %[[TMP3:.+]] = sub i32 %[[DOTCOUNT]], 1
+// CHECK-NEXT:    store i32 %[[TMP3]], i32* %[[P_UPPERBOUND]], align 4
+// CHECK-NEXT:    store i32 1, i32* %[[P_STRIDE]], align 4
+// CHECK-NEXT:    %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4
+// CHECK-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4
+// CHECK-NEXT:    %[[TMP6:.+]] = sub i32 %[[TMP5]], %[[TMP4]]
+// CHECK-NEXT:    %[[TMP7:.+]] = add i32 %[[TMP6]], 1
 // CHECK-NEXT:    br label %[[OMP_LOOP_HEADER:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[OMP_LOOP_HEADER]]:
@@ -37,53 +45,48 @@
 // CHECK-NEXT:    br label %[[OMP_LOOP_COND:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[OMP_LOOP_COND]]:
-// CHECK-NEXT:    %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]]
+// CHECK-NEXT:    %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[TMP7]]
 // CHECK-NEXT:    br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[OMP_LOOP_BODY]]:
-// CHECK-NEXT:    call void @__captured_stmt.1(i32* %[[I]], i32 %[[OMP_LOOP_IV]], %struct.anon.0* %[[AGG_CAPTURED1]])
-// CHECK-NEXT:    %[[TMP3:.+]] = load float*, float** %[[B_ADDR]], align 8
-// CHECK-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[I]], align 4
-// CHECK-NEXT:    %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64
-// CHECK-NEXT:    %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3]], i64 %[[IDXPROM]]
-// CHECK-NEXT:    %[[TMP5:.+]] = load float, float* %[[ARRAYIDX]], align 4
-// CHECK-NEXT:    %[[TMP6:.+]] = load float*, float** %[[C_ADDR]], align 8
-// CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[I]], align 4
-// CHECK-NEXT:    %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64
-// CHECK-NEXT:    %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP6]], i64 %[[IDXPROM2]]
-// CHECK-NEXT:    %[[TMP8:.+]] = load float, float* %[[ARRAYIDX3]], align 4
-// CHECK-NEXT:    %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]]
-// CHECK-NEXT:    %[[TMP9:.+]] = load float*, float** %[[D_ADDR]], align 8
-// CHECK-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[I]], align 4
-// CHECK-NEXT:    %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64
-// CHECK-NEXT:    %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP9]], i64 %[[IDXPROM4]]
-// CHECK-NEXT:    %[[TMP11:.+]] = load float, float* %[[ARRAYIDX5]], align 4
-// CHECK-NEXT:    %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]]
-// CHECK-NEXT:    %[[TMP12:.+]] = load float*, float** %[[A_ADDR]], align 8
-// CHECK-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[I]], align 4
-// CHECK-NEXT:    %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64
-// CHECK-NEXT:    %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP12]], i64 %[[IDXPROM7]]
-// CHECK-NEXT:    store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
+// CHECK-NEXT:    %[[TMP8:.+]] = add i32 %[[OMP_LOOP_IV]], %[[TMP4]]
+// CHECK-NEXT:    call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP8]], %struct.anon.0* %[[AGG_CAPTURED1]])
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[I]], align 4
+// CHECK-NEXT:    %[[CONV:.+]] = uitofp i32 %[[TMP9]] to float
+// CHECK-NEXT:    %[[TMP10:.+]] = load float*, float** %[[A_ADDR]], align 8
+// CHECK-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[I]], align 4
+// CHECK-NEXT:    %[[IDXPROM:.+]] = zext i32 %[[TMP11]] to i64
+// CHECK-NEXT:    %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP10]], i64 %[[IDXPROM]]
+// CHECK-NEXT:    store float %[[CONV]], float* %[[ARRAYIDX]], align 4
 // CHECK-NEXT:    br label %[[OMP_LOOP_INC]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[OMP_LOOP_INC]]:
 // CHECK-NEXT:    %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1
-// CHECK-NEXT:    br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
+// CHECK-NEXT:    br label %[[OMP_LOOP_HEADER]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[OMP_LOOP_EXIT]]:
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT:    %[[OMP_GLOBAL_THREAD_NUM2:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK-NEXT:    br label %[[OMP_LOOP_AFTER:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[OMP_LOOP_AFTER]]:
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
-void unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
-#pragma omp unroll partial
-  for (int i = 0; i < 2; i++) {
-    a[i] = b[i] * c[i] * d[i];
+
+extern "C" void workshareloop_unsigned(float *a) {
+#pragma omp for
+  for (unsigned i = 32000000; i > 33; i -= 7) {
+    a[i] = i;
   }
 }
 
 #endif // HEADER
+//
+//
+//
+//
+//
 
 // CHECK-LABEL: define {{.*}}@__captured_stmt(
 // CHECK-NEXT:  [[ENTRY:.*]]:
@@ -99,19 +102,24 @@
 // CHECK-NEXT:    %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
 // CHECK-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
 // CHECK-NEXT:    store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
-// CHECK-NEXT:    store i32 2, i32* %[[DOTSTOP]], align 4
-// CHECK-NEXT:    store i32 1, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    store i32 33, i32* %[[DOTSTOP]], align 4
+// CHECK-NEXT:    store i32 -7, i32* %[[DOTSTEP]], align 4
 // CHECK-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[DOTSTART]], align 4
 // CHECK-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTSTOP]], align 4
-// CHECK-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]]
+// CHECK-NEXT:    %[[CMP:.+]] = icmp ugt i32 %[[TMP4]], %[[TMP5]]
 // CHECK-NEXT:    br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_TRUE]]:
-// CHECK-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTSTOP]], align 4
-// CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
-// CHECK-NEXT:    %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]]
+// CHECK-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTSTART]], align 4
+// CHECK-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTSTOP]], align 4
+// CHECK-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]]
 // CHECK-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTSTEP]], align 4
-// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[SUB]], %[[TMP8]]
+// CHECK-NEXT:    %[[SUB1:.+]] = sub nsw i32 0, %[[TMP8]]
+// CHECK-NEXT:    %[[SUB2:.+]] = sub i32 %[[SUB1]], 1
+// CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB2]]
+// CHECK-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTSTEP]], align 4
+// CHECK-NEXT:    %[[SUB3:.+]] = sub nsw i32 0, %[[TMP9]]
+// CHECK-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[SUB3]]
 // CHECK-NEXT:    br label %[[COND_END:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_FALSE]]:
@@ -119,8 +127,8 @@
 // CHECK-EMPTY:
 // CHECK-NEXT:  [[COND_END]]:
 // CHECK-NEXT:    %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
-// CHECK-NEXT:    %[[TMP9:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP9]], align 4
+// CHECK-NEXT:    %[[TMP10:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 %[[COND]], i32* %[[TMP10]], align 4
 // CHECK-NEXT:    ret void
 // CHECK-NEXT:  }
 
@@ -137,7 +145,7 @@
 // CHECK-NEXT:    %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
 // CHECK-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
 // CHECK-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
-// CHECK-NEXT:    %[[MUL:.+]] = mul i32 1, %[[TMP3]]
+// CHECK-NEXT:    %[[MUL:.+]] = mul i32 -7, %[[TMP3]]
 // CHECK-NEXT:    %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
 // CHECK-NEXT:    %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
 // CHECK-NEXT:    store i32 %[[ADD]], i32* %[[TMP4]], align 4
@@ -146,7 +154,5 @@
 
 
 // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
+// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45}
 // CHECK: ![[META2:[0-9]+]] =
-// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]}
-// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
Index: clang/test/OpenMP/irbuilder_for_unsigned.c
===================================================================
--- clang/test/OpenMP/irbuilder_for_unsigned.c
+++ clang/test/OpenMP/irbuilder_for_unsigned.c
@@ -123,14 +123,17 @@
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTSTART]], align 4
 // CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]]
 // CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTSTEP]], align 4
-// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], [[TMP8]]
+// CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[TMP8]], 1
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTSTEP]], align 4
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]]
 // CHECK-NEXT:    br label [[COND_END:%.*]]
 // CHECK:       cond.false:
 // CHECK-NEXT:    br label [[COND_END]]
 // CHECK:       cond.end:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
-// CHECK-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8
-// CHECK-NEXT:    store i32 [[COND]], i32* [[TMP9]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32*, i32** [[DISTANCE_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[COND]], i32* [[TMP10]], align 4
 // CHECK-NEXT:    ret void
 //
 //
Index: clang/lib/Sema/SemaOpenMP.cpp
===================================================================
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -5327,6 +5327,8 @@
 
     IntegerLiteral *Zero = IntegerLiteral::Create(
         Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 0), LogicalTy, {});
+    IntegerLiteral *One = IntegerLiteral::Create(
+        Ctx, llvm::APInt(Ctx.getIntWidth(LogicalTy), 1), LogicalTy, {});
     Expr *Dist;
     if (Rel == BO_NE) {
       // When using a != comparison, the increment can be +1 or -1. This can be
@@ -5381,18 +5383,25 @@
 
       if (Rel == BO_LE || Rel == BO_GE) {
         // Add one to the range if the relational operator is inclusive.
-        Range = AssertSuccess(Actions.BuildBinOp(
-            nullptr, {}, BO_Add, Range,
-            Actions.ActOnIntegerConstant(SourceLocation(), 1).get()));
+        Range =
+            AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Add, Range, One));
       }
 
-      // Divide by the absolute step amount.
+      // Divide by the absolute step amount. If the range is not a multiple of
+      // the step size, rounding-up the effective upper bound ensures that the
+      // last iteration is included.
+      // Note that the rounding-up may cause an overflow in a temporary that
+      // could be avoided, but would have occurred in a C-style for-loop as well.
       Expr *Divisor = BuildVarRef(NewStep);
       if (Rel == BO_GE || Rel == BO_GT)
         Divisor =
             AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_Minus, Divisor));
+      Expr *DivisorMinusOne =
+          AssertSuccess(Actions.BuildBinOp(nullptr, {}, BO_Sub, Divisor, One));
+      Expr *RangeRoundUp = AssertSuccess(
+          Actions.BuildBinOp(nullptr, {}, BO_Add, Range, DivisorMinusOne));
       Dist = AssertSuccess(
-          Actions.BuildBinOp(nullptr, {}, BO_Div, Range, Divisor));
+          Actions.BuildBinOp(nullptr, {}, BO_Div, RangeRoundUp, Divisor));
 
       // If there is not at least one iteration, the range contains garbage. Fix
       // to zero in this case.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to