https://github.com/zahiraam updated 
https://github.com/llvm/llvm-project/pull/190153

>From 1ec0ddf936195edfbee14b6f199850843161d868 Mon Sep 17 00:00:00 2001
From: Ammarguellat <[email protected]>
Date: Thu, 2 Apr 2026 04:53:10 -0700
Subject: [PATCH 1/2] [OpenMP] Fix iteration count for nested triangular OpenMP
 loops

---
 clang/lib/Sema/SemaOpenMP.cpp                 |  27 +-
 .../test/OpenMP/for_non_rectangular_codegen.c | 206 ++---
 .../OpenMP/for_private_reduction_codegen.cpp  |  34 +-
 clang/test/OpenMP/loop_collapse_codegen.cpp   | 797 ++++++++++++------
 4 files changed, 668 insertions(+), 396 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 0d3c7fc4907a2..44ce3adac05ae 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -8854,19 +8854,22 @@ calculateNumIters(Sema &SemaRef, Scope *S, SourceLocation DefaultLoc,
 
   ExprResult Diff;
 
-  // For triangular loops, use already computed Upper and Lower bounds to
-  // calculate the number of iterations: Upper - Lower + 1.
+  // For nested triangular loops (depth >= 2), use already computed Upper and
+  // Lower bounds to calculate the number of iterations: Upper - Lower + 1.
+  // Don't apply to first-level triangular loops as the standard formula handles
+  // those correctly.
   if (TestIsStrictOp && InitDependOnLC.has_value() &&
-      !CondDependOnLC.has_value()) {
-    Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Sub, Upper, Lower);
-    if (!Diff.isUsable())
-      return nullptr;
-    Diff =
-        SemaRef.BuildBinOp(S, DefaultLoc, BO_Add, Diff.get(),
-                           SemaRef.ActOnIntegerConstant(DefaultLoc, 1).get());
-    if (!Diff.isUsable())
-      return nullptr;
-    return Diff.get();
+      InitDependOnLC.value() >= 2 && !CondDependOnLC.has_value()) {
+      Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Sub, Upper, Lower);
+      if (!Diff.isUsable())
+          return nullptr;
+
+      Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Add, Diff.get(),
+          SemaRef.ActOnIntegerConstant(DefaultLoc, 1).get());
+      if (!Diff.isUsable())
+          return nullptr;
+
+      return Diff.get();
   }
 
   // If need to reorganize, then calculate the form as Upper - (Lower - Step [+
diff --git a/clang/test/OpenMP/for_non_rectangular_codegen.c b/clang/test/OpenMP/for_non_rectangular_codegen.c
index b35a8f6e27b50..f8c7c84093d89 100644
--- a/clang/test/OpenMP/for_non_rectangular_codegen.c
+++ b/clang/test/OpenMP/for_non_rectangular_codegen.c
@@ -40,14 +40,14 @@ void collapsed(int mp) {
 // CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[I0:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP13:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[_TMP15:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[J17:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[I18:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[I019:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J19:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I20:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I021:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB2:[0-9]+]])
 // CHECK-NEXT:    store i32 [[MP]], ptr [[MP_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[MP_ADDR]], align 4
@@ -90,20 +90,22 @@ void collapsed(int mp) {
 // CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTUPPER]], align 4
 // CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTLOWER]], align 4
 // CHECK-NEXT:    [[SUB7:%.*]] = sub i32 [[TMP12]], [[TMP13]]
-// CHECK-NEXT:    [[ADD8:%.*]] = add i32 [[SUB7]], 1
-// CHECK-NEXT:    [[CONV9:%.*]] = zext i32 [[ADD8]] to i64
-// CHECK-NEXT:    [[MUL10:%.*]] = mul nsw i64 [[CONV6]], [[CONV9]]
-// CHECK-NEXT:    [[MUL11:%.*]] = mul nsw i64 [[MUL10]], 10
-// CHECK-NEXT:    [[SUB12:%.*]] = sub nsw i64 [[MUL11]], 1
-// CHECK-NEXT:    store i64 [[SUB12]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK-NEXT:    [[SUB8:%.*]] = sub i32 [[SUB7]], 1
+// CHECK-NEXT:    [[ADD9:%.*]] = add i32 [[SUB8]], 1
+// CHECK-NEXT:    [[DIV10:%.*]] = udiv i32 [[ADD9]], 1
+// CHECK-NEXT:    [[CONV11:%.*]] = zext i32 [[DIV10]] to i64
+// CHECK-NEXT:    [[MUL12:%.*]] = mul nsw i64 [[CONV6]], [[CONV11]]
+// CHECK-NEXT:    [[MUL13:%.*]] = mul nsw i64 [[MUL12]], 10
+// CHECK-NEXT:    [[SUB14:%.*]] = sub nsw i64 [[MUL13]], 1
+// CHECK-NEXT:    store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_3]], align 8
 // CHECK-NEXT:    store i32 0, ptr [[J]], align 4
 // CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[J]], align 4
 // CHECK-NEXT:    store i32 [[TMP14]], ptr [[I]], align 4
 // CHECK-NEXT:    store i32 0, ptr [[I0]], align 4
-// CHECK-NEXT:    store i32 0, ptr [[_TMP13]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[_TMP15]], align 4
 // CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-NEXT:    [[CMP14:%.*]] = icmp slt i32 0, [[TMP15]]
-// CHECK-NEXT:    br i1 [[CMP14]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]]
+// CHECK-NEXT:    [[CMP16:%.*]] = icmp slt i32 0, [[TMP15]]
+// CHECK-NEXT:    br i1 [[CMP16]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
 // CHECK:       omp.precond.then:
 // CHECK-NEXT:    store i64 0, ptr [[DOTOMP_LB]], align 8
 // CHECK-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
@@ -113,122 +115,136 @@ void collapsed(int mp) {
 // CHECK-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], 
i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
 // CHECK-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
 // CHECK-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK-NEXT:    [[CMP20:%.*]] = icmp sgt i64 [[TMP17]], [[TMP18]]
-// CHECK-NEXT:    br i1 [[CMP20]], label [[COND_TRUE22:%.*]], label 
[[COND_FALSE23:%.*]]
-// CHECK:       cond.true22:
+// CHECK-NEXT:    [[CMP22:%.*]] = icmp sgt i64 [[TMP17]], [[TMP18]]
+// CHECK-NEXT:    br i1 [[CMP22]], label [[COND_TRUE24:%.*]], label [[COND_FALSE25:%.*]]
+// CHECK:       cond.true24:
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK-NEXT:    br label [[COND_END24:%.*]]
-// CHECK:       cond.false23:
+// CHECK-NEXT:    br label [[COND_END26:%.*]]
+// CHECK:       cond.false25:
 // CHECK-NEXT:    [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK-NEXT:    br label [[COND_END24]]
-// CHECK:       cond.end24:
-// CHECK-NEXT:    [[COND25:%.*]] = phi i64 [ [[TMP19]], [[COND_TRUE22]] ], [ 
[[TMP20]], [[COND_FALSE23]] ]
-// CHECK-NEXT:    store i64 [[COND25]], ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    br label [[COND_END26]]
+// CHECK:       cond.end26:
+// CHECK-NEXT:    [[COND27:%.*]] = phi i64 [ [[TMP19]], [[COND_TRUE24]] ], [ [[TMP20]], [[COND_FALSE25]] ]
+// CHECK-NEXT:    store i64 [[COND27]], ptr [[DOTOMP_UB]], align 8
 // CHECK-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
 // CHECK-NEXT:    store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK:       omp.inner.for.cond:
 // CHECK-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK-NEXT:    [[CMP26:%.*]] = icmp sle i64 [[TMP22]], [[TMP23]]
-// CHECK-NEXT:    br i1 [[CMP26]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]]
+// CHECK-NEXT:    [[CMP28:%.*]] = icmp sle i64 [[TMP22]], [[TMP23]]
+// CHECK-NEXT:    br i1 [[CMP28]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK:       omp.inner.for.body:
 // CHECK-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTUPPER]], align 4
 // CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB28:%.*]] = sub i32 [[TMP25]], [[TMP26]]
-// CHECK-NEXT:    [[ADD29:%.*]] = add i32 [[SUB28]], 1
-// CHECK-NEXT:    [[MUL30:%.*]] = mul i32 1, [[ADD29]]
-// CHECK-NEXT:    [[MUL31:%.*]] = mul i32 [[MUL30]], 10
-// CHECK-NEXT:    [[CONV32:%.*]] = zext i32 [[MUL31]] to i64
-// CHECK-NEXT:    [[DIV33:%.*]] = sdiv i64 [[TMP24]], [[CONV32]]
-// CHECK-NEXT:    [[MUL34:%.*]] = mul nsw i64 [[DIV33]], 1
-// CHECK-NEXT:    [[ADD35:%.*]] = add nsw i64 0, [[MUL34]]
-// CHECK-NEXT:    [[CONV36:%.*]] = trunc i64 [[ADD35]] to i32
-// CHECK-NEXT:    store i32 [[CONV36]], ptr [[J17]], align 4
-// CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[J17]], align 4
-// CHECK-NEXT:    [[CONV37:%.*]] = sext i32 [[TMP27]] to i64
+// CHECK-NEXT:    [[SUB30:%.*]] = sub i32 [[TMP25]], [[TMP26]]
+// CHECK-NEXT:    [[SUB31:%.*]] = sub i32 [[SUB30]], 1
+// CHECK-NEXT:    [[ADD32:%.*]] = add i32 [[SUB31]], 1
+// CHECK-NEXT:    [[DIV33:%.*]] = udiv i32 [[ADD32]], 1
+// CHECK-NEXT:    [[MUL34:%.*]] = mul i32 1, [[DIV33]]
+// CHECK-NEXT:    [[MUL35:%.*]] = mul i32 [[MUL34]], 10
+// CHECK-NEXT:    [[CONV36:%.*]] = zext i32 [[MUL35]] to i64
+// CHECK-NEXT:    [[DIV37:%.*]] = sdiv i64 [[TMP24]], [[CONV36]]
+// CHECK-NEXT:    [[MUL38:%.*]] = mul nsw i64 [[DIV37]], 1
+// CHECK-NEXT:    [[ADD39:%.*]] = add nsw i64 0, [[MUL38]]
+// CHECK-NEXT:    [[CONV40:%.*]] = trunc i64 [[ADD39]] to i32
+// CHECK-NEXT:    store i32 [[CONV40]], ptr [[J19]], align 4
+// CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[J19]], align 4
+// CHECK-NEXT:    [[CONV41:%.*]] = sext i32 [[TMP27]] to i64
 // CHECK-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTUPPER]], align 4
 // CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB38:%.*]] = sub i32 [[TMP30]], [[TMP31]]
-// CHECK-NEXT:    [[ADD39:%.*]] = add i32 [[SUB38]], 1
-// CHECK-NEXT:    [[MUL40:%.*]] = mul i32 1, [[ADD39]]
-// CHECK-NEXT:    [[MUL41:%.*]] = mul i32 [[MUL40]], 10
-// CHECK-NEXT:    [[CONV42:%.*]] = zext i32 [[MUL41]] to i64
-// CHECK-NEXT:    [[DIV43:%.*]] = sdiv i64 [[TMP29]], [[CONV42]]
-// CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTUPPER]], align 4
-// CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB44:%.*]] = sub i32 [[TMP32]], [[TMP33]]
-// CHECK-NEXT:    [[ADD45:%.*]] = add i32 [[SUB44]], 1
-// CHECK-NEXT:    [[MUL46:%.*]] = mul i32 1, [[ADD45]]
+// CHECK-NEXT:    [[SUB42:%.*]] = sub i32 [[TMP30]], [[TMP31]]
+// CHECK-NEXT:    [[SUB43:%.*]] = sub i32 [[SUB42]], 1
+// CHECK-NEXT:    [[ADD44:%.*]] = add i32 [[SUB43]], 1
+// CHECK-NEXT:    [[DIV45:%.*]] = udiv i32 [[ADD44]], 1
+// CHECK-NEXT:    [[MUL46:%.*]] = mul i32 1, [[DIV45]]
 // CHECK-NEXT:    [[MUL47:%.*]] = mul i32 [[MUL46]], 10
 // CHECK-NEXT:    [[CONV48:%.*]] = zext i32 [[MUL47]] to i64
-// CHECK-NEXT:    [[MUL49:%.*]] = mul nsw i64 [[DIV43]], [[CONV48]]
-// CHECK-NEXT:    [[SUB50:%.*]] = sub nsw i64 [[TMP28]], [[MUL49]]
-// CHECK-NEXT:    [[DIV51:%.*]] = sdiv i64 [[SUB50]], 10
-// CHECK-NEXT:    [[MUL52:%.*]] = mul nsw i64 [[DIV51]], 1
-// CHECK-NEXT:    [[ADD53:%.*]] = add nsw i64 [[CONV37]], [[MUL52]]
-// CHECK-NEXT:    [[CONV54:%.*]] = trunc i64 [[ADD53]] to i32
-// CHECK-NEXT:    store i32 [[CONV54]], ptr [[I18]], align 4
+// CHECK-NEXT:    [[DIV49:%.*]] = sdiv i64 [[TMP29]], [[CONV48]]
+// CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTUPPER]], align 4
+// CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB50:%.*]] = sub i32 [[TMP32]], [[TMP33]]
+// CHECK-NEXT:    [[SUB51:%.*]] = sub i32 [[SUB50]], 1
+// CHECK-NEXT:    [[ADD52:%.*]] = add i32 [[SUB51]], 1
+// CHECK-NEXT:    [[DIV53:%.*]] = udiv i32 [[ADD52]], 1
+// CHECK-NEXT:    [[MUL54:%.*]] = mul i32 1, [[DIV53]]
+// CHECK-NEXT:    [[MUL55:%.*]] = mul i32 [[MUL54]], 10
+// CHECK-NEXT:    [[CONV56:%.*]] = zext i32 [[MUL55]] to i64
+// CHECK-NEXT:    [[MUL57:%.*]] = mul nsw i64 [[DIV49]], [[CONV56]]
+// CHECK-NEXT:    [[SUB58:%.*]] = sub nsw i64 [[TMP28]], [[MUL57]]
+// CHECK-NEXT:    [[DIV59:%.*]] = sdiv i64 [[SUB58]], 10
+// CHECK-NEXT:    [[MUL60:%.*]] = mul nsw i64 [[DIV59]], 1
+// CHECK-NEXT:    [[ADD61:%.*]] = add nsw i64 [[CONV41]], [[MUL60]]
+// CHECK-NEXT:    [[CONV62:%.*]] = trunc i64 [[ADD61]] to i32
+// CHECK-NEXT:    store i32 [[CONV62]], ptr [[I20]], align 4
 // CHECK-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTUPPER]], align 4
 // CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB55:%.*]] = sub i32 [[TMP36]], [[TMP37]]
-// CHECK-NEXT:    [[ADD56:%.*]] = add i32 [[SUB55]], 1
-// CHECK-NEXT:    [[MUL57:%.*]] = mul i32 1, [[ADD56]]
-// CHECK-NEXT:    [[MUL58:%.*]] = mul i32 [[MUL57]], 10
-// CHECK-NEXT:    [[CONV59:%.*]] = zext i32 [[MUL58]] to i64
-// CHECK-NEXT:    [[DIV60:%.*]] = sdiv i64 [[TMP35]], [[CONV59]]
+// CHECK-NEXT:    [[SUB63:%.*]] = sub i32 [[TMP36]], [[TMP37]]
+// CHECK-NEXT:    [[SUB64:%.*]] = sub i32 [[SUB63]], 1
+// CHECK-NEXT:    [[ADD65:%.*]] = add i32 [[SUB64]], 1
+// CHECK-NEXT:    [[DIV66:%.*]] = udiv i32 [[ADD65]], 1
+// CHECK-NEXT:    [[MUL67:%.*]] = mul i32 1, [[DIV66]]
+// CHECK-NEXT:    [[MUL68:%.*]] = mul i32 [[MUL67]], 10
+// CHECK-NEXT:    [[CONV69:%.*]] = zext i32 [[MUL68]] to i64
+// CHECK-NEXT:    [[DIV70:%.*]] = sdiv i64 [[TMP35]], [[CONV69]]
 // CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTUPPER]], align 4
 // CHECK-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB61:%.*]] = sub i32 [[TMP38]], [[TMP39]]
-// CHECK-NEXT:    [[ADD62:%.*]] = add i32 [[SUB61]], 1
-// CHECK-NEXT:    [[MUL63:%.*]] = mul i32 1, [[ADD62]]
-// CHECK-NEXT:    [[MUL64:%.*]] = mul i32 [[MUL63]], 10
-// CHECK-NEXT:    [[CONV65:%.*]] = zext i32 [[MUL64]] to i64
-// CHECK-NEXT:    [[MUL66:%.*]] = mul nsw i64 [[DIV60]], [[CONV65]]
-// CHECK-NEXT:    [[SUB67:%.*]] = sub nsw i64 [[TMP34]], [[MUL66]]
+// CHECK-NEXT:    [[SUB71:%.*]] = sub i32 [[TMP38]], [[TMP39]]
+// CHECK-NEXT:    [[SUB72:%.*]] = sub i32 [[SUB71]], 1
+// CHECK-NEXT:    [[ADD73:%.*]] = add i32 [[SUB72]], 1
+// CHECK-NEXT:    [[DIV74:%.*]] = udiv i32 [[ADD73]], 1
+// CHECK-NEXT:    [[MUL75:%.*]] = mul i32 1, [[DIV74]]
+// CHECK-NEXT:    [[MUL76:%.*]] = mul i32 [[MUL75]], 10
+// CHECK-NEXT:    [[CONV77:%.*]] = zext i32 [[MUL76]] to i64
+// CHECK-NEXT:    [[MUL78:%.*]] = mul nsw i64 [[DIV70]], [[CONV77]]
+// CHECK-NEXT:    [[SUB79:%.*]] = sub nsw i64 [[TMP34]], [[MUL78]]
 // CHECK-NEXT:    [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP41:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTUPPER]], align 4
 // CHECK-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB68:%.*]] = sub i32 [[TMP42]], [[TMP43]]
-// CHECK-NEXT:    [[ADD69:%.*]] = add i32 [[SUB68]], 1
-// CHECK-NEXT:    [[MUL70:%.*]] = mul i32 1, [[ADD69]]
-// CHECK-NEXT:    [[MUL71:%.*]] = mul i32 [[MUL70]], 10
-// CHECK-NEXT:    [[CONV72:%.*]] = zext i32 [[MUL71]] to i64
-// CHECK-NEXT:    [[DIV73:%.*]] = sdiv i64 [[TMP41]], [[CONV72]]
+// CHECK-NEXT:    [[SUB80:%.*]] = sub i32 [[TMP42]], [[TMP43]]
+// CHECK-NEXT:    [[SUB81:%.*]] = sub i32 [[SUB80]], 1
+// CHECK-NEXT:    [[ADD82:%.*]] = add i32 [[SUB81]], 1
+// CHECK-NEXT:    [[DIV83:%.*]] = udiv i32 [[ADD82]], 1
+// CHECK-NEXT:    [[MUL84:%.*]] = mul i32 1, [[DIV83]]
+// CHECK-NEXT:    [[MUL85:%.*]] = mul i32 [[MUL84]], 10
+// CHECK-NEXT:    [[CONV86:%.*]] = zext i32 [[MUL85]] to i64
+// CHECK-NEXT:    [[DIV87:%.*]] = sdiv i64 [[TMP41]], [[CONV86]]
 // CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTUPPER]], align 4
 // CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB74:%.*]] = sub i32 [[TMP44]], [[TMP45]]
-// CHECK-NEXT:    [[ADD75:%.*]] = add i32 [[SUB74]], 1
-// CHECK-NEXT:    [[MUL76:%.*]] = mul i32 1, [[ADD75]]
-// CHECK-NEXT:    [[MUL77:%.*]] = mul i32 [[MUL76]], 10
-// CHECK-NEXT:    [[CONV78:%.*]] = zext i32 [[MUL77]] to i64
-// CHECK-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[DIV73]], [[CONV78]]
-// CHECK-NEXT:    [[SUB80:%.*]] = sub nsw i64 [[TMP40]], [[MUL79]]
-// CHECK-NEXT:    [[DIV81:%.*]] = sdiv i64 [[SUB80]], 10
-// CHECK-NEXT:    [[MUL82:%.*]] = mul nsw i64 [[DIV81]], 10
-// CHECK-NEXT:    [[SUB83:%.*]] = sub nsw i64 [[SUB67]], [[MUL82]]
-// CHECK-NEXT:    [[MUL84:%.*]] = mul nsw i64 [[SUB83]], 1
-// CHECK-NEXT:    [[ADD85:%.*]] = add nsw i64 0, [[MUL84]]
-// CHECK-NEXT:    [[CONV86:%.*]] = trunc i64 [[ADD85]] to i32
-// CHECK-NEXT:    store i32 [[CONV86]], ptr [[I019]], align 4
-// CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[I18]], align 4
+// CHECK-NEXT:    [[SUB88:%.*]] = sub i32 [[TMP44]], [[TMP45]]
+// CHECK-NEXT:    [[SUB89:%.*]] = sub i32 [[SUB88]], 1
+// CHECK-NEXT:    [[ADD90:%.*]] = add i32 [[SUB89]], 1
+// CHECK-NEXT:    [[DIV91:%.*]] = udiv i32 [[ADD90]], 1
+// CHECK-NEXT:    [[MUL92:%.*]] = mul i32 1, [[DIV91]]
+// CHECK-NEXT:    [[MUL93:%.*]] = mul i32 [[MUL92]], 10
+// CHECK-NEXT:    [[CONV94:%.*]] = zext i32 [[MUL93]] to i64
+// CHECK-NEXT:    [[MUL95:%.*]] = mul nsw i64 [[DIV87]], [[CONV94]]
+// CHECK-NEXT:    [[SUB96:%.*]] = sub nsw i64 [[TMP40]], [[MUL95]]
+// CHECK-NEXT:    [[DIV97:%.*]] = sdiv i64 [[SUB96]], 10
+// CHECK-NEXT:    [[MUL98:%.*]] = mul nsw i64 [[DIV97]], 10
+// CHECK-NEXT:    [[SUB99:%.*]] = sub nsw i64 [[SUB79]], [[MUL98]]
+// CHECK-NEXT:    [[MUL100:%.*]] = mul nsw i64 [[SUB99]], 1
+// CHECK-NEXT:    [[ADD101:%.*]] = add nsw i64 0, [[MUL100]]
+// CHECK-NEXT:    [[CONV102:%.*]] = trunc i64 [[ADD101]] to i32
+// CHECK-NEXT:    store i32 [[CONV102]], ptr [[I021]], align 4
+// CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[I20]], align 4
 // CHECK-NEXT:    [[TMP47:%.*]] = load i32, ptr [[MP_ADDR]], align 4
-// CHECK-NEXT:    [[CMP87:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
-// CHECK-NEXT:    br i1 [[CMP87]], label [[OMP_BODY_NEXT:%.*]], label 
[[OMP_BODY_CONTINUE:%.*]]
+// CHECK-NEXT:    [[CMP103:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
+// CHECK-NEXT:    br i1 [[CMP103]], label [[OMP_BODY_NEXT:%.*]], label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK:       omp.body.next:
 // CHECK-NEXT:    br label [[OMP_BODY_CONTINUE]]
 // CHECK:       omp.body.continue:
 // CHECK-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK:       omp.inner.for.inc:
 // CHECK-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK-NEXT:    [[ADD89:%.*]] = add nsw i64 [[TMP48]], 1
-// CHECK-NEXT:    store i64 [[ADD89]], ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[ADD105:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK-NEXT:    store i64 [[ADD105]], ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK:       omp.inner.for.end:
 // CHECK-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
@@ -277,21 +293,21 @@ void collapsed(int mp) {
 // SIMD-ONLY0-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I0]], align 4
 // SIMD-ONLY0-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP6]], 1
 // SIMD-ONLY0-NEXT:    store i32 [[INC]], ptr [[I0]], align 4
-// SIMD-ONLY0-NEXT:    br label [[FOR_COND4]], !llvm.loop [[LOOP2:![0-9]+]]
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND4]], !llvm.loop [[LOOP1:![0-9]+]]
 // SIMD-ONLY0:       for.end:
 // SIMD-ONLY0-NEXT:    br label [[FOR_INC7:%.*]]
 // SIMD-ONLY0:       for.inc7:
 // SIMD-ONLY0-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
 // SIMD-ONLY0-NEXT:    [[INC8:%.*]] = add nsw i32 [[TMP7]], 1
 // SIMD-ONLY0-NEXT:    store i32 [[INC8]], ptr [[I]], align 4
-// SIMD-ONLY0-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP4:![0-9]+]]
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP3:![0-9]+]]
 // SIMD-ONLY0:       for.end9:
 // SIMD-ONLY0-NEXT:    br label [[FOR_INC10:%.*]]
 // SIMD-ONLY0:       for.inc10:
 // SIMD-ONLY0-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
 // SIMD-ONLY0-NEXT:    [[INC11:%.*]] = add nsw i32 [[TMP8]], 1
 // SIMD-ONLY0-NEXT:    store i32 [[INC11]], ptr [[J]], align 4
-// SIMD-ONLY0-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// SIMD-ONLY0-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // SIMD-ONLY0:       for.end12:
 // SIMD-ONLY0-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/for_private_reduction_codegen.cpp b/clang/test/OpenMP/for_private_reduction_codegen.cpp
index f27b6b1d35225..fdaf6f3068c1b 100644
--- a/clang/test/OpenMP/for_private_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_private_reduction_codegen.cpp
@@ -112,7 +112,7 @@ int main(void) {
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
 // CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
-// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
 // CHECK:       for.end:
 // CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 
[[TMP0]], i32 4)
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB3]], i32 1, ptr @_Z8func_redv.omp_outlined, ptr [[ARRAY]])
@@ -152,7 +152,7 @@ int main(void) {
 // CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 8
 // CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], 
align 8
 // CHECK-NEXT:    store ptr [[ARRAY]], ptr [[ARRAY_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAY_ADDR]], align 8, 
!nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAY_ADDR]], align 8, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]]
 // CHECK-NEXT:    call void @_ZN3SumC1Ei(ptr noundef nonnull align 4 
dereferenceable(4) [[RESULT]], i32 noundef 0)
 // CHECK-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
 // CHECK-NEXT:    store i32 9, ptr [[DOTOMP_UB]], align 4
@@ -259,7 +259,7 @@ int main(void) {
 // CHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    store ptr [[RHS]], ptr [[RHS_ADDR]], align 8
 // CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RHS_ADDR]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RHS_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    [[VAL:%.*]] = getelementptr inbounds nuw [[CLASS_SUM:%.*]], 
ptr [[TMP0]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4
 // CHECK-NEXT:    [[VAL2:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr 
[[THIS1]], i32 0, i32 0
@@ -293,7 +293,7 @@ int main(void) {
 // CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK-NEXT:    [[VAL:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr 
[[THIS1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[RHS_ADDR]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[RHS_ADDR]], align 8, !nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    [[VAL2:%.*]] = getelementptr inbounds nuw [[CLASS_SUM]], ptr 
[[TMP1]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VAL2]], align 4
 // CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
@@ -353,9 +353,9 @@ int main(void) {
 // CHECK-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
 // CHECK-NEXT:    store ptr [[SUM_V]], ptr [[SUM_V_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    store i32 0, ptr [[TMP1]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    store ptr [[TMP2]], ptr [[TMP]], align 8
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -374,7 +374,7 @@ int main(void) {
 // CHECK-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
 // CHECK-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull 
[[META4]], !align [[META5]]
 // CHECK-NEXT:    store i32 0, ptr [[SUM_V4]], align 4
 // CHECK-NEXT:    store ptr [[SUM_V4]], ptr [[_TMP5]], align 8
 // CHECK-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
@@ -409,7 +409,7 @@ int main(void) {
 // CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr 
[[TMP16]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 8, !nonnull 
[[META4]], !align [[META5]]
 // CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
 // CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP18]]
 // CHECK-NEXT:    store i32 [[ADD9]], ptr [[TMP19]], align 4
@@ -507,13 +507,13 @@ int main(void) {
 // CHECK-NEXT:    store ptr [[V]], ptr [[V_ADDR]], align 8
 // CHECK-NEXT:    store ptr [[SUM_V]], ptr [[SUM_V_ADDR]], align 8
 // CHECK-NEXT:    store ptr [[PROD_V]], ptr [[PROD_V_ADDR]], align 8
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    store i32 0, ptr [[TMP1]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PROD_V_ADDR]], align 8, 
!nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PROD_V_ADDR]], align 8, 
!nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    store i32 1, ptr [[TMP2]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[SUM_V_ADDR]], align 8, 
!nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    store ptr [[TMP3]], ptr [[TMP]], align 8
-// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[PROD_V_ADDR]], align 8, 
!nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[PROD_V_ADDR]], align 8, 
!nonnull [[META4]], !align [[META5]]
 // CHECK-NEXT:    store ptr [[TMP4]], ptr [[_TMP1]], align 8
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -532,10 +532,10 @@ int main(void) {
 // CHECK-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
 // CHECK-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull 
[[META4]], !align [[META5]]
 // CHECK-NEXT:    store i32 0, ptr [[SUM_V5]], align 4
 // CHECK-NEXT:    store ptr [[SUM_V5]], ptr [[_TMP6]], align 8
-// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !nonnull 
[[META4]], !align [[META5]]
 // CHECK-NEXT:    store i32 1, ptr [[PROD_V7]], align 4
 // CHECK-NEXT:    store ptr [[PROD_V7]], ptr [[_TMP8]], align 8
 // CHECK-NEXT:    call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
@@ -570,7 +570,7 @@ int main(void) {
 // CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr 
[[TMP19]], i64 [[IDXPROM]]
 // CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[_TMP6]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[_TMP6]], align 8, !nonnull 
[[META4]], !align [[META5]]
 // CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 // CHECK-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP21]]
 // CHECK-NEXT:    store i32 [[ADD12]], ptr [[TMP22]], align 4
@@ -579,7 +579,7 @@ int main(void) {
 // CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP25]] to i64
 // CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr 
[[TMP24]], i64 [[IDXPROM13]]
 // CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
-// CHECK-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[_TMP8]], align 8, !nonnull 
[[META5]], !align [[META6]]
+// CHECK-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[_TMP8]], align 8, !nonnull 
[[META4]], !align [[META5]]
 // CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
 // CHECK-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[TMP28]], [[TMP26]]
 // CHECK-NEXT:    store i32 [[MUL15]], ptr [[TMP27]], align 4
@@ -698,7 +698,7 @@ int main(void) {
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
 // CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
 // CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
-// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
 // CHECK:       for.end:
 // CHECK-NEXT:    call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 
[[TMP0]], i32 4)
 // CHECK-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB3]], i32 1, ptr @main.omp_outlined, ptr [[V]])
diff --git a/clang/test/OpenMP/loop_collapse_codegen.cpp b/clang/test/OpenMP/loop_collapse_codegen.cpp
index dbe42e934076d..9c8da1d86cb39 100644
--- a/clang/test/OpenMP/loop_collapse_codegen.cpp
+++ b/clang/test/OpenMP/loop_collapse_codegen.cpp
@@ -1,17 +1,18 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown \
 // RUN: -emit-llvm %s -o - | FileCheck %s
 
 // expected-no-diagnostics
 
-// CHECK-LABEL: define internal void @_Z17triangulat_loop_1v.omp_outlined(
+// CHECK-LABEL: define internal void @_Z17triangular_loop_1v.omp_outlined(
 // CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTLB_MIN:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTLB_MAX:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTMIN_LESS_MAX:%.*]] = alloca i8, align 1
@@ -35,8 +36,8 @@
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 1
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[DOTLB_MIN]], align 4
 // CHECK-NEXT:    store i32 9, ptr [[TMP]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP]], align 4
-// CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[TMP1]], 1
+// CHECK-NEXT:    [[TMP100:%.*]] = load i32, ptr [[TMP]], align 4
+// CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[TMP100]], 1
 // CHECK-NEXT:    store i32 [[ADD3]], ptr [[DOTLB_MAX]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTLB_MIN]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTLB_MAX]], align 4
@@ -58,8 +59,8 @@
 // CHECK-NEXT:    store i32 [[COND]], ptr [[DOTLOWER]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP]], align 4
 // CHECK-NEXT:    [[ADD5:%.*]] = add i32 [[TMP7]], 1
-// CHECK-NEXT:    store i32 [[ADD5]], ptr [[_TMP1]], align 4
-// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 [[ADD5]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK-NEXT:    [[ADD6:%.*]] = add i32 [[TMP8]], 1
 // CHECK-NEXT:    store i32 [[ADD6]], ptr [[DOTLB_MIN4]], align 4
 // CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP]], align 4
@@ -67,11 +68,12 @@
 // CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP]], align 4
 // CHECK-NEXT:    [[ADD9:%.*]] = add i32 [[TMP10]], 1
 // CHECK-NEXT:    [[SUB:%.*]] = sub i32 10, [[ADD9]]
-// CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[SUB]], 1
-// CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[ADD10]], 1
+// CHECK-NEXT:    [[SUB10:%.*]] = sub i32 [[SUB]], 1
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB10]], 1
+// CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[DIV]], 1
 // CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[ADD8]], [[MUL]]
-// CHECK-NEXT:    store i32 [[ADD11]], ptr [[_TMP1]], align 4
-// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 [[ADD11]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK-NEXT:    [[ADD12:%.*]] = add i32 [[TMP11]], 1
 // CHECK-NEXT:    store i32 [[ADD12]], ptr [[DOTLB_MAX7]], align 4
 // CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTLB_MIN4]], align 4
@@ -90,20 +92,22 @@
 // CHECK-NEXT:    br label %[[COND_END20]]
 // CHECK:       [[COND_END20]]:
 // CHECK-NEXT:    [[COND21:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE18]] ], [ [[TMP16]], %[[COND_FALSE19]] ]
-// CHECK-NEXT:    store i32 [[COND21]], ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 [[COND21]], ptr [[TMP1]], align 4
 // CHECK-NEXT:    store i32 [[COND21]], ptr [[DOTLOWER16]], align 4
 // CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTLOWER]], align 4
 // CHECK-NEXT:    [[SUB22:%.*]] = sub i32 10, [[TMP17]]
-// CHECK-NEXT:    [[ADD23:%.*]] = add i32 [[SUB22]], 1
-// CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[ADD23]] to i64
-// CHECK-NEXT:    [[MUL24:%.*]] = mul nsw i64 10, [[CONV]]
+// CHECK-NEXT:    [[SUB23:%.*]] = sub i32 [[SUB22]], 1
+// CHECK-NEXT:    [[ADD24:%.*]] = add i32 [[SUB23]], 1
+// CHECK-NEXT:    [[DIV25:%.*]] = udiv i32 [[ADD24]], 1
+// CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[DIV25]] to i64
+// CHECK-NEXT:    [[MUL26:%.*]] = mul nsw i64 10, [[CONV]]
 // CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB25:%.*]] = sub i32 10, [[TMP18]]
-// CHECK-NEXT:    [[ADD26:%.*]] = add i32 [[SUB25]], 1
-// CHECK-NEXT:    [[CONV27:%.*]] = zext i32 [[ADD26]] to i64
-// CHECK-NEXT:    [[MUL28:%.*]] = mul nsw i64 [[MUL24]], [[CONV27]]
-// CHECK-NEXT:    [[SUB29:%.*]] = sub nsw i64 [[MUL28]], 1
-// CHECK-NEXT:    store i64 [[SUB29]], ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK-NEXT:    [[SUB27:%.*]] = sub i32 10, [[TMP18]]
+// CHECK-NEXT:    [[ADD28:%.*]] = add i32 [[SUB27]], 1
+// CHECK-NEXT:    [[CONV29:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK-NEXT:    [[MUL30:%.*]] = mul nsw i64 [[MUL26]], [[CONV29]]
+// CHECK-NEXT:    [[SUB31:%.*]] = sub nsw i64 [[MUL30]], 1
+// CHECK-NEXT:    store i64 [[SUB31]], ptr [[DOTCAPTURE_EXPR_]], align 8
 // CHECK-NEXT:    store i64 0, ptr [[DOTOMP_LB]], align 8
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
 // CHECK-NEXT:    store i64 [[TMP19]], ptr [[DOTOMP_UB]], align 8
@@ -114,158 +118,172 @@
 // CHECK-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
 // CHECK-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
 // CHECK-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK-NEXT:    [[CMP30:%.*]] = icmp sgt i64 [[TMP22]], [[TMP23]]
-// CHECK-NEXT:    br i1 [[CMP30]], label %[[COND_TRUE31:.*]], label %[[COND_FALSE32:.*]]
-// CHECK:       [[COND_TRUE31]]:
+// CHECK-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[TMP22]], [[TMP23]]
+// CHECK-NEXT:    br i1 [[CMP32]], label %[[COND_TRUE33:.*]], label %[[COND_FALSE34:.*]]
+// CHECK:       [[COND_TRUE33]]:
 // CHECK-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK-NEXT:    br label %[[COND_END33:.*]]
-// CHECK:       [[COND_FALSE32]]:
+// CHECK-NEXT:    br label %[[COND_END35:.*]]
+// CHECK:       [[COND_FALSE34]]:
 // CHECK-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK-NEXT:    br label %[[COND_END33]]
-// CHECK:       [[COND_END33]]:
-// CHECK-NEXT:    [[COND34:%.*]] = phi i64 [ [[TMP24]], %[[COND_TRUE31]] ], [ [[TMP25]], %[[COND_FALSE32]] ]
-// CHECK-NEXT:    store i64 [[COND34]], ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    br label %[[COND_END35]]
+// CHECK:       [[COND_END35]]:
+// CHECK-NEXT:    [[COND36:%.*]] = phi i64 [ [[TMP24]], %[[COND_TRUE33]] ], [ [[TMP25]], %[[COND_FALSE34]] ]
+// CHECK-NEXT:    store i64 [[COND36]], ptr [[DOTOMP_UB]], align 8
 // CHECK-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
 // CHECK-NEXT:    store i64 [[TMP26]], ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
 // CHECK-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK-NEXT:    [[CMP35:%.*]] = icmp sle i64 [[TMP27]], [[TMP28]]
-// CHECK-NEXT:    br i1 [[CMP35]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// CHECK-NEXT:    [[CMP37:%.*]] = icmp sle i64 [[TMP27]], [[TMP28]]
+// CHECK-NEXT:    br i1 [[CMP37]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
 // CHECK-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB36:%.*]] = sub i32 10, [[TMP30]]
-// CHECK-NEXT:    [[ADD37:%.*]] = add i32 [[SUB36]], 1
-// CHECK-NEXT:    [[MUL38:%.*]] = mul i32 1, [[ADD37]]
-// CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB39:%.*]] = sub i32 10, [[TMP31]]
+// CHECK-NEXT:    [[SUB38:%.*]] = sub i32 10, [[TMP30]]
+// CHECK-NEXT:    [[SUB39:%.*]] = sub i32 [[SUB38]], 1
 // CHECK-NEXT:    [[ADD40:%.*]] = add i32 [[SUB39]], 1
-// CHECK-NEXT:    [[MUL41:%.*]] = mul i32 [[MUL38]], [[ADD40]]
-// CHECK-NEXT:    [[CONV42:%.*]] = zext i32 [[MUL41]] to i64
-// CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 [[TMP29]], [[CONV42]]
-// CHECK-NEXT:    [[MUL43:%.*]] = mul nsw i64 [[DIV]], 1
-// CHECK-NEXT:    [[ADD44:%.*]] = add nsw i64 0, [[MUL43]]
-// CHECK-NEXT:    [[CONV45:%.*]] = trunc i64 [[ADD44]] to i32
-// CHECK-NEXT:    store i32 [[CONV45]], ptr [[I]], align 4
+// CHECK-NEXT:    [[DIV41:%.*]] = udiv i32 [[ADD40]], 1
+// CHECK-NEXT:    [[MUL42:%.*]] = mul i32 1, [[DIV41]]
+// CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
+// CHECK-NEXT:    [[SUB43:%.*]] = sub i32 10, [[TMP31]]
+// CHECK-NEXT:    [[ADD44:%.*]] = add i32 [[SUB43]], 1
+// CHECK-NEXT:    [[MUL45:%.*]] = mul i32 [[MUL42]], [[ADD44]]
+// CHECK-NEXT:    [[CONV46:%.*]] = zext i32 [[MUL45]] to i64
+// CHECK-NEXT:    [[DIV47:%.*]] = sdiv i64 [[TMP29]], [[CONV46]]
+// CHECK-NEXT:    [[MUL48:%.*]] = mul nsw i64 [[DIV47]], 1
+// CHECK-NEXT:    [[ADD49:%.*]] = add nsw i64 0, [[MUL48]]
+// CHECK-NEXT:    [[CONV50:%.*]] = trunc i64 [[ADD49]] to i32
+// CHECK-NEXT:    store i32 [[CONV50]], ptr [[I]], align 4
 // CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[I]], align 4
-// CHECK-NEXT:    [[ADD46:%.*]] = add i32 [[TMP32]], 1
-// CHECK-NEXT:    [[CONV47:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK-NEXT:    [[ADD51:%.*]] = add i32 [[TMP32]], 1
+// CHECK-NEXT:    [[CONV52:%.*]] = zext i32 [[ADD51]] to i64
 // CHECK-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB48:%.*]] = sub i32 10, [[TMP35]]
-// CHECK-NEXT:    [[ADD49:%.*]] = add i32 [[SUB48]], 1
-// CHECK-NEXT:    [[MUL50:%.*]] = mul i32 1, [[ADD49]]
+// CHECK-NEXT:    [[SUB53:%.*]] = sub i32 10, [[TMP35]]
+// CHECK-NEXT:    [[SUB54:%.*]] = sub i32 [[SUB53]], 1
+// CHECK-NEXT:    [[ADD55:%.*]] = add i32 [[SUB54]], 1
+// CHECK-NEXT:    [[DIV56:%.*]] = udiv i32 [[ADD55]], 1
+// CHECK-NEXT:    [[MUL57:%.*]] = mul i32 1, [[DIV56]]
 // CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB51:%.*]] = sub i32 10, [[TMP36]]
-// CHECK-NEXT:    [[ADD52:%.*]] = add i32 [[SUB51]], 1
-// CHECK-NEXT:    [[MUL53:%.*]] = mul i32 [[MUL50]], [[ADD52]]
-// CHECK-NEXT:    [[CONV54:%.*]] = zext i32 [[MUL53]] to i64
-// CHECK-NEXT:    [[DIV55:%.*]] = sdiv i64 [[TMP34]], [[CONV54]]
+// CHECK-NEXT:    [[SUB58:%.*]] = sub i32 10, [[TMP36]]
+// CHECK-NEXT:    [[ADD59:%.*]] = add i32 [[SUB58]], 1
+// CHECK-NEXT:    [[MUL60:%.*]] = mul i32 [[MUL57]], [[ADD59]]
+// CHECK-NEXT:    [[CONV61:%.*]] = zext i32 [[MUL60]] to i64
+// CHECK-NEXT:    [[DIV62:%.*]] = sdiv i64 [[TMP34]], [[CONV61]]
 // CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB56:%.*]] = sub i32 10, [[TMP37]]
-// CHECK-NEXT:    [[ADD57:%.*]] = add i32 [[SUB56]], 1
-// CHECK-NEXT:    [[MUL58:%.*]] = mul i32 1, [[ADD57]]
+// CHECK-NEXT:    [[SUB63:%.*]] = sub i32 10, [[TMP37]]
+// CHECK-NEXT:    [[SUB64:%.*]] = sub i32 [[SUB63]], 1
+// CHECK-NEXT:    [[ADD65:%.*]] = add i32 [[SUB64]], 1
+// CHECK-NEXT:    [[DIV66:%.*]] = udiv i32 [[ADD65]], 1
+// CHECK-NEXT:    [[MUL67:%.*]] = mul i32 1, [[DIV66]]
 // CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB59:%.*]] = sub i32 10, [[TMP38]]
-// CHECK-NEXT:    [[ADD60:%.*]] = add i32 [[SUB59]], 1
-// CHECK-NEXT:    [[MUL61:%.*]] = mul i32 [[MUL58]], [[ADD60]]
-// CHECK-NEXT:    [[CONV62:%.*]] = zext i32 [[MUL61]] to i64
-// CHECK-NEXT:    [[MUL63:%.*]] = mul nsw i64 [[DIV55]], [[CONV62]]
-// CHECK-NEXT:    [[SUB64:%.*]] = sub nsw i64 [[TMP33]], [[MUL63]]
+// CHECK-NEXT:    [[SUB68:%.*]] = sub i32 10, [[TMP38]]
+// CHECK-NEXT:    [[ADD69:%.*]] = add i32 [[SUB68]], 1
+// CHECK-NEXT:    [[MUL70:%.*]] = mul i32 [[MUL67]], [[ADD69]]
+// CHECK-NEXT:    [[CONV71:%.*]] = zext i32 [[MUL70]] to i64
+// CHECK-NEXT:    [[MUL72:%.*]] = mul nsw i64 [[DIV62]], [[CONV71]]
+// CHECK-NEXT:    [[SUB73:%.*]] = sub nsw i64 [[TMP33]], [[MUL72]]
 // CHECK-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB65:%.*]] = sub i32 10, [[TMP39]]
-// CHECK-NEXT:    [[ADD66:%.*]] = add i32 [[SUB65]], 1
-// CHECK-NEXT:    [[MUL67:%.*]] = mul i32 1, [[ADD66]]
-// CHECK-NEXT:    [[CONV68:%.*]] = zext i32 [[MUL67]] to i64
-// CHECK-NEXT:    [[DIV69:%.*]] = sdiv i64 [[SUB64]], [[CONV68]]
-// CHECK-NEXT:    [[MUL70:%.*]] = mul nsw i64 [[DIV69]], 1
-// CHECK-NEXT:    [[ADD71:%.*]] = add nsw i64 [[CONV47]], [[MUL70]]
-// CHECK-NEXT:    [[CONV72:%.*]] = trunc i64 [[ADD71]] to i32
-// CHECK-NEXT:    store i32 [[CONV72]], ptr [[J]], align 4
+// CHECK-NEXT:    [[SUB74:%.*]] = sub i32 10, [[TMP39]]
+// CHECK-NEXT:    [[ADD75:%.*]] = add i32 [[SUB74]], 1
+// CHECK-NEXT:    [[MUL76:%.*]] = mul i32 1, [[ADD75]]
+// CHECK-NEXT:    [[CONV77:%.*]] = zext i32 [[MUL76]] to i64
+// CHECK-NEXT:    [[DIV78:%.*]] = sdiv i64 [[SUB73]], [[CONV77]]
+// CHECK-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[DIV78]], 1
+// CHECK-NEXT:    [[ADD80:%.*]] = add nsw i64 [[CONV52]], [[MUL79]]
+// CHECK-NEXT:    [[CONV81:%.*]] = trunc i64 [[ADD80]] to i32
+// CHECK-NEXT:    store i32 [[CONV81]], ptr [[J]], align 4
 // CHECK-NEXT:    [[TMP40:%.*]] = load i32, ptr [[J]], align 4
-// CHECK-NEXT:    [[ADD73:%.*]] = add i32 [[TMP40]], 1
-// CHECK-NEXT:    [[CONV74:%.*]] = zext i32 [[ADD73]] to i64
+// CHECK-NEXT:    [[ADD82:%.*]] = add i32 [[TMP40]], 1
+// CHECK-NEXT:    [[CONV83:%.*]] = zext i32 [[ADD82]] to i64
 // CHECK-NEXT:    [[TMP41:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB75:%.*]] = sub i32 10, [[TMP43]]
-// CHECK-NEXT:    [[ADD76:%.*]] = add i32 [[SUB75]], 1
-// CHECK-NEXT:    [[MUL77:%.*]] = mul i32 1, [[ADD76]]
+// CHECK-NEXT:    [[SUB84:%.*]] = sub i32 10, [[TMP43]]
+// CHECK-NEXT:    [[SUB85:%.*]] = sub i32 [[SUB84]], 1
+// CHECK-NEXT:    [[ADD86:%.*]] = add i32 [[SUB85]], 1
+// CHECK-NEXT:    [[DIV87:%.*]] = udiv i32 [[ADD86]], 1
+// CHECK-NEXT:    [[MUL88:%.*]] = mul i32 1, [[DIV87]]
 // CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB78:%.*]] = sub i32 10, [[TMP44]]
-// CHECK-NEXT:    [[ADD79:%.*]] = add i32 [[SUB78]], 1
-// CHECK-NEXT:    [[MUL80:%.*]] = mul i32 [[MUL77]], [[ADD79]]
-// CHECK-NEXT:    [[CONV81:%.*]] = zext i32 [[MUL80]] to i64
-// CHECK-NEXT:    [[DIV82:%.*]] = sdiv i64 [[TMP42]], [[CONV81]]
+// CHECK-NEXT:    [[SUB89:%.*]] = sub i32 10, [[TMP44]]
+// CHECK-NEXT:    [[ADD90:%.*]] = add i32 [[SUB89]], 1
+// CHECK-NEXT:    [[MUL91:%.*]] = mul i32 [[MUL88]], [[ADD90]]
+// CHECK-NEXT:    [[CONV92:%.*]] = zext i32 [[MUL91]] to i64
+// CHECK-NEXT:    [[DIV93:%.*]] = sdiv i64 [[TMP42]], [[CONV92]]
 // CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB83:%.*]] = sub i32 10, [[TMP45]]
-// CHECK-NEXT:    [[ADD84:%.*]] = add i32 [[SUB83]], 1
-// CHECK-NEXT:    [[MUL85:%.*]] = mul i32 1, [[ADD84]]
+// CHECK-NEXT:    [[SUB94:%.*]] = sub i32 10, [[TMP45]]
+// CHECK-NEXT:    [[SUB95:%.*]] = sub i32 [[SUB94]], 1
+// CHECK-NEXT:    [[ADD96:%.*]] = add i32 [[SUB95]], 1
+// CHECK-NEXT:    [[DIV97:%.*]] = udiv i32 [[ADD96]], 1
+// CHECK-NEXT:    [[MUL98:%.*]] = mul i32 1, [[DIV97]]
 // CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB86:%.*]] = sub i32 10, [[TMP46]]
-// CHECK-NEXT:    [[ADD87:%.*]] = add i32 [[SUB86]], 1
-// CHECK-NEXT:    [[MUL88:%.*]] = mul i32 [[MUL85]], [[ADD87]]
-// CHECK-NEXT:    [[CONV89:%.*]] = zext i32 [[MUL88]] to i64
-// CHECK-NEXT:    [[MUL90:%.*]] = mul nsw i64 [[DIV82]], [[CONV89]]
-// CHECK-NEXT:    [[SUB91:%.*]] = sub nsw i64 [[TMP41]], [[MUL90]]
+// CHECK-NEXT:    [[SUB99:%.*]] = sub i32 10, [[TMP46]]
+// CHECK-NEXT:    [[ADD100:%.*]] = add i32 [[SUB99]], 1
+// CHECK-NEXT:    [[MUL101:%.*]] = mul i32 [[MUL98]], [[ADD100]]
+// CHECK-NEXT:    [[CONV102:%.*]] = zext i32 [[MUL101]] to i64
+// CHECK-NEXT:    [[MUL103:%.*]] = mul nsw i64 [[DIV93]], [[CONV102]]
+// CHECK-NEXT:    [[SUB104:%.*]] = sub nsw i64 [[TMP41]], [[MUL103]]
 // CHECK-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB92:%.*]] = sub i32 10, [[TMP49]]
-// CHECK-NEXT:    [[ADD93:%.*]] = add i32 [[SUB92]], 1
-// CHECK-NEXT:    [[MUL94:%.*]] = mul i32 1, [[ADD93]]
+// CHECK-NEXT:    [[SUB105:%.*]] = sub i32 10, [[TMP49]]
+// CHECK-NEXT:    [[SUB106:%.*]] = sub i32 [[SUB105]], 1
+// CHECK-NEXT:    [[ADD107:%.*]] = add i32 [[SUB106]], 1
+// CHECK-NEXT:    [[DIV108:%.*]] = udiv i32 [[ADD107]], 1
+// CHECK-NEXT:    [[MUL109:%.*]] = mul i32 1, [[DIV108]]
 // CHECK-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB95:%.*]] = sub i32 10, [[TMP50]]
-// CHECK-NEXT:    [[ADD96:%.*]] = add i32 [[SUB95]], 1
-// CHECK-NEXT:    [[MUL97:%.*]] = mul i32 [[MUL94]], [[ADD96]]
-// CHECK-NEXT:    [[CONV98:%.*]] = zext i32 [[MUL97]] to i64
-// CHECK-NEXT:    [[DIV99:%.*]] = sdiv i64 [[TMP48]], [[CONV98]]
+// CHECK-NEXT:    [[SUB110:%.*]] = sub i32 10, [[TMP50]]
+// CHECK-NEXT:    [[ADD111:%.*]] = add i32 [[SUB110]], 1
+// CHECK-NEXT:    [[MUL112:%.*]] = mul i32 [[MUL109]], [[ADD111]]
+// CHECK-NEXT:    [[CONV113:%.*]] = zext i32 [[MUL112]] to i64
+// CHECK-NEXT:    [[DIV114:%.*]] = sdiv i64 [[TMP48]], [[CONV113]]
 // CHECK-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB100:%.*]] = sub i32 10, [[TMP51]]
-// CHECK-NEXT:    [[ADD101:%.*]] = add i32 [[SUB100]], 1
-// CHECK-NEXT:    [[MUL102:%.*]] = mul i32 1, [[ADD101]]
+// CHECK-NEXT:    [[SUB115:%.*]] = sub i32 10, [[TMP51]]
+// CHECK-NEXT:    [[SUB116:%.*]] = sub i32 [[SUB115]], 1
+// CHECK-NEXT:    [[ADD117:%.*]] = add i32 [[SUB116]], 1
+// CHECK-NEXT:    [[DIV118:%.*]] = udiv i32 [[ADD117]], 1
+// CHECK-NEXT:    [[MUL119:%.*]] = mul i32 1, [[DIV118]]
 // CHECK-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB103:%.*]] = sub i32 10, [[TMP52]]
-// CHECK-NEXT:    [[ADD104:%.*]] = add i32 [[SUB103]], 1
-// CHECK-NEXT:    [[MUL105:%.*]] = mul i32 [[MUL102]], [[ADD104]]
-// CHECK-NEXT:    [[CONV106:%.*]] = zext i32 [[MUL105]] to i64
-// CHECK-NEXT:    [[MUL107:%.*]] = mul nsw i64 [[DIV99]], [[CONV106]]
-// CHECK-NEXT:    [[SUB108:%.*]] = sub nsw i64 [[TMP47]], [[MUL107]]
+// CHECK-NEXT:    [[SUB120:%.*]] = sub i32 10, [[TMP52]]
+// CHECK-NEXT:    [[ADD121:%.*]] = add i32 [[SUB120]], 1
+// CHECK-NEXT:    [[MUL122:%.*]] = mul i32 [[MUL119]], [[ADD121]]
+// CHECK-NEXT:    [[CONV123:%.*]] = zext i32 [[MUL122]] to i64
+// CHECK-NEXT:    [[MUL124:%.*]] = mul nsw i64 [[DIV114]], [[CONV123]]
+// CHECK-NEXT:    [[SUB125:%.*]] = sub nsw i64 [[TMP47]], [[MUL124]]
 // CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB109:%.*]] = sub i32 10, [[TMP53]]
-// CHECK-NEXT:    [[ADD110:%.*]] = add i32 [[SUB109]], 1
-// CHECK-NEXT:    [[MUL111:%.*]] = mul i32 1, [[ADD110]]
-// CHECK-NEXT:    [[CONV112:%.*]] = zext i32 [[MUL111]] to i64
-// CHECK-NEXT:    [[DIV113:%.*]] = sdiv i64 [[SUB108]], [[CONV112]]
+// CHECK-NEXT:    [[SUB126:%.*]] = sub i32 10, [[TMP53]]
+// CHECK-NEXT:    [[ADD127:%.*]] = add i32 [[SUB126]], 1
+// CHECK-NEXT:    [[MUL128:%.*]] = mul i32 1, [[ADD127]]
+// CHECK-NEXT:    [[CONV129:%.*]] = zext i32 [[MUL128]] to i64
+// CHECK-NEXT:    [[DIV130:%.*]] = sdiv i64 [[SUB125]], [[CONV129]]
 // CHECK-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB114:%.*]] = sub i32 10, [[TMP54]]
-// CHECK-NEXT:    [[ADD115:%.*]] = add i32 [[SUB114]], 1
-// CHECK-NEXT:    [[MUL116:%.*]] = mul i32 1, [[ADD115]]
-// CHECK-NEXT:    [[CONV117:%.*]] = zext i32 [[MUL116]] to i64
-// CHECK-NEXT:    [[MUL118:%.*]] = mul nsw i64 [[DIV113]], [[CONV117]]
-// CHECK-NEXT:    [[SUB119:%.*]] = sub nsw i64 [[SUB91]], [[MUL118]]
-// CHECK-NEXT:    [[MUL120:%.*]] = mul nsw i64 [[SUB119]], 1
-// CHECK-NEXT:    [[ADD121:%.*]] = add nsw i64 [[CONV74]], [[MUL120]]
-// CHECK-NEXT:    [[CONV122:%.*]] = trunc i64 [[ADD121]] to i32
-// CHECK-NEXT:    store i32 [[CONV122]], ptr [[K]], align 4
+// CHECK-NEXT:    [[SUB131:%.*]] = sub i32 10, [[TMP54]]
+// CHECK-NEXT:    [[ADD132:%.*]] = add i32 [[SUB131]], 1
+// CHECK-NEXT:    [[MUL133:%.*]] = mul i32 1, [[ADD132]]
+// CHECK-NEXT:    [[CONV134:%.*]] = zext i32 [[MUL133]] to i64
+// CHECK-NEXT:    [[MUL135:%.*]] = mul nsw i64 [[DIV130]], [[CONV134]]
+// CHECK-NEXT:    [[SUB136:%.*]] = sub nsw i64 [[SUB104]], [[MUL135]]
+// CHECK-NEXT:    [[MUL137:%.*]] = mul nsw i64 [[SUB136]], 1
+// CHECK-NEXT:    [[ADD138:%.*]] = add nsw i64 [[CONV83]], [[MUL137]]
+// CHECK-NEXT:    [[CONV139:%.*]] = trunc i64 [[ADD138]] to i32
+// CHECK-NEXT:    store i32 [[CONV139]], ptr [[K]], align 4
 // CHECK-NEXT:    [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK-NEXT:    [[CMP123:%.*]] = icmp ult i32 [[TMP55]], 10
-// CHECK-NEXT:    br i1 [[CMP123]], label %[[OMP_BODY_NEXT:.*]], label %[[OMP_BODY_CONTINUE:.*]]
+// CHECK-NEXT:    [[CMP140:%.*]] = icmp ult i32 [[TMP55]], 10
+// CHECK-NEXT:    br i1 [[CMP140]], label %[[OMP_BODY_NEXT:.*]], label %[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_NEXT]]:
 // CHECK-NEXT:    [[TMP56:%.*]] = load i32, ptr [[K]], align 4
-// CHECK-NEXT:    [[CMP124:%.*]] = icmp ult i32 [[TMP56]], 10
-// CHECK-NEXT:    br i1 [[CMP124]], label %[[OMP_BODY_NEXT125:.*]], label %[[OMP_BODY_CONTINUE]]
-// CHECK:       [[OMP_BODY_NEXT125]]:
+// CHECK-NEXT:    [[CMP141:%.*]] = icmp ult i32 [[TMP56]], 10
+// CHECK-NEXT:    br i1 [[CMP141]], label %[[OMP_BODY_NEXT142:.*]], label %[[OMP_BODY_CONTINUE]]
+// CHECK:       [[OMP_BODY_NEXT142]]:
 // CHECK-NEXT:    br label %[[OMP_BODY_CONTINUE]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK-NEXT:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
 // CHECK-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK-NEXT:    [[ADD126:%.*]] = add nsw i64 [[TMP57]], 1
-// CHECK-NEXT:    store i64 [[ADD126]], ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[ADD143:%.*]] = add nsw i64 [[TMP57]], 1
+// CHECK-NEXT:    store i64 [[ADD143]], ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK-NEXT:    br label %[[OMP_LOOP_EXIT:.*]]
@@ -274,7 +292,7 @@
 // CHECK-NEXT:    [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP59]])
 // CHECK-NEXT:    ret void
-void triangulat_loop_1() {
+void triangular_loop_1() {
 #pragma omp parallel for collapse(3)
   for (unsigned int i = 0; i < 10; ++i)
     for (unsigned int j = i + 1; j < 10; ++j)
@@ -282,15 +300,15 @@ void triangulat_loop_1() {
        ;
 }
 
-// CHECK-LABEL: define internal void @_Z17triangulat_loop_2v.omp_outlined(
+// CHECK-LABEL: define internal void @_Z17triangular_loop_2v.omp_outlined(
 // CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTLB_MIN:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTLB_MAX:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTMIN_LESS_MAX:%.*]] = alloca i8, align 1
@@ -314,8 +332,8 @@ void triangulat_loop_1() {
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], 1
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[DOTLB_MIN]], align 4
 // CHECK-NEXT:    store i32 9, ptr [[TMP]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP]], align 4
-// CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[TMP1]], 1
+// CHECK-NEXT:    [[TMP100:%.*]] = load i32, ptr [[TMP]], align 4
+// CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[TMP100]], 1
 // CHECK-NEXT:    store i32 [[ADD3]], ptr [[DOTLB_MAX]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTLB_MIN]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTLB_MAX]], align 4
@@ -337,8 +355,8 @@ void triangulat_loop_1() {
 // CHECK-NEXT:    store i32 [[COND]], ptr [[DOTLOWER]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP]], align 4
 // CHECK-NEXT:    [[ADD5:%.*]] = add i32 [[TMP7]], 1
-// CHECK-NEXT:    store i32 [[ADD5]], ptr [[_TMP1]], align 4
-// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 [[ADD5]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1
 // CHECK-NEXT:    store i32 [[ADD6]], ptr [[DOTLB_MIN4]], align 4
 // CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP]], align 4
@@ -346,11 +364,12 @@ void triangulat_loop_1() {
 // CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP]], align 4
 // CHECK-NEXT:    [[ADD9:%.*]] = add i32 [[TMP10]], 1
 // CHECK-NEXT:    [[SUB:%.*]] = sub i32 10, [[ADD9]]
-// CHECK-NEXT:    [[ADD10:%.*]] = add i32 [[SUB]], 1
-// CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[ADD10]], 2
+// CHECK-NEXT:    [[SUB10:%.*]] = sub i32 [[SUB]], 1
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB10]], 2
+// CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[DIV]], 2
 // CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[ADD8]], [[MUL]]
-// CHECK-NEXT:    store i32 [[ADD11]], ptr [[_TMP1]], align 4
-// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 [[ADD11]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP11]], 1
 // CHECK-NEXT:    store i32 [[ADD12]], ptr [[DOTLB_MAX7]], align 4
 // CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTLB_MIN4]], align 4
@@ -369,20 +388,22 @@ void triangulat_loop_1() {
 // CHECK-NEXT:    br label %[[COND_END20]]
 // CHECK:       [[COND_END20]]:
 // CHECK-NEXT:    [[COND21:%.*]] = phi i32 [ [[TMP15]], %[[COND_TRUE18]] ], [ [[TMP16]], %[[COND_FALSE19]] ]
-// CHECK-NEXT:    store i32 [[COND21]], ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 [[COND21]], ptr [[TMP1]], align 4
 // CHECK-NEXT:    store i32 [[COND21]], ptr [[DOTLOWER16]], align 4
 // CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTLOWER]], align 4
 // CHECK-NEXT:    [[SUB22:%.*]] = sub i32 10, [[TMP17]]
-// CHECK-NEXT:    [[ADD23:%.*]] = add i32 [[SUB22]], 1
-// CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[ADD23]] to i64
-// CHECK-NEXT:    [[MUL24:%.*]] = mul nsw i64 10, [[CONV]]
+// CHECK-NEXT:    [[SUB23:%.*]] = sub i32 [[SUB22]], 1
+// CHECK-NEXT:    [[ADD24:%.*]] = add i32 [[SUB23]], 2
+// CHECK-NEXT:    [[DIV25:%.*]] = udiv i32 [[ADD24]], 2
+// CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[DIV25]] to i64
+// CHECK-NEXT:    [[MUL26:%.*]] = mul nsw i64 10, [[CONV]]
 // CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB25:%.*]] = sub i32 10, [[TMP18]]
-// CHECK-NEXT:    [[ADD26:%.*]] = add i32 [[SUB25]], 1
-// CHECK-NEXT:    [[CONV27:%.*]] = zext i32 [[ADD26]] to i64
-// CHECK-NEXT:    [[MUL28:%.*]] = mul nsw i64 [[MUL24]], [[CONV27]]
-// CHECK-NEXT:    [[SUB29:%.*]] = sub nsw i64 [[MUL28]], 1
-// CHECK-NEXT:    store i64 [[SUB29]], ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK-NEXT:    [[SUB27:%.*]] = sub i32 10, [[TMP18]]
+// CHECK-NEXT:    [[ADD28:%.*]] = add i32 [[SUB27]], 1
+// CHECK-NEXT:    [[CONV29:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK-NEXT:    [[MUL30:%.*]] = mul nsw i64 [[MUL26]], [[CONV29]]
+// CHECK-NEXT:    [[SUB31:%.*]] = sub nsw i64 [[MUL30]], 1
+// CHECK-NEXT:    store i64 [[SUB31]], ptr [[DOTCAPTURE_EXPR_]], align 8
 // CHECK-NEXT:    store i64 0, ptr [[DOTOMP_LB]], align 8
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
 // CHECK-NEXT:    store i64 [[TMP19]], ptr [[DOTOMP_UB]], align 8
@@ -393,158 +414,172 @@ void triangulat_loop_1() {
 // CHECK-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 
[[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
 // CHECK-NEXT:    [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
 // CHECK-NEXT:    [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK-NEXT:    [[CMP30:%.*]] = icmp sgt i64 [[TMP22]], [[TMP23]]
-// CHECK-NEXT:    br i1 [[CMP30]], label %[[COND_TRUE31:.*]], label 
%[[COND_FALSE32:.*]]
-// CHECK:       [[COND_TRUE31]]:
+// CHECK-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[TMP22]], [[TMP23]]
+// CHECK-NEXT:    br i1 [[CMP32]], label %[[COND_TRUE33:.*]], label 
%[[COND_FALSE34:.*]]
+// CHECK:       [[COND_TRUE33]]:
 // CHECK-NEXT:    [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
-// CHECK-NEXT:    br label %[[COND_END33:.*]]
-// CHECK:       [[COND_FALSE32]]:
+// CHECK-NEXT:    br label %[[COND_END35:.*]]
+// CHECK:       [[COND_FALSE34]]:
 // CHECK-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK-NEXT:    br label %[[COND_END33]]
-// CHECK:       [[COND_END33]]:
-// CHECK-NEXT:    [[COND34:%.*]] = phi i64 [ [[TMP24]], %[[COND_TRUE31]] ], [ 
[[TMP25]], %[[COND_FALSE32]] ]
-// CHECK-NEXT:    store i64 [[COND34]], ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    br label %[[COND_END35]]
+// CHECK:       [[COND_END35]]:
+// CHECK-NEXT:    [[COND36:%.*]] = phi i64 [ [[TMP24]], %[[COND_TRUE33]] ], [ 
[[TMP25]], %[[COND_FALSE34]] ]
+// CHECK-NEXT:    store i64 [[COND36]], ptr [[DOTOMP_UB]], align 8
 // CHECK-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
 // CHECK-NEXT:    store i64 [[TMP26]], ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    br label %[[OMP_INNER_FOR_COND:.*]]
 // CHECK:       [[OMP_INNER_FOR_COND]]:
 // CHECK-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
-// CHECK-NEXT:    [[CMP35:%.*]] = icmp sle i64 [[TMP27]], [[TMP28]]
-// CHECK-NEXT:    br i1 [[CMP35]], label %[[OMP_INNER_FOR_BODY:.*]], label 
%[[OMP_INNER_FOR_END:.*]]
+// CHECK-NEXT:    [[CMP37:%.*]] = icmp sle i64 [[TMP27]], [[TMP28]]
+// CHECK-NEXT:    br i1 [[CMP37]], label %[[OMP_INNER_FOR_BODY:.*]], label 
%[[OMP_INNER_FOR_END:.*]]
 // CHECK:       [[OMP_INNER_FOR_BODY]]:
 // CHECK-NEXT:    [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB36:%.*]] = sub i32 10, [[TMP30]]
-// CHECK-NEXT:    [[ADD37:%.*]] = add i32 [[SUB36]], 1
-// CHECK-NEXT:    [[MUL38:%.*]] = mul i32 1, [[ADD37]]
+// CHECK-NEXT:    [[SUB38:%.*]] = sub i32 10, [[TMP30]]
+// CHECK-NEXT:    [[SUB39:%.*]] = sub i32 [[SUB38]], 1
+// CHECK-NEXT:    [[ADD40:%.*]] = add i32 [[SUB39]], 2
+// CHECK-NEXT:    [[DIV41:%.*]] = udiv i32 [[ADD40]], 2
+// CHECK-NEXT:    [[MUL42:%.*]] = mul i32 1, [[DIV41]]
 // CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB39:%.*]] = sub i32 10, [[TMP31]]
-// CHECK-NEXT:    [[ADD40:%.*]] = add i32 [[SUB39]], 1
-// CHECK-NEXT:    [[MUL41:%.*]] = mul i32 [[MUL38]], [[ADD40]]
-// CHECK-NEXT:    [[CONV42:%.*]] = zext i32 [[MUL41]] to i64
-// CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 [[TMP29]], [[CONV42]]
-// CHECK-NEXT:    [[MUL43:%.*]] = mul nsw i64 [[DIV]], 1
-// CHECK-NEXT:    [[ADD44:%.*]] = add nsw i64 0, [[MUL43]]
-// CHECK-NEXT:    [[CONV45:%.*]] = trunc i64 [[ADD44]] to i32
-// CHECK-NEXT:    store i32 [[CONV45]], ptr [[I]], align 4
+// CHECK-NEXT:    [[SUB43:%.*]] = sub i32 10, [[TMP31]]
+// CHECK-NEXT:    [[ADD44:%.*]] = add i32 [[SUB43]], 1
+// CHECK-NEXT:    [[MUL45:%.*]] = mul i32 [[MUL42]], [[ADD44]]
+// CHECK-NEXT:    [[CONV46:%.*]] = zext i32 [[MUL45]] to i64
+// CHECK-NEXT:    [[DIV47:%.*]] = sdiv i64 [[TMP29]], [[CONV46]]
+// CHECK-NEXT:    [[MUL48:%.*]] = mul nsw i64 [[DIV47]], 1
+// CHECK-NEXT:    [[ADD49:%.*]] = add nsw i64 0, [[MUL48]]
+// CHECK-NEXT:    [[CONV50:%.*]] = trunc i64 [[ADD49]] to i32
+// CHECK-NEXT:    store i32 [[CONV50]], ptr [[I]], align 4
 // CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[I]], align 4
-// CHECK-NEXT:    [[ADD46:%.*]] = add i32 [[TMP32]], 1
-// CHECK-NEXT:    [[CONV47:%.*]] = sext i32 [[ADD46]] to i64
+// CHECK-NEXT:    [[ADD51:%.*]] = add i32 [[TMP32]], 1
+// CHECK-NEXT:    [[CONV52:%.*]] = sext i32 [[ADD51]] to i64
 // CHECK-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB48:%.*]] = sub i32 10, [[TMP35]]
-// CHECK-NEXT:    [[ADD49:%.*]] = add i32 [[SUB48]], 1
-// CHECK-NEXT:    [[MUL50:%.*]] = mul i32 1, [[ADD49]]
+// CHECK-NEXT:    [[SUB53:%.*]] = sub i32 10, [[TMP35]]
+// CHECK-NEXT:    [[SUB54:%.*]] = sub i32 [[SUB53]], 1
+// CHECK-NEXT:    [[ADD55:%.*]] = add i32 [[SUB54]], 2
+// CHECK-NEXT:    [[DIV56:%.*]] = udiv i32 [[ADD55]], 2
+// CHECK-NEXT:    [[MUL57:%.*]] = mul i32 1, [[DIV56]]
 // CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB51:%.*]] = sub i32 10, [[TMP36]]
-// CHECK-NEXT:    [[ADD52:%.*]] = add i32 [[SUB51]], 1
-// CHECK-NEXT:    [[MUL53:%.*]] = mul i32 [[MUL50]], [[ADD52]]
-// CHECK-NEXT:    [[CONV54:%.*]] = zext i32 [[MUL53]] to i64
-// CHECK-NEXT:    [[DIV55:%.*]] = sdiv i64 [[TMP34]], [[CONV54]]
+// CHECK-NEXT:    [[SUB58:%.*]] = sub i32 10, [[TMP36]]
+// CHECK-NEXT:    [[ADD59:%.*]] = add i32 [[SUB58]], 1
+// CHECK-NEXT:    [[MUL60:%.*]] = mul i32 [[MUL57]], [[ADD59]]
+// CHECK-NEXT:    [[CONV61:%.*]] = zext i32 [[MUL60]] to i64
+// CHECK-NEXT:    [[DIV62:%.*]] = sdiv i64 [[TMP34]], [[CONV61]]
 // CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB56:%.*]] = sub i32 10, [[TMP37]]
-// CHECK-NEXT:    [[ADD57:%.*]] = add i32 [[SUB56]], 1
-// CHECK-NEXT:    [[MUL58:%.*]] = mul i32 1, [[ADD57]]
+// CHECK-NEXT:    [[SUB63:%.*]] = sub i32 10, [[TMP37]]
+// CHECK-NEXT:    [[SUB64:%.*]] = sub i32 [[SUB63]], 1
+// CHECK-NEXT:    [[ADD65:%.*]] = add i32 [[SUB64]], 2
+// CHECK-NEXT:    [[DIV66:%.*]] = udiv i32 [[ADD65]], 2
+// CHECK-NEXT:    [[MUL67:%.*]] = mul i32 1, [[DIV66]]
 // CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB59:%.*]] = sub i32 10, [[TMP38]]
-// CHECK-NEXT:    [[ADD60:%.*]] = add i32 [[SUB59]], 1
-// CHECK-NEXT:    [[MUL61:%.*]] = mul i32 [[MUL58]], [[ADD60]]
-// CHECK-NEXT:    [[CONV62:%.*]] = zext i32 [[MUL61]] to i64
-// CHECK-NEXT:    [[MUL63:%.*]] = mul nsw i64 [[DIV55]], [[CONV62]]
-// CHECK-NEXT:    [[SUB64:%.*]] = sub nsw i64 [[TMP33]], [[MUL63]]
+// CHECK-NEXT:    [[SUB68:%.*]] = sub i32 10, [[TMP38]]
+// CHECK-NEXT:    [[ADD69:%.*]] = add i32 [[SUB68]], 1
+// CHECK-NEXT:    [[MUL70:%.*]] = mul i32 [[MUL67]], [[ADD69]]
+// CHECK-NEXT:    [[CONV71:%.*]] = zext i32 [[MUL70]] to i64
+// CHECK-NEXT:    [[MUL72:%.*]] = mul nsw i64 [[DIV62]], [[CONV71]]
+// CHECK-NEXT:    [[SUB73:%.*]] = sub nsw i64 [[TMP33]], [[MUL72]]
 // CHECK-NEXT:    [[TMP39:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB65:%.*]] = sub i32 10, [[TMP39]]
-// CHECK-NEXT:    [[ADD66:%.*]] = add i32 [[SUB65]], 1
-// CHECK-NEXT:    [[MUL67:%.*]] = mul i32 1, [[ADD66]]
-// CHECK-NEXT:    [[CONV68:%.*]] = zext i32 [[MUL67]] to i64
-// CHECK-NEXT:    [[DIV69:%.*]] = sdiv i64 [[SUB64]], [[CONV68]]
-// CHECK-NEXT:    [[MUL70:%.*]] = mul nsw i64 [[DIV69]], 2
-// CHECK-NEXT:    [[ADD71:%.*]] = add nsw i64 [[CONV47]], [[MUL70]]
-// CHECK-NEXT:    [[CONV72:%.*]] = trunc i64 [[ADD71]] to i32
-// CHECK-NEXT:    store i32 [[CONV72]], ptr [[J]], align 4
+// CHECK-NEXT:    [[SUB74:%.*]] = sub i32 10, [[TMP39]]
+// CHECK-NEXT:    [[ADD75:%.*]] = add i32 [[SUB74]], 1
+// CHECK-NEXT:    [[MUL76:%.*]] = mul i32 1, [[ADD75]]
+// CHECK-NEXT:    [[CONV77:%.*]] = zext i32 [[MUL76]] to i64
+// CHECK-NEXT:    [[DIV78:%.*]] = sdiv i64 [[SUB73]], [[CONV77]]
+// CHECK-NEXT:    [[MUL79:%.*]] = mul nsw i64 [[DIV78]], 2
+// CHECK-NEXT:    [[ADD80:%.*]] = add nsw i64 [[CONV52]], [[MUL79]]
+// CHECK-NEXT:    [[CONV81:%.*]] = trunc i64 [[ADD80]] to i32
+// CHECK-NEXT:    store i32 [[CONV81]], ptr [[J]], align 4
 // CHECK-NEXT:    [[TMP40:%.*]] = load i32, ptr [[J]], align 4
-// CHECK-NEXT:    [[ADD73:%.*]] = add nsw i32 [[TMP40]], 1
-// CHECK-NEXT:    [[CONV74:%.*]] = zext i32 [[ADD73]] to i64
+// CHECK-NEXT:    [[ADD82:%.*]] = add nsw i32 [[TMP40]], 1
+// CHECK-NEXT:    [[CONV83:%.*]] = zext i32 [[ADD82]] to i64
 // CHECK-NEXT:    [[TMP41:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB75:%.*]] = sub i32 10, [[TMP43]]
-// CHECK-NEXT:    [[ADD76:%.*]] = add i32 [[SUB75]], 1
-// CHECK-NEXT:    [[MUL77:%.*]] = mul i32 1, [[ADD76]]
+// CHECK-NEXT:    [[SUB84:%.*]] = sub i32 10, [[TMP43]]
+// CHECK-NEXT:    [[SUB85:%.*]] = sub i32 [[SUB84]], 1
+// CHECK-NEXT:    [[ADD86:%.*]] = add i32 [[SUB85]], 2
+// CHECK-NEXT:    [[DIV87:%.*]] = udiv i32 [[ADD86]], 2
+// CHECK-NEXT:    [[MUL88:%.*]] = mul i32 1, [[DIV87]]
 // CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB78:%.*]] = sub i32 10, [[TMP44]]
-// CHECK-NEXT:    [[ADD79:%.*]] = add i32 [[SUB78]], 1
-// CHECK-NEXT:    [[MUL80:%.*]] = mul i32 [[MUL77]], [[ADD79]]
-// CHECK-NEXT:    [[CONV81:%.*]] = zext i32 [[MUL80]] to i64
-// CHECK-NEXT:    [[DIV82:%.*]] = sdiv i64 [[TMP42]], [[CONV81]]
+// CHECK-NEXT:    [[SUB89:%.*]] = sub i32 10, [[TMP44]]
+// CHECK-NEXT:    [[ADD90:%.*]] = add i32 [[SUB89]], 1
+// CHECK-NEXT:    [[MUL91:%.*]] = mul i32 [[MUL88]], [[ADD90]]
+// CHECK-NEXT:    [[CONV92:%.*]] = zext i32 [[MUL91]] to i64
+// CHECK-NEXT:    [[DIV93:%.*]] = sdiv i64 [[TMP42]], [[CONV92]]
 // CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB83:%.*]] = sub i32 10, [[TMP45]]
-// CHECK-NEXT:    [[ADD84:%.*]] = add i32 [[SUB83]], 1
-// CHECK-NEXT:    [[MUL85:%.*]] = mul i32 1, [[ADD84]]
+// CHECK-NEXT:    [[SUB94:%.*]] = sub i32 10, [[TMP45]]
+// CHECK-NEXT:    [[SUB95:%.*]] = sub i32 [[SUB94]], 1
+// CHECK-NEXT:    [[ADD96:%.*]] = add i32 [[SUB95]], 2
+// CHECK-NEXT:    [[DIV97:%.*]] = udiv i32 [[ADD96]], 2
+// CHECK-NEXT:    [[MUL98:%.*]] = mul i32 1, [[DIV97]]
 // CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB86:%.*]] = sub i32 10, [[TMP46]]
-// CHECK-NEXT:    [[ADD87:%.*]] = add i32 [[SUB86]], 1
-// CHECK-NEXT:    [[MUL88:%.*]] = mul i32 [[MUL85]], [[ADD87]]
-// CHECK-NEXT:    [[CONV89:%.*]] = zext i32 [[MUL88]] to i64
-// CHECK-NEXT:    [[MUL90:%.*]] = mul nsw i64 [[DIV82]], [[CONV89]]
-// CHECK-NEXT:    [[SUB91:%.*]] = sub nsw i64 [[TMP41]], [[MUL90]]
+// CHECK-NEXT:    [[SUB99:%.*]] = sub i32 10, [[TMP46]]
+// CHECK-NEXT:    [[ADD100:%.*]] = add i32 [[SUB99]], 1
+// CHECK-NEXT:    [[MUL101:%.*]] = mul i32 [[MUL98]], [[ADD100]]
+// CHECK-NEXT:    [[CONV102:%.*]] = zext i32 [[MUL101]] to i64
+// CHECK-NEXT:    [[MUL103:%.*]] = mul nsw i64 [[DIV93]], [[CONV102]]
+// CHECK-NEXT:    [[SUB104:%.*]] = sub nsw i64 [[TMP41]], [[MUL103]]
 // CHECK-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB92:%.*]] = sub i32 10, [[TMP49]]
-// CHECK-NEXT:    [[ADD93:%.*]] = add i32 [[SUB92]], 1
-// CHECK-NEXT:    [[MUL94:%.*]] = mul i32 1, [[ADD93]]
+// CHECK-NEXT:    [[SUB105:%.*]] = sub i32 10, [[TMP49]]
+// CHECK-NEXT:    [[SUB106:%.*]] = sub i32 [[SUB105]], 1
+// CHECK-NEXT:    [[ADD107:%.*]] = add i32 [[SUB106]], 2
+// CHECK-NEXT:    [[DIV108:%.*]] = udiv i32 [[ADD107]], 2
+// CHECK-NEXT:    [[MUL109:%.*]] = mul i32 1, [[DIV108]]
 // CHECK-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB95:%.*]] = sub i32 10, [[TMP50]]
-// CHECK-NEXT:    [[ADD96:%.*]] = add i32 [[SUB95]], 1
-// CHECK-NEXT:    [[MUL97:%.*]] = mul i32 [[MUL94]], [[ADD96]]
-// CHECK-NEXT:    [[CONV98:%.*]] = zext i32 [[MUL97]] to i64
-// CHECK-NEXT:    [[DIV99:%.*]] = sdiv i64 [[TMP48]], [[CONV98]]
+// CHECK-NEXT:    [[SUB110:%.*]] = sub i32 10, [[TMP50]]
+// CHECK-NEXT:    [[ADD111:%.*]] = add i32 [[SUB110]], 1
+// CHECK-NEXT:    [[MUL112:%.*]] = mul i32 [[MUL109]], [[ADD111]]
+// CHECK-NEXT:    [[CONV113:%.*]] = zext i32 [[MUL112]] to i64
+// CHECK-NEXT:    [[DIV114:%.*]] = sdiv i64 [[TMP48]], [[CONV113]]
 // CHECK-NEXT:    [[TMP51:%.*]] = load i32, ptr [[DOTLOWER]], align 4
-// CHECK-NEXT:    [[SUB100:%.*]] = sub i32 10, [[TMP51]]
-// CHECK-NEXT:    [[ADD101:%.*]] = add i32 [[SUB100]], 1
-// CHECK-NEXT:    [[MUL102:%.*]] = mul i32 1, [[ADD101]]
+// CHECK-NEXT:    [[SUB115:%.*]] = sub i32 10, [[TMP51]]
+// CHECK-NEXT:    [[SUB116:%.*]] = sub i32 [[SUB115]], 1
+// CHECK-NEXT:    [[ADD117:%.*]] = add i32 [[SUB116]], 2
+// CHECK-NEXT:    [[DIV118:%.*]] = udiv i32 [[ADD117]], 2
+// CHECK-NEXT:    [[MUL119:%.*]] = mul i32 1, [[DIV118]]
 // CHECK-NEXT:    [[TMP52:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB103:%.*]] = sub i32 10, [[TMP52]]
-// CHECK-NEXT:    [[ADD104:%.*]] = add i32 [[SUB103]], 1
-// CHECK-NEXT:    [[MUL105:%.*]] = mul i32 [[MUL102]], [[ADD104]]
-// CHECK-NEXT:    [[CONV106:%.*]] = zext i32 [[MUL105]] to i64
-// CHECK-NEXT:    [[MUL107:%.*]] = mul nsw i64 [[DIV99]], [[CONV106]]
-// CHECK-NEXT:    [[SUB108:%.*]] = sub nsw i64 [[TMP47]], [[MUL107]]
+// CHECK-NEXT:    [[SUB120:%.*]] = sub i32 10, [[TMP52]]
+// CHECK-NEXT:    [[ADD121:%.*]] = add i32 [[SUB120]], 1
+// CHECK-NEXT:    [[MUL122:%.*]] = mul i32 [[MUL119]], [[ADD121]]
+// CHECK-NEXT:    [[CONV123:%.*]] = zext i32 [[MUL122]] to i64
+// CHECK-NEXT:    [[MUL124:%.*]] = mul nsw i64 [[DIV114]], [[CONV123]]
+// CHECK-NEXT:    [[SUB125:%.*]] = sub nsw i64 [[TMP47]], [[MUL124]]
 // CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB109:%.*]] = sub i32 10, [[TMP53]]
-// CHECK-NEXT:    [[ADD110:%.*]] = add i32 [[SUB109]], 1
-// CHECK-NEXT:    [[MUL111:%.*]] = mul i32 1, [[ADD110]]
-// CHECK-NEXT:    [[CONV112:%.*]] = zext i32 [[MUL111]] to i64
-// CHECK-NEXT:    [[DIV113:%.*]] = sdiv i64 [[SUB108]], [[CONV112]]
+// CHECK-NEXT:    [[SUB126:%.*]] = sub i32 10, [[TMP53]]
+// CHECK-NEXT:    [[ADD127:%.*]] = add i32 [[SUB126]], 1
+// CHECK-NEXT:    [[MUL128:%.*]] = mul i32 1, [[ADD127]]
+// CHECK-NEXT:    [[CONV129:%.*]] = zext i32 [[MUL128]] to i64
+// CHECK-NEXT:    [[DIV130:%.*]] = sdiv i64 [[SUB125]], [[CONV129]]
 // CHECK-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTLOWER16]], align 4
-// CHECK-NEXT:    [[SUB114:%.*]] = sub i32 10, [[TMP54]]
-// CHECK-NEXT:    [[ADD115:%.*]] = add i32 [[SUB114]], 1
-// CHECK-NEXT:    [[MUL116:%.*]] = mul i32 1, [[ADD115]]
-// CHECK-NEXT:    [[CONV117:%.*]] = zext i32 [[MUL116]] to i64
-// CHECK-NEXT:    [[MUL118:%.*]] = mul nsw i64 [[DIV113]], [[CONV117]]
-// CHECK-NEXT:    [[SUB119:%.*]] = sub nsw i64 [[SUB91]], [[MUL118]]
-// CHECK-NEXT:    [[MUL120:%.*]] = mul nsw i64 [[SUB119]], 1
-// CHECK-NEXT:    [[ADD121:%.*]] = add nsw i64 [[CONV74]], [[MUL120]]
-// CHECK-NEXT:    [[CONV122:%.*]] = trunc i64 [[ADD121]] to i32
-// CHECK-NEXT:    store i32 [[CONV122]], ptr [[K]], align 4
+// CHECK-NEXT:    [[SUB131:%.*]] = sub i32 10, [[TMP54]]
+// CHECK-NEXT:    [[ADD132:%.*]] = add i32 [[SUB131]], 1
+// CHECK-NEXT:    [[MUL133:%.*]] = mul i32 1, [[ADD132]]
+// CHECK-NEXT:    [[CONV134:%.*]] = zext i32 [[MUL133]] to i64
+// CHECK-NEXT:    [[MUL135:%.*]] = mul nsw i64 [[DIV130]], [[CONV134]]
+// CHECK-NEXT:    [[SUB136:%.*]] = sub nsw i64 [[SUB104]], [[MUL135]]
+// CHECK-NEXT:    [[MUL137:%.*]] = mul nsw i64 [[SUB136]], 1
+// CHECK-NEXT:    [[ADD138:%.*]] = add nsw i64 [[CONV83]], [[MUL137]]
+// CHECK-NEXT:    [[CONV139:%.*]] = trunc i64 [[ADD138]] to i32
+// CHECK-NEXT:    store i32 [[CONV139]], ptr [[K]], align 4
 // CHECK-NEXT:    [[TMP55:%.*]] = load i32, ptr [[J]], align 4
-// CHECK-NEXT:    [[CMP123:%.*]] = icmp slt i32 [[TMP55]], 10
-// CHECK-NEXT:    br i1 [[CMP123]], label %[[OMP_BODY_NEXT:.*]], label 
%[[OMP_BODY_CONTINUE:.*]]
+// CHECK-NEXT:    [[CMP140:%.*]] = icmp slt i32 [[TMP55]], 10
+// CHECK-NEXT:    br i1 [[CMP140]], label %[[OMP_BODY_NEXT:.*]], label 
%[[OMP_BODY_CONTINUE:.*]]
 // CHECK:       [[OMP_BODY_NEXT]]:
 // CHECK-NEXT:    [[TMP56:%.*]] = load i32, ptr [[K]], align 4
-// CHECK-NEXT:    [[CMP124:%.*]] = icmp ult i32 [[TMP56]], 10
-// CHECK-NEXT:    br i1 [[CMP124]], label %[[OMP_BODY_NEXT125:.*]], label 
%[[OMP_BODY_CONTINUE]]
-// CHECK:       [[OMP_BODY_NEXT125]]:
+// CHECK-NEXT:    [[CMP141:%.*]] = icmp ult i32 [[TMP56]], 10
+// CHECK-NEXT:    br i1 [[CMP141]], label %[[OMP_BODY_NEXT142:.*]], label 
%[[OMP_BODY_CONTINUE]]
+// CHECK:       [[OMP_BODY_NEXT142]]:
 // CHECK-NEXT:    br label %[[OMP_BODY_CONTINUE]]
 // CHECK:       [[OMP_BODY_CONTINUE]]:
 // CHECK-NEXT:    br label %[[OMP_INNER_FOR_INC:.*]]
 // CHECK:       [[OMP_INNER_FOR_INC]]:
 // CHECK-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK-NEXT:    [[ADD126:%.*]] = add nsw i64 [[TMP57]], 1
-// CHECK-NEXT:    store i64 [[ADD126]], ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[ADD143:%.*]] = add nsw i64 [[TMP57]], 1
+// CHECK-NEXT:    store i64 [[ADD143]], ptr [[DOTOMP_IV]], align 8
 // CHECK-NEXT:    br label %[[OMP_INNER_FOR_COND]]
 // CHECK:       [[OMP_INNER_FOR_END]]:
 // CHECK-NEXT:    br label %[[OMP_LOOP_EXIT:.*]]
@@ -553,7 +588,7 @@ void triangulat_loop_1() {
 // CHECK-NEXT:    [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP59]])
 // CHECK-NEXT:    ret void
-void triangulat_loop_2() {
+void triangular_loop_2() {
 #pragma omp parallel for collapse(3)
   for (unsigned int i = 0; i < 10; ++i)
     for (int j = i + 1; j < 10; j += 2)
@@ -561,7 +596,7 @@ void triangulat_loop_2() {
        ;
 }
 
-// CHECK-LABEL: define internal void @_Z10mixed_loopv.omp_outlined(
+// CHECK-LABEL: define internal void 
@{{.*}}triangular_loop_3v_l810.omp_outlined(
 // CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -584,13 +619,231 @@ void triangulat_loop_2() {
 // CHECK-NEXT:    [[K:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 8
 // CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], 
align 8
-// CHECK-NEXT:    store i32 0, ptr [[_TMP1]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[TMP]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TMP]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[DOTLB_MIN]], align 4
+// CHECK-NEXT:    store i32 9, ptr [[TMP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[DOTLB_MAX]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTLB_MIN]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTLB_MAX]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[CMP]] to i8
+// CHECK-NEXT:    store i8 [[STOREDV]], ptr [[DOTMIN_LESS_MAX]], align 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[DOTMIN_LESS_MAX]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP4]] to i1
+// CHECK-NEXT:    br i1 [[LOADEDV]], label %[[COND_TRUE:.*]], label 
%[[COND_FALSE:.*]]
+// CHECK:       [[COND_TRUE]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTLB_MIN]], align 4
+// CHECK-NEXT:    br label %[[COND_END:.*]]
+// CHECK:       [[COND_FALSE]]:
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTLB_MAX]], align 4
+// CHECK-NEXT:    br label %[[COND_END]]
+// CHECK:       [[COND_END]]:
+// CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP5]], %[[COND_TRUE]] ], [ 
[[TMP6]], %[[COND_FALSE]] ]
+// CHECK-NEXT:    store i32 [[COND]], ptr [[TMP]], align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub i32 10, [[TMP7]]
+// CHECK-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], 1
+// CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], 1
+// CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[DIV]] to i64
+// CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 100, [[CONV]]
+// CHECK-NEXT:    [[SUB4:%.*]] = sub nsw i64 [[MUL]], 1
+// CHECK-NEXT:    store i64 [[SUB4]], ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK-NEXT:    store i64 0, ptr [[DOTOMP_LB]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK-NEXT:    store i64 [[TMP8]], ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 
[[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK-NEXT:    [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i64 [[TMP11]], [[TMP12]]
+// CHECK-NEXT:    br i1 [[CMP5]], label %[[COND_TRUE6:.*]], label 
%[[COND_FALSE7:.*]]
+// CHECK:       [[COND_TRUE6]]:
+// CHECK-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_]], align 8
+// CHECK-NEXT:    br label %[[COND_END8:.*]]
+// CHECK:       [[COND_FALSE7]]:
+// CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    br label %[[COND_END8]]
+// CHECK:       [[COND_END8]]:
+// CHECK-NEXT:    [[COND9:%.*]] = phi i64 [ [[TMP13]], %[[COND_TRUE6]] ], [ 
[[TMP14]], %[[COND_FALSE7]] ]
+// CHECK-NEXT:    store i64 [[COND9]], ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// CHECK-NEXT:    store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    br label %[[OMP_INNER_FOR_COND:.*]]
+// CHECK:       [[OMP_INNER_FOR_COND]]:
+// CHECK-NEXT:    [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT:    [[CMP10:%.*]] = icmp sle i64 [[TMP16]], [[TMP17]]
+// CHECK-NEXT:    br i1 [[CMP10]], label %[[OMP_INNER_FOR_BODY:.*]], label 
%[[OMP_INNER_FOR_END:.*]]
+// CHECK:       [[OMP_INNER_FOR_BODY]]:
+// CHECK-NEXT:    [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB11:%.*]] = sub i32 10, [[TMP19]]
+// CHECK-NEXT:    [[SUB12:%.*]] = sub i32 [[SUB11]], 1
+// CHECK-NEXT:    [[ADD13:%.*]] = add i32 [[SUB12]], 1
+// CHECK-NEXT:    [[DIV14:%.*]] = udiv i32 [[ADD13]], 1
+// CHECK-NEXT:    [[MUL15:%.*]] = mul i32 10, [[DIV14]]
+// CHECK-NEXT:    [[CONV16:%.*]] = zext i32 [[MUL15]] to i64
+// CHECK-NEXT:    [[DIV17:%.*]] = sdiv i64 [[TMP18]], [[CONV16]]
+// CHECK-NEXT:    [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1
+// CHECK-NEXT:    [[ADD19:%.*]] = add nsw i64 0, [[MUL18]]
+// CHECK-NEXT:    [[CONV20:%.*]] = trunc i64 [[ADD19]] to i32
+// CHECK-NEXT:    store i32 [[CONV20]], ptr [[I]], align 4
+// CHECK-NEXT:    [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB21:%.*]] = sub i32 10, [[TMP22]]
+// CHECK-NEXT:    [[SUB22:%.*]] = sub i32 [[SUB21]], 1
+// CHECK-NEXT:    [[ADD23:%.*]] = add i32 [[SUB22]], 1
+// CHECK-NEXT:    [[DIV24:%.*]] = udiv i32 [[ADD23]], 1
+// CHECK-NEXT:    [[MUL25:%.*]] = mul i32 10, [[DIV24]]
+// CHECK-NEXT:    [[CONV26:%.*]] = zext i32 [[MUL25]] to i64
+// CHECK-NEXT:    [[DIV27:%.*]] = sdiv i64 [[TMP21]], [[CONV26]]
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB28:%.*]] = sub i32 10, [[TMP23]]
+// CHECK-NEXT:    [[SUB29:%.*]] = sub i32 [[SUB28]], 1
+// CHECK-NEXT:    [[ADD30:%.*]] = add i32 [[SUB29]], 1
+// CHECK-NEXT:    [[DIV31:%.*]] = udiv i32 [[ADD30]], 1
+// CHECK-NEXT:    [[MUL32:%.*]] = mul i32 10, [[DIV31]]
+// CHECK-NEXT:    [[CONV33:%.*]] = zext i32 [[MUL32]] to i64
+// CHECK-NEXT:    [[MUL34:%.*]] = mul nsw i64 [[DIV27]], [[CONV33]]
+// CHECK-NEXT:    [[SUB35:%.*]] = sub nsw i64 [[TMP20]], [[MUL34]]
+// CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB36:%.*]] = sub i32 10, [[TMP24]]
+// CHECK-NEXT:    [[SUB37:%.*]] = sub i32 [[SUB36]], 1
+// CHECK-NEXT:    [[ADD38:%.*]] = add i32 [[SUB37]], 1
+// CHECK-NEXT:    [[DIV39:%.*]] = udiv i32 [[ADD38]], 1
+// CHECK-NEXT:    [[MUL40:%.*]] = mul i32 1, [[DIV39]]
+// CHECK-NEXT:    [[CONV41:%.*]] = zext i32 [[MUL40]] to i64
+// CHECK-NEXT:    [[DIV42:%.*]] = sdiv i64 [[SUB35]], [[CONV41]]
+// CHECK-NEXT:    [[MUL43:%.*]] = mul nsw i64 [[DIV42]], 1
+// CHECK-NEXT:    [[ADD44:%.*]] = add nsw i64 0, [[MUL43]]
+// CHECK-NEXT:    [[CONV45:%.*]] = trunc i64 [[ADD44]] to i32
+// CHECK-NEXT:    store i32 [[CONV45]], ptr [[J]], align 4
+// CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[CONV46:%.*]] = zext i32 [[TMP25]] to i64
+// CHECK-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB47:%.*]] = sub i32 10, [[TMP28]]
+// CHECK-NEXT:    [[SUB48:%.*]] = sub i32 [[SUB47]], 1
+// CHECK-NEXT:    [[ADD49:%.*]] = add i32 [[SUB48]], 1
+// CHECK-NEXT:    [[DIV50:%.*]] = udiv i32 [[ADD49]], 1
+// CHECK-NEXT:    [[MUL51:%.*]] = mul i32 10, [[DIV50]]
+// CHECK-NEXT:    [[CONV52:%.*]] = zext i32 [[MUL51]] to i64
+// CHECK-NEXT:    [[DIV53:%.*]] = sdiv i64 [[TMP27]], [[CONV52]]
+// CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB54:%.*]] = sub i32 10, [[TMP29]]
+// CHECK-NEXT:    [[SUB55:%.*]] = sub i32 [[SUB54]], 1
+// CHECK-NEXT:    [[ADD56:%.*]] = add i32 [[SUB55]], 1
+// CHECK-NEXT:    [[DIV57:%.*]] = udiv i32 [[ADD56]], 1
+// CHECK-NEXT:    [[MUL58:%.*]] = mul i32 10, [[DIV57]]
+// CHECK-NEXT:    [[CONV59:%.*]] = zext i32 [[MUL58]] to i64
+// CHECK-NEXT:    [[MUL60:%.*]] = mul nsw i64 [[DIV53]], [[CONV59]]
+// CHECK-NEXT:    [[SUB61:%.*]] = sub nsw i64 [[TMP26]], [[MUL60]]
+// CHECK-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB62:%.*]] = sub i32 10, [[TMP32]]
+// CHECK-NEXT:    [[SUB63:%.*]] = sub i32 [[SUB62]], 1
+// CHECK-NEXT:    [[ADD64:%.*]] = add i32 [[SUB63]], 1
+// CHECK-NEXT:    [[DIV65:%.*]] = udiv i32 [[ADD64]], 1
+// CHECK-NEXT:    [[MUL66:%.*]] = mul i32 10, [[DIV65]]
+// CHECK-NEXT:    [[CONV67:%.*]] = zext i32 [[MUL66]] to i64
+// CHECK-NEXT:    [[DIV68:%.*]] = sdiv i64 [[TMP31]], [[CONV67]]
+// CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB69:%.*]] = sub i32 10, [[TMP33]]
+// CHECK-NEXT:    [[SUB70:%.*]] = sub i32 [[SUB69]], 1
+// CHECK-NEXT:    [[ADD71:%.*]] = add i32 [[SUB70]], 1
+// CHECK-NEXT:    [[DIV72:%.*]] = udiv i32 [[ADD71]], 1
+// CHECK-NEXT:    [[MUL73:%.*]] = mul i32 10, [[DIV72]]
+// CHECK-NEXT:    [[CONV74:%.*]] = zext i32 [[MUL73]] to i64
+// CHECK-NEXT:    [[MUL75:%.*]] = mul nsw i64 [[DIV68]], [[CONV74]]
+// CHECK-NEXT:    [[SUB76:%.*]] = sub nsw i64 [[TMP30]], [[MUL75]]
+// CHECK-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB77:%.*]] = sub i32 10, [[TMP34]]
+// CHECK-NEXT:    [[SUB78:%.*]] = sub i32 [[SUB77]], 1
+// CHECK-NEXT:    [[ADD79:%.*]] = add i32 [[SUB78]], 1
+// CHECK-NEXT:    [[DIV80:%.*]] = udiv i32 [[ADD79]], 1
+// CHECK-NEXT:    [[MUL81:%.*]] = mul i32 1, [[DIV80]]
+// CHECK-NEXT:    [[CONV82:%.*]] = zext i32 [[MUL81]] to i64
+// CHECK-NEXT:    [[DIV83:%.*]] = sdiv i64 [[SUB76]], [[CONV82]]
+// CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTLOWER]], align 4
+// CHECK-NEXT:    [[SUB84:%.*]] = sub i32 10, [[TMP35]]
+// CHECK-NEXT:    [[SUB85:%.*]] = sub i32 [[SUB84]], 1
+// CHECK-NEXT:    [[ADD86:%.*]] = add i32 [[SUB85]], 1
+// CHECK-NEXT:    [[DIV87:%.*]] = udiv i32 [[ADD86]], 1
+// CHECK-NEXT:    [[MUL88:%.*]] = mul i32 1, [[DIV87]]
+// CHECK-NEXT:    [[CONV89:%.*]] = zext i32 [[MUL88]] to i64
+// CHECK-NEXT:    [[MUL90:%.*]] = mul nsw i64 [[DIV83]], [[CONV89]]
+// CHECK-NEXT:    [[SUB91:%.*]] = sub nsw i64 [[SUB61]], [[MUL90]]
+// CHECK-NEXT:    [[MUL92:%.*]] = mul nsw i64 [[SUB91]], 1
+// CHECK-NEXT:    [[ADD93:%.*]] = add nsw i64 [[CONV46]], [[MUL92]]
+// CHECK-NEXT:    [[CONV94:%.*]] = trunc i64 [[ADD93]] to i32
+// CHECK-NEXT:    store i32 [[CONV94]], ptr [[K]], align 4
+// CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[K]], align 4
+// CHECK-NEXT:    [[CMP95:%.*]] = icmp ult i32 [[TMP36]], 10
+// CHECK-NEXT:    br i1 [[CMP95]], label %[[OMP_BODY_NEXT:.*]], label 
%[[OMP_BODY_CONTINUE:.*]]
+// CHECK:       [[OMP_BODY_NEXT]]:
+// CHECK-NEXT:    br label %[[OMP_BODY_CONTINUE]]
+// CHECK:       [[OMP_BODY_CONTINUE]]:
+// CHECK-NEXT:    br label %[[OMP_INNER_FOR_INC:.*]]
+// CHECK:       [[OMP_INNER_FOR_INC]]:
+// CHECK-NEXT:    [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    [[ADD96:%.*]] = add nsw i64 [[TMP37]], 1
+// CHECK-NEXT:    store i64 [[ADD96]], ptr [[DOTOMP_IV]], align 8
+// CHECK-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// CHECK:       [[OMP_INNER_FOR_END]]:
+// CHECK-NEXT:    br label %[[OMP_LOOP_EXIT:.*]]
+// CHECK:       [[OMP_LOOP_EXIT]]:
+// CHECK-NEXT:    [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8
+// CHECK-NEXT:    [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
+// CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP39]])
+// CHECK-NEXT:    ret void
+void triangular_loop_3() {
+#pragma omp target parallel loop collapse(3)
+  for (unsigned int i = 0; i < 10; i++)
+    for (int j = 0; j < 10; j++)
+      for (unsigned k = i; k < 10; k++)
+       ;
+}
+
+// CHECK-LABEL: define internal void @_Z10mixed_loopv.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTLB_MIN:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTLB_MAX:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTMIN_LESS_MAX:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[DOTLOWER:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[K:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 8
+// CHECK-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], 
align 8
+// CHECK-NEXT:    store i32 0, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
 // CHECK-NEXT:    store i32 [[ADD]], ptr [[DOTLB_MIN]], align 4
-// CHECK-NEXT:    store i32 9, ptr [[_TMP1]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[_TMP1]], align 4
-// CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-NEXT:    store i32 9, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP100:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP100]], 1
 // CHECK-NEXT:    store i32 [[ADD3]], ptr [[DOTLB_MAX]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTLB_MIN]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTLB_MAX]], align 4
@@ -608,7 +861,7 @@ void triangulat_loop_2() {
 // CHECK-NEXT:    br label %[[COND_END]]
 // CHECK:       [[COND_END]]:
 // CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP5]], %[[COND_TRUE]] ], [ [[TMP6]], %[[COND_FALSE]] ]
-// CHECK-NEXT:    store i32 [[COND]], ptr [[_TMP1]], align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[TMP1]], align 4
 // CHECK-NEXT:    store i32 [[COND]], ptr [[DOTLOWER]], align 4
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTLOWER]], align 4
 // CHECK-NEXT:    [[SUB:%.*]] = sub i32 10, [[TMP7]]
@@ -760,15 +1013,15 @@ void mixed_loop() {
        ;
 }
 
-// CHECK-LABEL: define internal void @_Z16rectangular_loopv.omp_outlined
+// CHECK-LABEL: define internal void @_Z16rectangular_loopv.omp_outlined(
 // CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4

>From 2afc85080785353e78e45c46573512c125aa5ebc Mon Sep 17 00:00:00 2001
From: Ammarguellat <[email protected]>
Date: Thu, 2 Apr 2026 05:06:14 -0700
Subject: [PATCH 2/2] Fix format

---
 clang/lib/Sema/SemaOpenMP.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 44ce3adac05ae..0793eb24f8d03 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -8860,16 +8860,16 @@ calculateNumIters(Sema &SemaRef, Scope *S, SourceLocation DefaultLoc,
   // those correctly.
   if (TestIsStrictOp && InitDependOnLC.has_value() &&
       InitDependOnLC.value() >= 2 && !CondDependOnLC.has_value()) {
-      Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Sub, Upper, Lower);
-      if (!Diff.isUsable())
-          return nullptr;
+    Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Sub, Upper, Lower);
+    if (!Diff.isUsable())
+        return nullptr;
 
-      Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Add, Diff.get(),
-          SemaRef.ActOnIntegerConstant(DefaultLoc, 1).get());
-      if (!Diff.isUsable())
-          return nullptr;
+    Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Add, Diff.get(),
+        SemaRef.ActOnIntegerConstant(DefaultLoc, 1).get());
+    if (!Diff.isUsable())
+        return nullptr;
 
-      return Diff.get();
+    return Diff.get();
   }
 
   // If need to reorganize, then calculate the form as Upper - (Lower - Step [+

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to