llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Zahira Ammarguellat (zahiraam) <details> <summary>Changes</summary> This patch restricts the triangular loop iteration count optimization to only apply to nested triangular loops (`depth >= 2`), not first-level triangular loops. The optimization computes iterations as `(Upper - Lower + 1)` for triangular loops where the inner loop bound depends on an outer loop counter. However, this formula only works correctly for deeply nested triangular dependencies: `k` depends on `j`, and `j` itself depends on `i`. For first-level triangular loops, where `k` depends directly on `i`, the standard iteration count formula handles the calculation correctly. --- Patch is 91.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/190153.diff 4 Files Affected: - (modified) clang/lib/Sema/SemaOpenMP.cpp (+7-3) - (modified) clang/test/OpenMP/for_non_rectangular_codegen.c (+111-95) - (modified) clang/test/OpenMP/for_private_reduction_codegen.cpp (+17-17) - (modified) clang/test/OpenMP/loop_collapse_codegen.cpp (+525-272) ``````````diff diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 0d3c7fc4907a2..fada37ba45755 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -8854,18 +8854,22 @@ calculateNumIters(Sema &SemaRef, Scope *S, SourceLocation DefaultLoc, ExprResult Diff; - // For triangular loops, use already computed Upper and Lower bounds to - // calculate the number of iterations: Upper - Lower + 1. + // For nested triangular loops (depth >= 2), use already computed Upper and + // Lower bounds to calculate the number of iterations: Upper - Lower + 1. + // Don't apply to first-level triangular loops as the standard formula handles + // those correctly. 
if (TestIsStrictOp && InitDependOnLC.has_value() && - !CondDependOnLC.has_value()) { + InitDependOnLC.value() >= 2 && !CondDependOnLC.has_value()) { Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Sub, Upper, Lower); if (!Diff.isUsable()) return nullptr; + Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Add, Diff.get(), SemaRef.ActOnIntegerConstant(DefaultLoc, 1).get()); if (!Diff.isUsable()) return nullptr; + return Diff.get(); } diff --git a/clang/test/OpenMP/for_non_rectangular_codegen.c b/clang/test/OpenMP/for_non_rectangular_codegen.c index b35a8f6e27b50..f8c7c84093d89 100644 --- a/clang/test/OpenMP/for_non_rectangular_codegen.c +++ b/clang/test/OpenMP/for_non_rectangular_codegen.c @@ -40,14 +40,14 @@ void collapsed(int mp) { // CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I0:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[_TMP13:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[_TMP15:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[J17:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I18:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I019:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[J19:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I20:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I021:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK-NEXT: store i32 [[MP]], ptr [[MP_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[MP_ADDR]], align 4 @@ -90,20 +90,22 @@ void collapsed(int mp) { // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTUPPER]], align 4 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLOWER]], align 4 // CHECK-NEXT: [[SUB7:%.*]] = sub i32 [[TMP12]], [[TMP13]] -// CHECK-NEXT: [[ADD8:%.*]] = add i32 
[[SUB7]], 1 -// CHECK-NEXT: [[CONV9:%.*]] = zext i32 [[ADD8]] to i64 -// CHECK-NEXT: [[MUL10:%.*]] = mul nsw i64 [[CONV6]], [[CONV9]] -// CHECK-NEXT: [[MUL11:%.*]] = mul nsw i64 [[MUL10]], 10 -// CHECK-NEXT: [[SUB12:%.*]] = sub nsw i64 [[MUL11]], 1 -// CHECK-NEXT: store i64 [[SUB12]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], 1 +// CHECK-NEXT: [[ADD9:%.*]] = add i32 [[SUB8]], 1 +// CHECK-NEXT: [[DIV10:%.*]] = udiv i32 [[ADD9]], 1 +// CHECK-NEXT: [[CONV11:%.*]] = zext i32 [[DIV10]] to i64 +// CHECK-NEXT: [[MUL12:%.*]] = mul nsw i64 [[CONV6]], [[CONV11]] +// CHECK-NEXT: [[MUL13:%.*]] = mul nsw i64 [[MUL12]], 10 +// CHECK-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL13]], 1 +// CHECK-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK-NEXT: store i32 0, ptr [[J]], align 4 // CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 // CHECK-NEXT: store i32 [[TMP14]], ptr [[I]], align 4 // CHECK-NEXT: store i32 0, ptr [[I0]], align 4 -// CHECK-NEXT: store i32 0, ptr [[_TMP13]], align 4 +// CHECK-NEXT: store i32 0, ptr [[_TMP15]], align 4 // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 0, [[TMP15]] -// CHECK-NEXT: br i1 [[CMP14]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-NEXT: [[CMP16:%.*]] = icmp slt i32 0, [[TMP15]] +// CHECK-NEXT: br i1 [[CMP16]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK: omp.precond.then: // CHECK-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 @@ -113,122 +115,136 @@ void collapsed(int mp) { // CHECK-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load i64, 
ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i64 [[TMP17]], [[TMP18]] -// CHECK-NEXT: br i1 [[CMP20]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] -// CHECK: cond.true22: +// CHECK-NEXT: [[CMP22:%.*]] = icmp sgt i64 [[TMP17]], [[TMP18]] +// CHECK-NEXT: br i1 [[CMP22]], label [[COND_TRUE24:%.*]], label [[COND_FALSE25:%.*]] +// CHECK: cond.true24: // CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK-NEXT: br label [[COND_END24:%.*]] -// CHECK: cond.false23: +// CHECK-NEXT: br label [[COND_END26:%.*]] +// CHECK: cond.false25: // CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK-NEXT: br label [[COND_END24]] -// CHECK: cond.end24: -// CHECK-NEXT: [[COND25:%.*]] = phi i64 [ [[TMP19]], [[COND_TRUE22]] ], [ [[TMP20]], [[COND_FALSE23]] ] -// CHECK-NEXT: store i64 [[COND25]], ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: br label [[COND_END26]] +// CHECK: cond.end26: +// CHECK-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP19]], [[COND_TRUE24]] ], [ [[TMP20]], [[COND_FALSE25]] ] +// CHECK-NEXT: store i64 [[COND27]], ptr [[DOTOMP_UB]], align 8 // CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 // CHECK-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK: omp.inner.for.cond: // CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK-NEXT: [[CMP26:%.*]] = icmp sle i64 [[TMP22]], [[TMP23]] -// CHECK-NEXT: br i1 [[CMP26]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NEXT: [[CMP28:%.*]] = icmp sle i64 [[TMP22]], [[TMP23]] +// CHECK-NEXT: br i1 [[CMP28]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK: omp.inner.for.body: // CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTUPPER]], align 4 // CHECK-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[DOTLOWER]], align 4 -// CHECK-NEXT: [[SUB28:%.*]] = sub i32 [[TMP25]], [[TMP26]] -// CHECK-NEXT: [[ADD29:%.*]] = add i32 [[SUB28]], 1 -// CHECK-NEXT: [[MUL30:%.*]] = mul i32 1, [[ADD29]] -// CHECK-NEXT: [[MUL31:%.*]] = mul i32 [[MUL30]], 10 -// CHECK-NEXT: [[CONV32:%.*]] = zext i32 [[MUL31]] to i64 -// CHECK-NEXT: [[DIV33:%.*]] = sdiv i64 [[TMP24]], [[CONV32]] -// CHECK-NEXT: [[MUL34:%.*]] = mul nsw i64 [[DIV33]], 1 -// CHECK-NEXT: [[ADD35:%.*]] = add nsw i64 0, [[MUL34]] -// CHECK-NEXT: [[CONV36:%.*]] = trunc i64 [[ADD35]] to i32 -// CHECK-NEXT: store i32 [[CONV36]], ptr [[J17]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[J17]], align 4 -// CHECK-NEXT: [[CONV37:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[SUB30:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK-NEXT: [[SUB31:%.*]] = sub i32 [[SUB30]], 1 +// CHECK-NEXT: [[ADD32:%.*]] = add i32 [[SUB31]], 1 +// CHECK-NEXT: [[DIV33:%.*]] = udiv i32 [[ADD32]], 1 +// CHECK-NEXT: [[MUL34:%.*]] = mul i32 1, [[DIV33]] +// CHECK-NEXT: [[MUL35:%.*]] = mul i32 [[MUL34]], 10 +// CHECK-NEXT: [[CONV36:%.*]] = zext i32 [[MUL35]] to i64 +// CHECK-NEXT: [[DIV37:%.*]] = sdiv i64 [[TMP24]], [[CONV36]] +// CHECK-NEXT: [[MUL38:%.*]] = mul nsw i64 [[DIV37]], 1 +// CHECK-NEXT: [[ADD39:%.*]] = add nsw i64 0, [[MUL38]] +// CHECK-NEXT: [[CONV40:%.*]] = trunc i64 [[ADD39]] to i32 +// CHECK-NEXT: store i32 [[CONV40]], ptr [[J19]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[J19]], align 4 +// CHECK-NEXT: [[CONV41:%.*]] = sext i32 [[TMP27]] to i64 // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTUPPER]], align 4 // CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTLOWER]], align 4 -// CHECK-NEXT: [[SUB38:%.*]] = sub i32 [[TMP30]], [[TMP31]] -// CHECK-NEXT: [[ADD39:%.*]] = add i32 [[SUB38]], 1 -// CHECK-NEXT: [[MUL40:%.*]] = mul i32 1, [[ADD39]] -// 
CHECK-NEXT: [[MUL41:%.*]] = mul i32 [[MUL40]], 10 -// CHECK-NEXT: [[CONV42:%.*]] = zext i32 [[MUL41]] to i64 -// CHECK-NEXT: [[DIV43:%.*]] = sdiv i64 [[TMP29]], [[CONV42]] -// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTUPPER]], align 4 -// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTLOWER]], align 4 -// CHECK-NEXT: [[SUB44:%.*]] = sub i32 [[TMP32]], [[TMP33]] -// CHECK-NEXT: [[ADD45:%.*]] = add i32 [[SUB44]], 1 -// CHECK-NEXT: [[MUL46:%.*]] = mul i32 1, [[ADD45]] +// CHECK-NEXT: [[SUB42:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK-NEXT: [[SUB43:%.*]] = sub i32 [[SUB42]], 1 +// CHECK-NEXT: [[ADD44:%.*]] = add i32 [[SUB43]], 1 +// CHECK-NEXT: [[DIV45:%.*]] = udiv i32 [[ADD44]], 1 +// CHECK-NEXT: [[MUL46:%.*]] = mul i32 1, [[DIV45]] // CHECK-NEXT: [[MUL47:%.*]] = mul i32 [[MUL46]], 10 // CHECK-NEXT: [[CONV48:%.*]] = zext i32 [[MUL47]] to i64 -// CHECK-NEXT: [[MUL49:%.*]] = mul nsw i64 [[DIV43]], [[CONV48]] -// CHECK-NEXT: [[SUB50:%.*]] = sub nsw i64 [[TMP28]], [[MUL49]] -// CHECK-NEXT: [[DIV51:%.*]] = sdiv i64 [[SUB50]], 10 -// CHECK-NEXT: [[MUL52:%.*]] = mul nsw i64 [[DIV51]], 1 -// CHECK-NEXT: [[ADD53:%.*]] = add nsw i64 [[CONV37]], [[MUL52]] -// CHECK-NEXT: [[CONV54:%.*]] = trunc i64 [[ADD53]] to i32 -// CHECK-NEXT: store i32 [[CONV54]], ptr [[I18]], align 4 +// CHECK-NEXT: [[DIV49:%.*]] = sdiv i64 [[TMP29]], [[CONV48]] +// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTUPPER]], align 4 +// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTLOWER]], align 4 +// CHECK-NEXT: [[SUB50:%.*]] = sub i32 [[TMP32]], [[TMP33]] +// CHECK-NEXT: [[SUB51:%.*]] = sub i32 [[SUB50]], 1 +// CHECK-NEXT: [[ADD52:%.*]] = add i32 [[SUB51]], 1 +// CHECK-NEXT: [[DIV53:%.*]] = udiv i32 [[ADD52]], 1 +// CHECK-NEXT: [[MUL54:%.*]] = mul i32 1, [[DIV53]] +// CHECK-NEXT: [[MUL55:%.*]] = mul i32 [[MUL54]], 10 +// CHECK-NEXT: [[CONV56:%.*]] = zext i32 [[MUL55]] to i64 +// CHECK-NEXT: [[MUL57:%.*]] = mul nsw i64 [[DIV49]], [[CONV56]] +// CHECK-NEXT: [[SUB58:%.*]] = sub nsw i64 [[TMP28]], 
[[MUL57]] +// CHECK-NEXT: [[DIV59:%.*]] = sdiv i64 [[SUB58]], 10 +// CHECK-NEXT: [[MUL60:%.*]] = mul nsw i64 [[DIV59]], 1 +// CHECK-NEXT: [[ADD61:%.*]] = add nsw i64 [[CONV41]], [[MUL60]] +// CHECK-NEXT: [[CONV62:%.*]] = trunc i64 [[ADD61]] to i32 +// CHECK-NEXT: store i32 [[CONV62]], ptr [[I20]], align 4 // CHECK-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTUPPER]], align 4 // CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTLOWER]], align 4 -// CHECK-NEXT: [[SUB55:%.*]] = sub i32 [[TMP36]], [[TMP37]] -// CHECK-NEXT: [[ADD56:%.*]] = add i32 [[SUB55]], 1 -// CHECK-NEXT: [[MUL57:%.*]] = mul i32 1, [[ADD56]] -// CHECK-NEXT: [[MUL58:%.*]] = mul i32 [[MUL57]], 10 -// CHECK-NEXT: [[CONV59:%.*]] = zext i32 [[MUL58]] to i64 -// CHECK-NEXT: [[DIV60:%.*]] = sdiv i64 [[TMP35]], [[CONV59]] +// CHECK-NEXT: [[SUB63:%.*]] = sub i32 [[TMP36]], [[TMP37]] +// CHECK-NEXT: [[SUB64:%.*]] = sub i32 [[SUB63]], 1 +// CHECK-NEXT: [[ADD65:%.*]] = add i32 [[SUB64]], 1 +// CHECK-NEXT: [[DIV66:%.*]] = udiv i32 [[ADD65]], 1 +// CHECK-NEXT: [[MUL67:%.*]] = mul i32 1, [[DIV66]] +// CHECK-NEXT: [[MUL68:%.*]] = mul i32 [[MUL67]], 10 +// CHECK-NEXT: [[CONV69:%.*]] = zext i32 [[MUL68]] to i64 +// CHECK-NEXT: [[DIV70:%.*]] = sdiv i64 [[TMP35]], [[CONV69]] // CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTUPPER]], align 4 // CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTLOWER]], align 4 -// CHECK-NEXT: [[SUB61:%.*]] = sub i32 [[TMP38]], [[TMP39]] -// CHECK-NEXT: [[ADD62:%.*]] = add i32 [[SUB61]], 1 -// CHECK-NEXT: [[MUL63:%.*]] = mul i32 1, [[ADD62]] -// CHECK-NEXT: [[MUL64:%.*]] = mul i32 [[MUL63]], 10 -// CHECK-NEXT: [[CONV65:%.*]] = zext i32 [[MUL64]] to i64 -// CHECK-NEXT: [[MUL66:%.*]] = mul nsw i64 [[DIV60]], [[CONV65]] -// CHECK-NEXT: [[SUB67:%.*]] = sub nsw i64 [[TMP34]], [[MUL66]] +// CHECK-NEXT: [[SUB71:%.*]] = sub i32 [[TMP38]], [[TMP39]] +// CHECK-NEXT: 
[[SUB72:%.*]] = sub i32 [[SUB71]], 1 +// CHECK-NEXT: [[ADD73:%.*]] = add i32 [[SUB72]], 1 +// CHECK-NEXT: [[DIV74:%.*]] = udiv i32 [[ADD73]], 1 +// CHECK-NEXT: [[MUL75:%.*]] = mul i32 1, [[DIV74]] +// CHECK-NEXT: [[MUL76:%.*]] = mul i32 [[MUL75]], 10 +// CHECK-NEXT: [[CONV77:%.*]] = zext i32 [[MUL76]] to i64 +// CHECK-NEXT: [[MUL78:%.*]] = mul nsw i64 [[DIV70]], [[CONV77]] +// CHECK-NEXT: [[SUB79:%.*]] = sub nsw i64 [[TMP34]], [[MUL78]] // CHECK-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTUPPER]], align 4 // CHECK-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTLOWER]], align 4 -// CHECK-NEXT: [[SUB68:%.*]] = sub i32 [[TMP42]], [[TMP43]] -// CHECK-NEXT: [[ADD69:%.*]] = add i32 [[SUB68]], 1 -// CHECK-NEXT: [[MUL70:%.*]] = mul i32 1, [[ADD69]] -// CHECK-NEXT: [[MUL71:%.*]] = mul i32 [[MUL70]], 10 -// CHECK-NEXT: [[CONV72:%.*]] = zext i32 [[MUL71]] to i64 -// CHECK-NEXT: [[DIV73:%.*]] = sdiv i64 [[TMP41]], [[CONV72]] +// CHECK-NEXT: [[SUB80:%.*]] = sub i32 [[TMP42]], [[TMP43]] +// CHECK-NEXT: [[SUB81:%.*]] = sub i32 [[SUB80]], 1 +// CHECK-NEXT: [[ADD82:%.*]] = add i32 [[SUB81]], 1 +// CHECK-NEXT: [[DIV83:%.*]] = udiv i32 [[ADD82]], 1 +// CHECK-NEXT: [[MUL84:%.*]] = mul i32 1, [[DIV83]] +// CHECK-NEXT: [[MUL85:%.*]] = mul i32 [[MUL84]], 10 +// CHECK-NEXT: [[CONV86:%.*]] = zext i32 [[MUL85]] to i64 +// CHECK-NEXT: [[DIV87:%.*]] = sdiv i64 [[TMP41]], [[CONV86]] // CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTUPPER]], align 4 // CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTLOWER]], align 4 -// CHECK-NEXT: [[SUB74:%.*]] = sub i32 [[TMP44]], [[TMP45]] -// CHECK-NEXT: [[ADD75:%.*]] = add i32 [[SUB74]], 1 -// CHECK-NEXT: [[MUL76:%.*]] = mul i32 1, [[ADD75]] -// CHECK-NEXT: [[MUL77:%.*]] = mul i32 [[MUL76]], 10 -// CHECK-NEXT: [[CONV78:%.*]] = zext i32 [[MUL77]] to i64 -// CHECK-NEXT: [[MUL79:%.*]] = mul nsw i64 [[DIV73]], [[CONV78]] -// CHECK-NEXT: 
[[SUB80:%.*]] = sub nsw i64 [[TMP40]], [[MUL79]] -// CHECK-NEXT: [[DIV81:%.*]] = sdiv i64 [[SUB80]], 10 -// CHECK-NEXT: [[MUL82:%.*]] = mul nsw i64 [[DIV81]], 10 -// CHECK-NEXT: [[SUB83:%.*]] = sub nsw i64 [[SUB67]], [[MUL82]] -// CHECK-NEXT: [[MUL84:%.*]] = mul nsw i64 [[SUB83]], 1 -// CHECK-NEXT: [[ADD85:%.*]] = add nsw i64 0, [[MUL84]] -// CHECK-NEXT: [[CONV86:%.*]] = trunc i64 [[ADD85]] to i32 -// CHECK-NEXT: store i32 [[CONV86]], ptr [[I019]], align 4 -// CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr [[I18]], align 4 +// CHECK-NEXT: [[SUB88:%.*]] = sub i32 [[TMP44]], [[TMP45]] +// CHECK-NEXT: [[SUB89:%.*]] = sub i32 [[SUB88]], 1 +// CHECK-NEXT: [[ADD90:%.*]] = add i32 [[SUB89]], 1 +// CHECK-NEXT: [[DIV91:%.*]] = udiv i32 [[ADD90]], 1 +// CHECK-NEXT: [[MUL92:%.*]] = mul i32 1, [[DIV91]] +// CHECK-NEXT: [[MUL93:%.*]] = mul i32 [[MUL92]], 10 +// CHECK-NEXT: [[CONV94:%.*]] = zext i32 [[MUL93]] to i64 +// CHECK-NEXT: [[MUL95:%.*]] = mul nsw i64 [[DIV87]], [[CONV94]] +// CHECK-NEXT: [[SUB96:%.*]] = sub nsw i64 [[TMP40]], [[MUL95]] +// CHECK-NEXT: [[DIV97:%.*]] = sdiv i64 [[SUB96]], 10 +// CHECK-NEXT: [[MUL98:%.*]] = mul nsw i64 [[DIV97]], 10 +// CHECK-NEXT: [[SUB99:%.*]] = sub nsw i64 [[SUB79]], [[MUL98]] +// CHECK-NEXT: [[MUL100:%.*]] = mul nsw i64 [[SUB99]], 1 +// CHECK-NEXT: [[ADD101:%.*]] = add nsw i64 0, [[MUL100]] +// CHECK-NEXT: [[CONV102:%.*]] = trunc i64 [[ADD101]] to i32 +// CHECK-NEXT: store i32 [[CONV102]], ptr [[I021]], align 4 +// CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr [[I20]], align 4 // CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr [[MP_ADDR]], align 4 -// CHECK-NEXT: [[CMP87:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] -// CHECK-NEXT: br i1 [[CMP87]], label [[OMP_BODY_NEXT:%.*]], label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NEXT: [[CMP103:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK-NEXT: br i1 [[CMP103]], label [[OMP_BODY_NEXT:%.*]], label [[OMP_BODY_CONTINUE:%.*]] // CHECK: omp.body.next: // CHECK-NEXT: br label [[OMP_BODY_CONTINUE]] // CHECK: 
omp.body.continue: // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK: omp.inner.for.inc: // CHECK-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK-NEXT: [[ADD89:%.*]] = add nsw i64 [[TMP48]], 1 -// CHECK-NEXT: store i64 [[ADD89]], ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[ADD105:%.*]] = add nsw i64 [[TMP48]], 1 +// CHECK-NEXT: store i64 [[ADD105]], ptr [[DOTOMP_IV]], align 8 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK: omp.inner.for.end: // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -277,21 +293,21 @@ void collapsed(int mp) { // SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, ptr [[I0]], align 4 // SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // SIMD-ONLY0-NEXT: store i32 [[INC]], ptr [[I0]], align 4 -// SIMD-ONLY0-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY0-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP1:![0-9]+]] // SIMD-ONLY0: for.end: // SIMD-ONLY0-NEXT: br label [[FOR_INC7:%.*]] // SIMD-ONLY0: for.inc7: // SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 // SIMD-ONLY0-NEXT: [[INC8:%.*]] = add nsw i32 [[TMP7]], 1 // SIMD-ONLY0-NEXT: store i32 [[INC8]], ptr [[I]], align 4 -// SIMD-ONLY0-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY0-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP3:![0-9]+]] // SIMD-ONLY0: for.end9: // SIMD-ONLY0-NEXT: br label [[FOR_INC10:%.*]] // SIMD-ONLY0: for.inc10: // SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, ptr [[J]], align 4 // SIMD-ONLY0-NEXT: [[INC11:%.*]] = add nsw i32 [[TMP8]], 1 // SIMD-ONLY0-NEXT: store i32 [[INC11]], ptr [[J]], align 4 -// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // SIMD-ONLY0: for.end12: // SIMD-ONLY0-NEXT: ret void // diff --git a/clang/test/OpenMP/for_private_reduction_codegen.cpp b/clang/test/OpenMP/for_private_reduction_codegen.cpp index f27b6b1d35225..fdaf6f3068c1b 100644 --- 
a/clang/test/OpenMP/for_private_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_private_reduction_codegen.cpp @@ -112,7 +112,7 @@ int main(void) { // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 -// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] // CHECK: for.end: // CHECK-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP0]], i32 4) // CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z8func_redv.omp_outlined, ptr [[ARRAY]]) @@ -152,7 ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/190153 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
