Author: Florian Hahn
Date: 2025-09-09T10:36:54+02:00
New Revision: bb383adfafcae15105af39bb5bec07c9245e8784
URL: https://github.com/llvm/llvm-project/commit/bb383adfafcae15105af39bb5bec07c9245e8784
DIFF: https://github.com/llvm/llvm-project/commit/bb383adfafcae15105af39bb5bec07c9245e8784.diff

LOG: [SCEVExp] Fix early exit in ComputeEndCheck. (#156910)

ComputeEndCheck incorrectly returned false for unsigned predicates
starting at zero and a positive step. The AddRec could still wrap if
Step * trunc ExitCount wraps or trunc ExitCount strips leading 1s.

Fixes https://github.com/llvm/llvm-project/issues/156849.

PR: https://github.com/llvm/llvm-project/pull/156910

(cherry picked from commit f8972c8280d28660aaff888c093a9e01b9ee71e6)

Added:


Modified:
    llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
    llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
    llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll
    llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
    llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll

Removed:



################################################################################
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 24fe08d6c3e4e..bf457194bfd8e 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2133,8 +2133,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   // negative. If Step is known to be positive or negative, only create
   // either 1. or 2.
   auto ComputeEndCheck = [&]() -> Value * {
-    // Checking <u 0 is always false.
-    if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
+    // Checking <u 0 is always false, if (Step * trunc ExitCount) does not wrap.
+    // TODO: Predicates that can be proven true/false should be discarded when
+    // the predicates are created, not late during expansion.
+    if (!Signed && Start->isZero() && SE.isKnownPositive(Step) &&
+        DstBits < SrcBits &&
+        ExitCount == SE.getZeroExtendExpr(SE.getTruncateExpr(ExitCount, ARTy),
+                                          ExitCount->getType()) &&
+        SE.willNotOverflow(Instruction::Mul, Signed, Step,
+                           SE.getTruncateExpr(ExitCount, ARTy)))
       return ConstantInt::getFalse(Loc->getContext());
 
     // Get the backedge taken count and truncate or extended to the AR type.
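
To make the two failure modes from the log message concrete, here is a small
standalone C++ sketch. It is an illustration only, not LLVM code: the constants
(a 4-bit AddRec {0,+,5} and trip count 16) are taken from the interleaved-accesses
test updated further below; everything else is made up for the example.

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t NarrowBits = 4;                       // width of the AddRec type (i4)
  const uint64_t NarrowMax = (1u << NarrowBits) - 1;   // 15, i.e. i4 -1

  // Condition 1: Step * trunc(ExitCount) wraps in the narrow type.
  uint64_t Step = 5, ExitCount = 15;                   // {0,+,5}, backedge-taken count 15
  uint64_t Product = Step * (ExitCount & NarrowMax);   // 75 does not fit in 4 bits
  bool MulWraps = Product > NarrowMax;
  printf("mul wraps in i4: %s\n", MulWraps ? "yes" : "no");

  // Condition 2: trunc(ExitCount) strips leading 1s, i.e.
  // zext(trunc(ExitCount)) != ExitCount, so the narrow trip count is short.
  uint64_t WideExitCount = 17;                         // needs 5 bits
  bool TruncStripsBits = (WideExitCount & NarrowMax) != WideExitCount;
  printf("trunc strips bits: %s\n", TruncStripsBits ? "yes" : "no");
  return 0;
}

With Start = 0 and Step = 5 the old code took the early exit unconditionally and
dropped the overflow check; the new code, roughly speaking, only takes it when
SCEV can show that neither of the two conditions above can occur
(zext(trunc ExitCount) == ExitCount and the multiply provably does not overflow).
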
diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
index 98b89abfeafda..057a60918ecf3 100644
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -10,8 +10,13 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
 ; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
 ; CHECK: for.body.lver.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK: for.body.ph.lver.orig:
 ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK: for.body.lver.orig:
@@ -75,7 +80,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end.loopexit:
 ; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end.loopexit1:
+; CHECK: for.end.loopexit2:
 ; CHECK-NEXT: br label [[FOR_END]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret void
@@ -135,8 +140,13 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
 ; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
 ; CHECK: for.body.lver.check:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK: for.body.ph.lver.orig:
 ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK: for.body.lver.orig:
@@ -200,7 +210,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end.loopexit:
 ; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end.loopexit1:
+; CHECK: for.end.loopexit2:
 ; CHECK-NEXT: br label [[FOR_END]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 147e949808b54..c1566c4dd32bf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -1045,8 +1045,8 @@ define i64 @live_in_known_1_via_scev() {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
-; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
+; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
 ; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -1213,6 +1213,7 @@ define i32 @g(i64 %n) {
 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[N]] to i32
 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N]], 4294967295
 ; CHECK-NEXT: br i1 [[TMP2]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK: vector.main.loop.iter.check:
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll
index f0b89d498e45c..38438941eb357 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-requiring-scev-predicates.ll
@@ -120,12 +120,16 @@ exit:
 
 ; For %gep, we have the following SCEV: ((4 * (zext i4 {0,+,5}<%loop> to i64))<nuw><nsw> + %x).
 ; Note the i4 bit wide AddRec {0,+,5}. It is known to wrap in the loop with trip count 16.
-; FIXME: Currently we incorrectly assume the widened AddRec does not wrap.
 define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr noalias %x, ptr noalias %out) {
 ; CHECK-LABEL: define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(
 ; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[OUT:%.*]]) {
 ; CHECK-NEXT: [[START:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 5, i4 -1)
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i4, i1 } [[MUL]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1
+; CHECK-NEXT: br i1 [[MUL_OVERFLOW]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
@@ -144,7 +148,7 @@ define void @wrap_predicate_for_interleave_group_wraps_for_known_trip_count(ptr
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br label %[[SCALAR_PH]]
 ; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 12, %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 12, %[[MIDDLE_BLOCK]] ], [ 0, %[[START]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -189,8 +193,13 @@ define void @wrap_predicate_for_interleave_group_unknown_trip_count(ptr noalias
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
 ; CHECK: [[VECTOR_SCEVCHECK]]:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP0]] to i4
+; CHECK-NEXT: [[MUL:%.*]] = call { i4, i1 } @llvm.umul.with.overflow.i4(i4 3, i4 [[TMP9]])
+; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i4, i1 } [[MUL]], 0
+; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i4, i1 } [[MUL]], 1
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 15
-; CHECK-NEXT: br i1 [[TMP1]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK: [[VECTOR_PH]]:
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
index a47037c46eedc..7a5cf4576ac9e 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
@@ -19,6 +19,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i2
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
 ; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK: vector.memcheck:
@@ -91,6 +92,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP0]] to i2
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
 ; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK: vector.memcheck:
@@ -359,6 +361,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) {
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK: vector.scevcheck:
 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP0]] to i2
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], 3
 ; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
index 6e5eeaf9f1218..3e25695304854 100644
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -29,7 +29,12 @@ define void @f1(ptr noalias %a,
 ; LV-LABEL: @f1(
 ; LV-NEXT: for.body.lver.check:
 ; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
+; LV-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i32
+; LV-NEXT: [[MUL2:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP5]])
+; LV-NEXT: [[MUL_RESULT1:%.*]] = extractvalue { i32, i1 } [[MUL2]], 0
+; LV-NEXT: [[MUL_OVERFLOW1:%.*]] = extractvalue { i32, i1 } [[MUL2]], 1
 ; LV-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
+; LV-NEXT: [[TMP8:%.*]] = or i1 [[MUL_OVERFLOW1]], [[TMP1]]
 ; LV-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
 ; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
 ; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
@@ -37,7 +42,7 @@ define void @f1(ptr noalias %a,
 ; LV-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]]
 ; LV-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
 ; LV-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
-; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP1]], [[TMP6]]
+; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP8]], [[TMP6]]
 ; LV-NEXT: br i1 [[TMP7]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV: for.body.ph.lver.orig:
 ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
@@ -75,7 +80,7 @@ define void @f1(ptr noalias %a,
 ; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
 ; LV: for.end.loopexit:
 ; LV-NEXT: br label [[FOR_END:%.*]]
-; LV: for.end.loopexit2:
+; LV: for.end.loopexit5:
 ; LV-NEXT: br label [[FOR_END]]
 ; LV: for.end:
 ; LV-NEXT: ret void
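
When reading the new vector.scevcheck / for.body.lver.check blocks above, the
following rough standalone C++ rendering of the check may help. It assumes the
GCC/Clang __builtin_mul_overflow builtin; the helper name and the driver values
are made up, only the arithmetic mirrors the IR the LoopDistribute and
LoopVersioning tests now expect (trunc, @llvm.umul.with.overflow.i32 with step 2,
the icmp ugt range check, and the final or).

#include <cstdint>
#include <cstdio>

// Hypothetical helper: true means the overflow checks fail and the versioned
// loop must branch to the original (unoptimized) copy.
static bool mustUseOriginalLoop(uint64_t N) {
  uint64_t BTC = N - 1;                          // backedge-taken count
  uint32_t Trunc = static_cast<uint32_t>(BTC);   // trunc i64 -> i32
  uint32_t MulResult;
  bool MulOverflow = __builtin_mul_overflow(2u, Trunc, &MulResult);
  bool OutOfRange = BTC > UINT32_MAX;            // icmp ugt ..., 4294967295
  return MulOverflow || OutOfRange;              // or i1 %mul.overflow, %cmp
}

int main() {
  printf("N=100: %d\n", mustUseOriginalLoop(100));                 // 0: checks pass
  printf("N=2^31+1: %d\n", mustUseOriginalLoop((1ull << 31) + 1)); // 1: 2 * 2^31 overflows i32
  return 0;
}

In the LoopVersioning test this bit is additionally or'd with the existing
pointer-wrap checks before the branch that selects the loop version.
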
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits