https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/181110
>From 9250d4e7e4f5274db96fab5ee8faa76a594178b9 Mon Sep 17 00:00:00 2001 From: Guy David <[email protected]> Date: Wed, 11 Feb 2026 16:20:41 +0200 Subject: [PATCH 1/2] [ValueTracking] Extend computeConstantRange for add/sub, sext/zext/trunc Recursively compute operand ranges for add/sub and propagate ranges through sext/zext/trunc. For add/sub, the computed range is intersected with any existing range from setLimitsForBinOp, and NSW/NUW flags are used via addWithNoWrap/ subWithNoWrap to tighten bounds. The motivation is to enable further folding of reduce.add expressions in comparisons, where the result range can be bounded by the input element ranges. Compile-time impact on llvm-test-suite is <0.1% mean. --- llvm/lib/Analysis/ValueTracking.cpp | 28 ++++++ llvm/test/Analysis/BasicAA/range.ll | 66 +++++++++++++ llvm/unittests/Analysis/ValueTrackingTest.cpp | 92 +++++++++++++++++++ 3 files changed, 186 insertions(+) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 8761b7bcb51a2..acbb50b8cae53 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -10242,6 +10242,34 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, // TODO: Return ConstantRange. setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned); CR = ConstantRange::getNonEmpty(Lower, Upper); + if (BO->getOpcode() == Instruction::Add || + BO->getOpcode() == Instruction::Sub) { + ConstantRange LHS = computeConstantRange( + BO->getOperand(0), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); + ConstantRange RHS = computeConstantRange( + BO->getOperand(1), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); + unsigned NoWrapKind = 0; + if (IIQ.hasNoUnsignedWrap(BO)) + NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap; + if (IIQ.hasNoSignedWrap(BO)) + NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap; + ConstantRange OpCR = BO->getOpcode() == Instruction::Add + ? LHS.addWithNoWrap(RHS, NoWrapKind) + : LHS.subWithNoWrap(RHS, NoWrapKind); + CR = CR.intersectWith(OpCR); + } + } else if (auto *SExt = dyn_cast<SExtInst>(V)) { + CR = computeConstantRange(SExt->getOperand(0), ForSigned, UseInstrInfo, AC, + CtxI, DT, Depth + 1) + .signExtend(BitWidth); + } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) { + CR = computeConstantRange(ZExt->getOperand(0), ForSigned, UseInstrInfo, AC, + CtxI, DT, Depth + 1) + .zeroExtend(BitWidth); + } else if (auto *Trunc = dyn_cast<TruncInst>(V)) { + CR = computeConstantRange(Trunc->getOperand(0), ForSigned, UseInstrInfo, AC, + CtxI, DT, Depth + 1) + .truncate(BitWidth); } else if (auto *II = dyn_cast<IntrinsicInst>(V)) CR = getRangeForIntrinsic(*II, UseInstrInfo); else if (auto *SI = dyn_cast<SelectInst>(V)) { diff --git a/llvm/test/Analysis/BasicAA/range.ll b/llvm/test/Analysis/BasicAA/range.ll index e5dfb60c8b878..a41fd63ee52f6 100644 --- a/llvm/test/Analysis/BasicAA/range.ll +++ b/llvm/test/Analysis/BasicAA/range.ll @@ -271,6 +271,72 @@ entry: ret i32 %load_ } +; CHECK-LABEL: Function: zext_propagate_range +; CHECK: NoAlias: i32* %gep, i32* %gep128 +define void @zext_propagate_range(ptr %p, i8 %idx) { + %narrow = and i8 %idx, 127 + %wide = zext i8 %narrow to i64 + %gep = getelementptr i32, ptr %p, i64 %wide + %gep128 = getelementptr i32, ptr %p, i64 128 + load i32, ptr %gep + load i32, ptr %gep128 + ret void +} + +; CHECK-LABEL: Function: sext_propagate_range +; CHECK: NoAlias: i32* %gep, i32* %gep128 +define void @sext_propagate_range(ptr %p, i8 %idx) { + %clamped = and i8 %idx, 100 + %wide = sext i8 %clamped to i64 + %gep = getelementptr i32, ptr %p, i64 %wide + %gep128 = getelementptr i32, ptr %p, i64 128 + load i32, ptr %gep + load i32, ptr %gep128 + ret void +} + +; CHECK-LABEL: Function: zext_add_range +; CHECK: NoAlias: i32* %gep, i32* %gep512 +define void @zext_add_range(ptr %p, i8 %x, i8 %y) { + %ext.x = zext i8 %x to i64 + %ext.y = zext i8 %y to i64 + %sum = add i64 %ext.x, %ext.y + %gep = getelementptr i32, ptr %p, i64 %sum + %gep512 = getelementptr i32, ptr %p, i64 512 + load i32, ptr %gep + load i32, ptr %gep512 + ret void +} + +; CHECK-LABEL: Function: zext_sub_range +; CHECK: NoAlias: i32* %gep, i32* %gep256 +; CHECK: NoAlias: i32* %gep, i32* %gepneg256 +define void @zext_sub_range(ptr %p, i8 %x, i8 %y) { + %ext.x = zext i8 %x to i64 + %ext.y = zext i8 %y to i64 + %diff = sub i64 %ext.x, %ext.y + %gep = getelementptr i32, ptr %p, i64 %diff + %gep256 = getelementptr i32, ptr %p, i64 256 + %gepneg256 = getelementptr i32, ptr %p, i64 -256 + load i32, ptr %gep + load i32, ptr %gep256 + load i32, ptr %gepneg256 + ret void +} + +; CHECK-LABEL: Function: trunc_propagate_range +; CHECK: NoAlias: i32* %gep, i32* %gep64 +define void @trunc_propagate_range(ptr %p, i64 %idx) { + %clamped = and i64 %idx, 63 + %narrow = trunc i64 %clamped to i8 + %wide = zext i8 %narrow to i64 + %gep = getelementptr i32, ptr %p, i64 %wide + %gep64 = getelementptr i32, ptr %p, i64 64 + load i32, ptr %gep + load i32, ptr %gep64 + ret void +} + declare void @llvm.assume(i1) !0 = !{ i32 0, i32 2 } diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 6229d408de2a8..2ee45dccc6595 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -3394,6 +3394,98 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { // If we don't know the value of x.2, we don't know the value of x.1. EXPECT_TRUE(CR1.isFullSet()); } + { + auto M = parseModule(R"( + define void @test(i8 %x) { + %sext = sext i8 %x to i32 + %zext = zext i8 %x to i32 + ret void + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *SExt = &findInstructionByName(F, "sext"); + Instruction *ZExt = &findInstructionByName(F, "zext"); + ConstantRange SExtCR = computeConstantRange(SExt, true, true, &AC, SExt); + EXPECT_EQ(SExtCR.getSignedMin().getSExtValue(), -128); + EXPECT_EQ(SExtCR.getSignedMax().getSExtValue(), 127); + ConstantRange ZExtCR = computeConstantRange(ZExt, false, true, &AC, ZExt); + EXPECT_EQ(ZExtCR.getUnsignedMin().getZExtValue(), 0u); + EXPECT_EQ(ZExtCR.getUnsignedMax().getZExtValue(), 255u); + } + { + auto M = parseModule(R"( + define i32 @test(i8 %x) { + %ext = sext i8 %x to i32 + %add = add nsw i32 %ext, 10 + ret i32 %add + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Add = &findInstructionByName(F, "add"); + ConstantRange CR = computeConstantRange(Add, true, true, &AC, Add); + EXPECT_EQ(CR.getSignedMin().getSExtValue(), -118); + EXPECT_EQ(CR.getSignedMax().getSExtValue(), 137); + } + { + auto M = parseModule(R"( + define i32 @test(i8 %x, i8 %y) { + %ext.x = zext i8 %x to i32 + %ext.y = zext i8 %y to i32 + %sub = sub i32 %ext.x, %ext.y + ret i32 %sub + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Sub = &findInstructionByName(F, "sub"); + ConstantRange CR = computeConstantRange(Sub, true, true, &AC, Sub); + EXPECT_EQ(CR.getSignedMin().getSExtValue(), -255); + EXPECT_EQ(CR.getSignedMax().getSExtValue(), 255); + } + { + // trunc + auto M = parseModule(R"( + define void @test(i32 %x) { + %narrow = trunc i32 %x to i8 + ret void + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Trunc = &findInstructionByName(F, "narrow"); + ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc); + EXPECT_TRUE(CR.isFullSet()); + EXPECT_EQ(CR.getBitWidth(), 8u); + } + { + // trunc with restricted input range + auto M = parseModule(R"( + define i8 @test(i32 %x) { + %clamped = and i32 %x, 127 + %narrow = trunc i32 %clamped to i8 + ret i8 %narrow + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Trunc = &findInstructionByName(F, "narrow"); + ConstantRange CR = computeConstantRange(Trunc, false, true, &AC, Trunc); + EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u); + EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 127u); + } + { + // Chained adds from i1 + auto M = parseModule(R"( + define i32 @test(i1 %x) { + %ext = sext i1 %x to i32 + %add1 = add nsw i32 %ext, %ext + %add2 = add nsw i32 %add1, %ext + ret i32 %add2 + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Add2 = &findInstructionByName(F, "add2"); + ConstantRange CR = computeConstantRange(Add2, true, true, &AC, Add2); + EXPECT_EQ(CR.getSignedMin().getSExtValue(), -3); + EXPECT_EQ(CR.getSignedMax().getSExtValue(), 0); + } } struct FindAllocaForValueTestParams { >From 34244cf1c482fd793ad762528dcb2c978b1150a2 Mon Sep 17 00:00:00 2001 From: Guy David <[email protected]> Date: Thu, 12 Feb 2026 13:28:29 +0200 Subject: [PATCH 2/2] Address comments --- clang/test/CodeGen/isfpclass.c | 4 +- clang/test/Headers/__clang_hip_math.hip | 48 +- clang/test/Headers/wasm.c | 32 +- llvm/lib/Analysis/ValueTracking.cpp | 96 ++-- llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 434 ++++++++---------- llvm/test/CodeGen/AMDGPU/sdiv64.ll | 27 +- llvm/test/CodeGen/AMDGPU/srem64.ll | 95 ++-- llvm/test/CodeGen/AMDGPU/udiv64.ll | 34 +- llvm/test/CodeGen/AMDGPU/urem64.ll | 50 +- llvm/test/CodeGen/PowerPC/add_cmp.ll | 12 +- llvm/test/Transforms/Attributor/range.ll | 20 +- llvm/test/Transforms/InstCombine/add.ll | 4 +- llvm/test/Transforms/InstCombine/fls.ll | 2 +- llvm/test/Transforms/InstCombine/icmp-add.ll | 3 +- llvm/test/Transforms/InstCombine/pr80597.ll | 9 +- llvm/test/Transforms/InstCombine/sadd_sat.ll | 10 +- .../InstCombine/saturating-add-sub.ll | 5 +- llvm/unittests/Analysis/ValueTrackingTest.cpp | 15 + 18 files changed, 415 insertions(+), 485 deletions(-) diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c index 4c6d556e008e5..1465b43149fcc 100644 --- a/clang/test/CodeGen/isfpclass.c +++ b/clang/test/CodeGen/isfpclass.c @@ -136,7 +136,7 @@ typedef double __attribute__((ext_vector_type(4))) double4; typedef int __attribute__((ext_vector_type(4))) int4; typedef long __attribute__((ext_vector_type(4))) long4; -// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32( +// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32( // CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer @@ -147,7 +147,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) { return __builtin_isfpclass(x, 3 /*NaN*/); } -// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32( +// CHECK-LABEL: define dso_local noundef range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32( // CHECK-SAME: <4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]] diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 68a8666e41856..0a9c757aabf55 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -2653,7 +2653,7 @@ extern "C" __device__ int test_ilogb(double x) { return ilogb(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2666,7 +2666,7 @@ extern "C" __device__ int test_ilogb(double x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 1 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2674,7 +2674,7 @@ extern "C" __device__ int test_ilogb(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finitef( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finitef( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2682,7 +2682,7 @@ extern "C" __device__ int test_ilogb(double x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finitef( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___finitef( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]]) @@ -2694,7 +2694,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { return __finitef(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2707,7 +2707,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 1 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2715,7 +2715,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___finite( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___finite( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2723,7 +2723,7 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___finite( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___finite( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]]) @@ -2735,7 +2735,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { return __finite(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2748,7 +2748,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2756,7 +2756,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinff( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinff( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call float @llvm.fabs.f32(float [[X]]) @@ -2764,7 +2764,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinff( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isinff( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) float @llvm.fabs.f32(float [[X]]) @@ -2776,7 +2776,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { return __isinff(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2789,7 +2789,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2797,7 +2797,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isinf( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isinf( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) @@ -2805,7 +2805,7 @@ extern "C" __device__ BOOL_TYPE test___isinff(float x) { // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isinf( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isinf( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call addrspace(4) double @llvm.fabs.f64(double [[X]]) @@ -2817,7 +2817,7 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) { return __isinf(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf( // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 @@ -2829,21 +2829,21 @@ extern "C" __device__ BOOL_TYPE test___isinf(double x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnanf( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnanf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnanf( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isnanf( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno float [[X]], 0.000000e+00 @@ -2854,7 +2854,7 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) { return __isnanf(x); } -// DEFAULT-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// DEFAULT-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan( // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 @@ -2866,21 +2866,21 @@ extern "C" __device__ BOOL_TYPE test___isnanf(float x) { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: ret i32 0 // -// APPROX-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// APPROX-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // APPROX-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // APPROX-NEXT: ret i32 [[CONV]] // -// NCRDIV-LABEL: define dso_local range(i32 0, 2) i32 @test___isnan( +// NCRDIV-LABEL: define dso_local noundef range(i32 0, 2) i32 @test___isnan( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 // NCRDIV-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32 // NCRDIV-NEXT: ret i32 [[CONV]] // -// AMDGCNSPIRV-LABEL: define spir_func range(i32 0, 2) i32 @test___isnan( +// AMDGCNSPIRV-LABEL: define spir_func noundef range(i32 0, 2) i32 @test___isnan( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = fcmp uno double [[X]], 0.000000e+00 diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c index 2545a014e4340..fdce091fe640e 100644 --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -1234,7 +1234,7 @@ v128_t test_u16x8_ge(v128_t a, v128_t b) { return wasm_u16x8_ge(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_eq( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_eq( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[A]], [[B]] @@ -1245,7 +1245,7 @@ v128_t test_i32x4_eq(v128_t a, v128_t b) { return wasm_i32x4_eq(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ne( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_ne( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[A]], [[B]] @@ -1256,7 +1256,7 @@ v128_t test_i32x4_ne(v128_t a, v128_t b) { return wasm_i32x4_ne(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_lt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_lt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp slt <4 x i32> [[A]], [[B]] @@ -1267,7 +1267,7 @@ v128_t test_i32x4_lt(v128_t a, v128_t b) { return wasm_i32x4_lt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_lt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_lt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ult <4 x i32> [[A]], [[B]] @@ -1278,7 +1278,7 @@ v128_t test_u32x4_lt(v128_t a, v128_t b) { return wasm_u32x4_lt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_gt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_gt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A]], [[B]] @@ -1289,7 +1289,7 @@ v128_t test_i32x4_gt(v128_t a, v128_t b) { return wasm_i32x4_gt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_gt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_gt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A]], [[B]] @@ -1300,7 +1300,7 @@ v128_t test_u32x4_gt(v128_t a, v128_t b) { return wasm_u32x4_gt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_le( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_le( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sle <4 x i32> [[A]], [[B]] @@ -1311,7 +1311,7 @@ v128_t test_i32x4_le(v128_t a, v128_t b) { return wasm_i32x4_le(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_le( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_le( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp ule <4 x i32> [[A]], [[B]] @@ -1322,7 +1322,7 @@ v128_t test_u32x4_le(v128_t a, v128_t b) { return wasm_u32x4_le(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_i32x4_ge( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_i32x4_ge( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp sge <4 x i32> [[A]], [[B]] @@ -1333,7 +1333,7 @@ v128_t test_i32x4_ge(v128_t a, v128_t b) { return wasm_i32x4_ge(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_u32x4_ge( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_u32x4_ge( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp uge <4 x i32> [[A]], [[B]] @@ -1428,7 +1428,7 @@ v128_t test_i64x2_ge(v128_t a, v128_t b) { return wasm_i64x2_ge(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_eq( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_eq( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1441,7 +1441,7 @@ v128_t test_f32x4_eq(v128_t a, v128_t b) { return wasm_f32x4_eq(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ne( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_ne( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1454,7 +1454,7 @@ v128_t test_f32x4_ne(v128_t a, v128_t b) { return wasm_f32x4_ne(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_lt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_lt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1467,7 +1467,7 @@ v128_t test_f32x4_lt(v128_t a, v128_t b) { return wasm_f32x4_lt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_gt( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_gt( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1480,7 +1480,7 @@ v128_t test_f32x4_gt(v128_t a, v128_t b) { return wasm_f32x4_gt(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_le( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_le( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> @@ -1493,7 +1493,7 @@ v128_t test_f32x4_le(v128_t a, v128_t b) { return wasm_f32x4_le(a, b); } -// CHECK-LABEL: define hidden range(i32 -1, 1) <4 x i32> @test_f32x4_ge( +// CHECK-LABEL: define hidden noundef range(i32 -1, 1) <4 x i32> @test_f32x4_ge( // CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <4 x float> diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index acbb50b8cae53..abeb1c5d161c0 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -9818,10 +9818,14 @@ std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred, return std::nullopt; } -static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, - APInt &Upper, const InstrInfoQuery &IIQ, - bool PreferSignedRange) { - unsigned Width = Lower.getBitWidth(); +static ConstantRange getRangeForBinOp(const BinaryOperator &BO, bool ForSigned, + bool UseInstrInfo, AssumptionCache *AC, + const Instruction *CtxI, + const DominatorTree *DT, unsigned Depth) { + unsigned Width = BO.getType()->getScalarSizeInBits(); + InstrInfoQuery IIQ(UseInstrInfo); + APInt Lower = APInt(Width, 0); + APInt Upper = APInt(Width, 0); const APInt *C; switch (BO.getOpcode()) { case Instruction::Sub: @@ -9834,7 +9838,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, // is never larger than the signed range. Example: // "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126]. // "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127]. - if (PreferSignedRange && HasNSW && HasNUW) + if (ForSigned && HasNSW && HasNUW) HasNUW = false; if (HasNUW) { @@ -9863,7 +9867,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, // range. Otherwise if both no-wraps are set, use the unsigned range // because it is never larger than the signed range. Example: "add nuw // nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125]. - if (PreferSignedRange && HasNSW && HasNUW) + if (ForSigned && HasNSW && HasNUW) HasNUW = false; if (HasNUW) { @@ -10041,6 +10045,34 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, default: break; } + + ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper); + bool IsDisjointOr = BO.getOpcode() == Instruction::Or && + cast<PossiblyDisjointInst>(&BO)->isDisjoint(); + if (BO.getOpcode() == Instruction::Add || + BO.getOpcode() == Instruction::Sub || IsDisjointOr) { + // Limit recursion depth more aggressively for binary operations. + unsigned NewDepth = std::max(Depth * 2, 1u); + ConstantRange LHS = computeConstantRange( + BO.getOperand(0), ForSigned, UseInstrInfo, AC, CtxI, DT, NewDepth); + ConstantRange RHS = computeConstantRange( + BO.getOperand(1), ForSigned, UseInstrInfo, AC, CtxI, DT, NewDepth); + unsigned NoWrapKind = 0; + // Only Add and Sub have no-wrap flags, not disjoint Or. + if (!IsDisjointOr) { + if (IIQ.hasNoUnsignedWrap(&BO)) + NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap; + if (IIQ.hasNoSignedWrap(&BO)) + NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap; + } + // Disjoint OR is semantically equivalent to Add. + ConstantRange OpCR = BO.getOpcode() == Instruction::Sub + ? LHS.subWithNoWrap(RHS, NoWrapKind) + : LHS.addWithNoWrap(RHS, NoWrapKind); + CR = CR.intersectWith(OpCR, ForSigned ? ConstantRange::Signed + : ConstantRange::Unsigned); + } + return CR; } static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II, @@ -10237,39 +10269,25 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, InstrInfoQuery IIQ(UseInstrInfo); ConstantRange CR = ConstantRange::getFull(BitWidth); if (auto *BO = dyn_cast<BinaryOperator>(V)) { - APInt Lower = APInt(BitWidth, 0); - APInt Upper = APInt(BitWidth, 0); - // TODO: Return ConstantRange. - setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned); - CR = ConstantRange::getNonEmpty(Lower, Upper); - if (BO->getOpcode() == Instruction::Add || - BO->getOpcode() == Instruction::Sub) { - ConstantRange LHS = computeConstantRange( - BO->getOperand(0), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); - ConstantRange RHS = computeConstantRange( - BO->getOperand(1), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); - unsigned NoWrapKind = 0; - if (IIQ.hasNoUnsignedWrap(BO)) - NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap; - if (IIQ.hasNoSignedWrap(BO)) - NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap; - ConstantRange OpCR = BO->getOpcode() == Instruction::Add - ? LHS.addWithNoWrap(RHS, NoWrapKind) - : LHS.subWithNoWrap(RHS, NoWrapKind); - CR = CR.intersectWith(OpCR); - } - } else if (auto *SExt = dyn_cast<SExtInst>(V)) { - CR = computeConstantRange(SExt->getOperand(0), ForSigned, UseInstrInfo, AC, - CtxI, DT, Depth + 1) - .signExtend(BitWidth); - } else if (auto *ZExt = dyn_cast<ZExtInst>(V)) { - CR = computeConstantRange(ZExt->getOperand(0), ForSigned, UseInstrInfo, AC, - CtxI, DT, Depth + 1) - .zeroExtend(BitWidth); - } else if (auto *Trunc = dyn_cast<TruncInst>(V)) { - CR = computeConstantRange(Trunc->getOperand(0), ForSigned, UseInstrInfo, AC, - CtxI, DT, Depth + 1) - .truncate(BitWidth); + CR = getRangeForBinOp(*BO, ForSigned, UseInstrInfo, AC, CtxI, DT, Depth); + } else if (isa<SExtInst>(V) || isa<ZExtInst>(V) || isa<TruncInst>(V)) { + auto *CastOp = cast<CastInst>(V); + ConstantRange OpCR = + computeConstantRange(CastOp->getOperand(0), ForSigned, UseInstrInfo, AC, + CtxI, DT, Depth + 1); + switch (CastOp->getOpcode()) { + case Instruction::SExt: + CR = OpCR.signExtend(BitWidth); + break; + case Instruction::ZExt: + CR = OpCR.zeroExtend(BitWidth); + break; + case Instruction::Trunc: + CR = OpCR.truncate(BitWidth); + break; + default: + llvm_unreachable("Unexpected cast opcode"); + } } else if (auto *II = dyn_cast<IntrinsicInst>(V)) CR = getRangeForIntrinsic(*II, UseInstrInfo); else if (auto *SI = dyn_cast<SelectInst>(V)) { diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll index 52410c6d3698e..97b460b32507b 100644 --- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll @@ -857,28 +857,23 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; SDAG-NEXT: v_sub_i32_e32 v2, vcc, 0x5e, v0 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, 0, v3, vcc -; SDAG-NEXT: v_xor_b32_e32 v0, 0x7f, v2 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, 0, v9, vcc ; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[2:3] -; SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] ; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v9, vcc -; SDAG-NEXT: v_or_b32_e32 v0, v0, v8 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] -; SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; SDAG-NEXT: v_or_b32_e32 v1, v3, v9 +; SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] -; SDAG-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; SDAG-NEXT: v_and_b32_e32 v0, 1, v14 -; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v0 -; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; SDAG-NEXT: v_cndmask_b32_e64 v1, v11, 0, s[4:5] -; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 +; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v10, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v16, v13, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v17, v12, 0, s[4:5] -; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; SDAG-NEXT: s_cbranch_execz .LBB1_6 ; SDAG-NEXT: ; %bb.1: ; %udiv-bb15 ; SDAG-NEXT: v_add_i32_e32 v20, vcc, 1, v2 @@ -1015,28 +1010,23 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc ; SDAG-NEXT: v_sub_i32_e32 v2, vcc, 0x5e, v2 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, 0, v4, vcc -; SDAG-NEXT: v_xor_b32_e32 v6, 0x7f, v2 ; SDAG-NEXT: v_subb_u32_e32 v4, vcc, 0, v12, vcc ; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[2:3] -; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[6:7] ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, 0, v12, vcc -; SDAG-NEXT: v_or_b32_e32 v6, v6, v4 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; SDAG-NEXT: v_or_b32_e32 v7, v3, v5 +; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] -; SDAG-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; SDAG-NEXT: v_and_b32_e32 v6, 1, v12 -; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v6 -; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc +; SDAG-NEXT: v_and_b32_e32 v6, 1, v6 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 +; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; SDAG-NEXT: v_cndmask_b32_e64 v13, v9, 0, s[4:5] -; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 +; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; SDAG-NEXT: v_cndmask_b32_e64 v12, v8, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v7, v11, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v6, v10, 0, s[4:5] -; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; SDAG-NEXT: s_cbranch_execz .LBB1_12 ; SDAG-NEXT: ; %bb.7: ; %udiv-bb1 ; SDAG-NEXT: v_add_i32_e32 v22, vcc, 1, v2 @@ -1165,7 +1155,7 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b64 s[8:9], 0 ; GISEL-NEXT: v_ashrrev_i32_e32 v18, 31, v3 -; GISEL-NEXT: v_mov_b32_e32 v14, 0x5e +; GISEL-NEXT: v_mov_b32_e32 v12, 0x5e ; GISEL-NEXT: v_mov_b32_e32 v8, 0x7f ; GISEL-NEXT: v_mov_b32_e32 v9, 0 ; GISEL-NEXT: v_xor_b32_e32 v0, v18, v0 @@ -1174,49 +1164,41 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: v_xor_b32_e32 v3, v18, v3 ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v0, v18 ; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v1, v18, vcc -; GISEL-NEXT: v_subb_u32_e32 v12, vcc, v2, v18, vcc -; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v18, vcc +; GISEL-NEXT: v_subb_u32_e32 v14, vcc, v2, v18, vcc +; GISEL-NEXT: v_subb_u32_e32 v15, vcc, v3, v18, vcc ; GISEL-NEXT: v_ffbh_u32_e32 v2, v11 ; GISEL-NEXT: v_ffbh_u32_e32 v3, v10 -; GISEL-NEXT: v_or_b32_e32 v0, v10, v12 -; GISEL-NEXT: v_or_b32_e32 v1, v11, v13 +; GISEL-NEXT: v_or_b32_e32 v0, v10, v14 +; GISEL-NEXT: v_or_b32_e32 v1, v11, v15 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 32, v3 -; GISEL-NEXT: v_ffbh_u32_e32 v15, v13 -; GISEL-NEXT: v_ffbh_u32_e32 v16, v12 +; GISEL-NEXT: v_ffbh_u32_e32 v13, v15 +; GISEL-NEXT: v_ffbh_u32_e32 v16, v14 ; GISEL-NEXT: v_min_u32_e32 v2, v2, v3 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 32, v16 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v2 -; GISEL-NEXT: v_min_u32_e32 v1, v15, v3 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13] +; GISEL-NEXT: v_min_u32_e32 v1, v13, v3 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v14, v0 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v12, v0 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] -; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v8, v8, v0 -; GISEL-NEXT: v_or_b32_e32 v9, v3, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GISEL-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v9, v16, v14 -; GISEL-NEXT: v_and_b32_e32 v14, 1, v9 -; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc +; GISEL-NEXT: v_or_b32_e32 v8, v16, v8 +; GISEL-NEXT: v_and_b32_e32 v8, 1, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v16, v10, 0, vcc -; GISEL-NEXT: v_and_b32_e32 v14, 1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v17, v11, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, v13, 0, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GISEL-NEXT: v_cndmask_b32_e64 v17, v11, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v8, v14, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v9, v15, 0, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB1_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -1226,23 +1208,23 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: v_not_b32_e32 v2, 63 ; GISEL-NEXT: v_addc_u32_e64 v21, vcc, 0, v0, s[4:5] ; GISEL-NEXT: v_addc_u32_e32 v22, vcc, 0, v1, vcc -; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v23, v2 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v23, v2 ; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 64, v23 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[10:11], v23 -; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v23 +; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], v23 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: v_lshr_b64 v[8:9], v[10:11], v8 -; GISEL-NEXT: v_lshl_b64 v[16:17], v[10:11], v14 +; GISEL-NEXT: v_lshl_b64 v[16:17], v[10:11], v12 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 -; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v1, vcc ; GISEL-NEXT: v_or_b32_e32 v0, v8, v2 ; GISEL-NEXT: v_or_b32_e32 v1, v9, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v23 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v12, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v13, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v14, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v15, vcc ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: v_mov_b32_e32 v1, s9 @@ -1254,22 +1236,22 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, 0xffffffc0, v19 ; GISEL-NEXT: v_sub_i32_e32 v17, vcc, 64, v19 -; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v19 +; GISEL-NEXT: v_lshr_b64 v[0:1], v[14:15], v19 ; GISEL-NEXT: v_lshr_b64 v[2:3], v[10:11], v19 ; GISEL-NEXT: s_mov_b64 s[8:9], 0 -; GISEL-NEXT: v_lshl_b64 v[23:24], v[12:13], v17 -; GISEL-NEXT: v_lshr_b64 v[12:13], v[12:13], v16 +; GISEL-NEXT: v_lshl_b64 v[23:24], v[14:15], v17 +; GISEL-NEXT: v_lshr_b64 v[14:15], v[14:15], v16 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19 ; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v1, vcc ; GISEL-NEXT: v_or_b32_e32 v0, v2, v23 ; GISEL-NEXT: v_or_b32_e32 v1, v3, v24 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v0, v14, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19 -; GISEL-NEXT: v_cndmask_b32_e32 v12, v0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v13, v1, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v14, v0, v10, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v15, v1, v11, vcc ; GISEL-NEXT: v_mov_b32_e32 v11, 0 ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: v_mov_b32_e32 v1, s9 @@ -1277,25 +1259,25 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: v_mov_b32_e32 v3, s11 ; GISEL-NEXT: .LBB1_3: ; %udiv-do-while3 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1 -; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v15 -; GISEL-NEXT: v_lshl_b64 v[23:24], v[12:13], 1 +; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1 +; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v13 +; GISEL-NEXT: v_lshl_b64 v[23:24], v[14:15], 1 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1 -; GISEL-NEXT: v_lshrrev_b32_e32 v12, 31, v13 -; GISEL-NEXT: v_lshrrev_b32_e32 v13, 31, v9 +; GISEL-NEXT: v_lshrrev_b32_e32 v14, 31, v15 +; GISEL-NEXT: v_lshrrev_b32_e32 v15, 31, v9 ; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, -1, v19 ; GISEL-NEXT: v_addc_u32_e32 v20, vcc, -1, v20, vcc -; GISEL-NEXT: v_or_b32_e32 v14, v0, v2 -; GISEL-NEXT: v_or_b32_e32 v15, v1, v3 -; GISEL-NEXT: v_or_b32_e32 v2, v16, v12 -; GISEL-NEXT: v_or_b32_e32 v0, v23, v13 +; GISEL-NEXT: v_or_b32_e32 v12, v0, v2 +; GISEL-NEXT: v_or_b32_e32 v13, v1, v3 +; GISEL-NEXT: v_or_b32_e32 v2, v16, v14 +; GISEL-NEXT: v_or_b32_e32 v0, v23, v15 ; GISEL-NEXT: v_or_b32_e32 v8, v8, v10 ; GISEL-NEXT: v_addc_u32_e32 v21, vcc, -1, v21, vcc ; GISEL-NEXT: v_addc_u32_e32 v22, vcc, -1, v22, vcc ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, 1, v24 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, 0, v2, vcc -; GISEL-NEXT: v_subrev_i32_e64 v12, s[4:5], 0, v0 +; GISEL-NEXT: v_subrev_i32_e64 v14, s[4:5], 0, v0 ; GISEL-NEXT: v_or_b32_e32 v0, v19, v21 ; GISEL-NEXT: v_or_b32_e32 v1, v20, v22 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, 0, v17, vcc @@ -1305,7 +1287,7 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: v_and_b32_e32 v3, 2, v0 ; GISEL-NEXT: v_mov_b32_e32 v0, v10 ; GISEL-NEXT: v_mov_b32_e32 v1, v11 -; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v24, v3 +; GISEL-NEXT: v_sub_i32_e64 v15, s[4:5], v24, v3 ; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v2, s[4:5] ; GISEL-NEXT: s_or_b64 s[8:9], vcc, s[8:9] ; GISEL-NEXT: v_subbrev_u32_e64 v17, vcc, 0, v17, s[4:5] @@ -1315,9 +1297,9 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: .LBB1_5: ; %Flow14 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] -; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1 +; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1 ; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 -; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v15 +; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v13 ; GISEL-NEXT: v_or_b32_e32 v8, v8, v10 ; GISEL-NEXT: v_or_b32_e32 v16, v0, v2 ; GISEL-NEXT: v_or_b32_e32 v17, v1, v3 @@ -1325,84 +1307,76 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: s_mov_b64 s[8:9], 0 ; GISEL-NEXT: v_ashrrev_i32_e32 v19, 31, v7 -; GISEL-NEXT: v_mov_b32_e32 v2, 0x5e -; GISEL-NEXT: v_mov_b32_e32 v12, 0x7f -; GISEL-NEXT: v_mov_b32_e32 v13, 0 +; GISEL-NEXT: v_mov_b32_e32 v10, 0x5e +; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f +; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_xor_b32_e32 v0, v19, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v19, v5 -; GISEL-NEXT: v_xor_b32_e32 v3, v19, v6 -; GISEL-NEXT: v_xor_b32_e32 v6, v19, v7 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v19 -; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v1, v19, vcc -; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v3, v19, vcc -; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v6, v19, vcc -; GISEL-NEXT: v_ffbh_u32_e32 v3, v5 -; GISEL-NEXT: v_ffbh_u32_e32 v6, v4 -; GISEL-NEXT: v_or_b32_e32 v0, v4, v10 -; GISEL-NEXT: v_or_b32_e32 v1, v5, v11 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, 32, v6 -; GISEL-NEXT: v_ffbh_u32_e32 v7, v11 -; GISEL-NEXT: v_ffbh_u32_e32 v14, v10 -; GISEL-NEXT: v_min_u32_e32 v3, v3, v6 -; GISEL-NEXT: v_add_i32_e32 v6, vcc, 32, v14 +; GISEL-NEXT: v_xor_b32_e32 v4, v19, v6 +; GISEL-NEXT: v_xor_b32_e32 v5, v19, v7 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v19 +; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v1, v19, vcc +; GISEL-NEXT: v_subb_u32_e32 v12, vcc, v4, v19, vcc +; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v5, v19, vcc +; GISEL-NEXT: v_ffbh_u32_e32 v4, v7 +; GISEL-NEXT: v_ffbh_u32_e32 v5, v6 +; GISEL-NEXT: v_or_b32_e32 v0, v6, v12 +; GISEL-NEXT: v_or_b32_e32 v1, v7, v13 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v5 +; GISEL-NEXT: v_ffbh_u32_e32 v11, v13 +; GISEL-NEXT: v_ffbh_u32_e32 v14, v12 +; GISEL-NEXT: v_min_u32_e32 v4, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v14 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v3 -; GISEL-NEXT: v_min_u32_e32 v1, v7, v6 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] +; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v4 +; GISEL-NEXT: v_min_u32_e32 v1, v11, v5 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13] ; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 -; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v0 +; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] -; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_xor_b32_e32 v6, 0x7f, v2 +; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[2:3] +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v6, v6, v0 -; GISEL-NEXT: v_or_b32_e32 v7, v3, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v7, v14, v12 -; GISEL-NEXT: v_and_b32_e32 v12, 1, v7 -; GISEL-NEXT: v_or_b32_e32 v6, v7, v6 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 -; GISEL-NEXT: v_cndmask_b32_e64 v12, v4, 0, vcc -; GISEL-NEXT: v_and_b32_e32 v14, 1, v6 -; GISEL-NEXT: v_cndmask_b32_e64 v13, v5, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v6, v10, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, v11, 0, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GISEL-NEXT: v_or_b32_e32 v2, v14, v2 +; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v6, 0, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GISEL-NEXT: v_cndmask_b32_e64 v11, v7, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v13, 0, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB1_12 ; GISEL-NEXT: ; %bb.7: ; %udiv-bb1 -; GISEL-NEXT: v_add_i32_e32 v20, vcc, 1, v2 -; GISEL-NEXT: v_addc_u32_e64 v21, s[4:5], 0, v3, vcc -; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v2 +; GISEL-NEXT: v_add_i32_e32 v20, vcc, 1, v4 +; GISEL-NEXT: v_addc_u32_e64 v21, s[4:5], 0, v5, vcc +; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v4 ; GISEL-NEXT: v_not_b32_e32 v2, 63 ; GISEL-NEXT: v_addc_u32_e64 v22, vcc, 0, v0, s[4:5] ; GISEL-NEXT: v_addc_u32_e32 v23, vcc, 0, v1, vcc -; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v24, v2 -; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], 64, v24 -; GISEL-NEXT: v_lshl_b64 v[0:1], v[4:5], v24 -; GISEL-NEXT: v_lshl_b64 v[2:3], v[10:11], v24 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v24, v2 +; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 64, v24 +; GISEL-NEXT: v_lshl_b64 v[0:1], v[6:7], v24 +; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v24 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 -; GISEL-NEXT: v_lshr_b64 v[6:7], v[4:5], v6 -; GISEL-NEXT: v_lshl_b64 v[14:15], v[4:5], v12 +; GISEL-NEXT: v_lshr_b64 v[4:5], v[6:7], v4 +; GISEL-NEXT: v_lshl_b64 v[14:15], v[6:7], v10 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24 -; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v13, 0, v1, vcc -; GISEL-NEXT: v_or_b32_e32 v0, v6, v2 -; GISEL-NEXT: v_or_b32_e32 v1, v7, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v1, vcc +; GISEL-NEXT: v_or_b32_e32 v0, v4, v2 +; GISEL-NEXT: v_or_b32_e32 v1, v5, v3 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v14, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v7, v1, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v4, v0, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v5, v1, v13, vcc ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: v_mov_b32_e32 v1, s9 @@ -1414,59 +1388,59 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: ; %bb.8: ; %udiv-preheader ; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v20 ; GISEL-NEXT: v_sub_i32_e32 v14, vcc, 64, v20 -; GISEL-NEXT: v_lshr_b64 v[0:1], v[10:11], v20 -; GISEL-NEXT: v_lshr_b64 v[2:3], v[4:5], v20 +; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v20 +; GISEL-NEXT: v_lshr_b64 v[2:3], v[6:7], v20 ; GISEL-NEXT: s_mov_b64 s[8:9], 0 -; GISEL-NEXT: v_lshl_b64 v[14:15], v[10:11], v14 -; GISEL-NEXT: v_lshr_b64 v[10:11], v[10:11], v24 +; GISEL-NEXT: v_lshl_b64 v[14:15], v[12:13], v14 +; GISEL-NEXT: v_lshr_b64 v[12:13], v[12:13], v24 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] ; GISEL-NEXT: v_or_b32_e32 v2, v2, v14 ; GISEL-NEXT: v_or_b32_e32 v3, v3, v15 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v20 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v1, vcc ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v20 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v11, v3, v5, vcc -; GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v2, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v13, v3, v7, vcc +; GISEL-NEXT: v_mov_b32_e32 v7, 0 ; GISEL-NEXT: v_mov_b32_e32 v0, s8 ; GISEL-NEXT: v_mov_b32_e32 v1, s9 ; GISEL-NEXT: v_mov_b32_e32 v2, s10 ; GISEL-NEXT: v_mov_b32_e32 v3, s11 ; GISEL-NEXT: .LBB1_9: ; %udiv-do-while ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_lshl_b64 v[24:25], v[10:11], 1 -; GISEL-NEXT: v_lshl_b64 v[14:15], v[14:15], 1 -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 31, v11 -; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v7 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1 -; GISEL-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 -; GISEL-NEXT: v_lshrrev_b32_e32 v11, 31, v13 +; GISEL-NEXT: v_lshl_b64 v[14:15], v[14:15], 1 +; GISEL-NEXT: v_lshrrev_b32_e32 v6, 31, v13 +; GISEL-NEXT: v_lshrrev_b32_e32 v24, 31, v5 +; GISEL-NEXT: v_lshl_b64 v[12:13], v[10:11], 1 +; GISEL-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 +; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v11 ; GISEL-NEXT: v_add_i32_e32 v20, vcc, -1, v20 ; GISEL-NEXT: v_addc_u32_e32 v21, vcc, -1, v21, vcc -; GISEL-NEXT: v_or_b32_e32 v5, v14, v5 -; GISEL-NEXT: v_or_b32_e32 v10, v24, v10 -; GISEL-NEXT: v_or_b32_e32 v6, v6, v11 -; GISEL-NEXT: v_or_b32_e32 v12, v0, v2 -; GISEL-NEXT: v_or_b32_e32 v13, v1, v3 +; GISEL-NEXT: v_or_b32_e32 v14, v14, v6 +; GISEL-NEXT: v_or_b32_e32 v2, v2, v24 +; GISEL-NEXT: v_or_b32_e32 v4, v4, v10 +; GISEL-NEXT: v_or_b32_e32 v10, v0, v12 +; GISEL-NEXT: v_or_b32_e32 v11, v1, v13 ; GISEL-NEXT: v_addc_u32_e32 v22, vcc, -1, v22, vcc ; GISEL-NEXT: v_addc_u32_e32 v23, vcc, -1, v23, vcc -; GISEL-NEXT: v_sub_i32_e32 v0, vcc, 1, v25 -; GISEL-NEXT: v_subb_u32_e32 v0, vcc, 0, v5, vcc -; GISEL-NEXT: v_subrev_i32_e64 v10, s[4:5], 0, v10 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, 1, v3 +; GISEL-NEXT: v_subb_u32_e32 v0, vcc, 0, v14, vcc +; GISEL-NEXT: v_subrev_i32_e64 v12, s[4:5], 0, v2 ; GISEL-NEXT: v_or_b32_e32 v0, v20, v22 ; GISEL-NEXT: v_or_b32_e32 v1, v21, v23 ; GISEL-NEXT: v_subb_u32_e32 v2, vcc, 0, v15, vcc ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v2 -; GISEL-NEXT: v_and_b32_e32 v3, 1, v0 +; GISEL-NEXT: v_and_b32_e32 v6, 1, v0 ; GISEL-NEXT: v_and_b32_e32 v2, 2, v0 -; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_mov_b32_e32 v1, v4 -; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v25, v2 -; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v5, s[4:5] +; GISEL-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-NEXT: v_mov_b32_e32 v1, v7 +; GISEL-NEXT: v_sub_i32_e64 v13, s[4:5], v3, v2 +; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v14, s[4:5] ; GISEL-NEXT: s_or_b64 s[8:9], vcc, s[8:9] ; GISEL-NEXT: v_subbrev_u32_e64 v15, vcc, 0, v15, s[4:5] ; GISEL-NEXT: s_andn2_b64 exec, exec, s[8:9] @@ -1475,30 +1449,30 @@ define <2 x i128> @v_sdiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: .LBB1_11: ; %Flow11 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] -; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1 -; GISEL-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 -; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v13 -; GISEL-NEXT: v_or_b32_e32 v6, v6, v4 -; GISEL-NEXT: v_or_b32_e32 v12, v0, v2 -; GISEL-NEXT: v_or_b32_e32 v13, v1, v3 +; GISEL-NEXT: v_lshl_b64 v[6:7], v[10:11], 1 +; GISEL-NEXT: v_lshl_b64 v[2:3], v[4:5], 1 +; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v11 +; GISEL-NEXT: v_or_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_or_b32_e32 v10, v0, v6 +; GISEL-NEXT: v_or_b32_e32 v11, v1, v7 ; GISEL-NEXT: .LBB1_12: ; %Flow12 ; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_xor_b32_e32 v0, v16, v18 ; GISEL-NEXT: v_xor_b32_e32 v1, v17, v18 -; GISEL-NEXT: v_xor_b32_e32 v2, v8, v18 -; GISEL-NEXT: v_xor_b32_e32 v3, v9, v18 -; GISEL-NEXT: v_xor_b32_e32 v4, v12, v19 -; GISEL-NEXT: v_xor_b32_e32 v5, v13, v19 -; GISEL-NEXT: v_xor_b32_e32 v6, v6, v19 -; GISEL-NEXT: v_xor_b32_e32 v7, v7, v19 +; GISEL-NEXT: v_xor_b32_e32 v6, v8, v18 +; GISEL-NEXT: v_xor_b32_e32 v7, v9, v18 +; GISEL-NEXT: v_xor_b32_e32 v4, v10, v19 +; GISEL-NEXT: v_xor_b32_e32 v5, v11, v19 +; GISEL-NEXT: v_xor_b32_e32 v8, v2, v19 +; GISEL-NEXT: v_xor_b32_e32 v9, v3, v19 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v18 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v18, vcc ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v19 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v19, s[4:5] -; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v18, vcc -; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v18, vcc -; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v19, s[4:5] -; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v19, vcc +; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v6, v18, vcc +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v7, v18, vcc +; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v8, v19, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v9, v19, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] %shl = sdiv <2 x i128> %lhs, <i128 8589934592, i128 8589934592> ret <2 x i128> %shl @@ -2248,28 +2222,23 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc ; SDAG-NEXT: v_sub_i32_e32 v12, vcc, 0x5e, v2 ; SDAG-NEXT: v_subb_u32_e32 v13, vcc, 0, v10, vcc -; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v12 ; SDAG-NEXT: v_subb_u32_e32 v14, vcc, 0, v15, vcc ; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[12:13] -; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7] ; SDAG-NEXT: v_subb_u32_e32 v15, vcc, 0, v15, vcc -; SDAG-NEXT: v_or_b32_e32 v2, v2, v14 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] -; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; SDAG-NEXT: v_or_b32_e32 v3, v13, v15 +; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; SDAG-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; SDAG-NEXT: v_and_b32_e32 v2, 1, v10 -; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v2 -; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; SDAG-NEXT: v_and_b32_e32 v2, 1, v2 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; SDAG-NEXT: v_cndmask_b32_e64 v3, v9, 0, s[4:5] -; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 +; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; SDAG-NEXT: v_cndmask_b32_e64 v2, v8, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v10, v1, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v11, v0, 0, s[4:5] -; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; SDAG-NEXT: s_cbranch_execz .LBB3_6 ; SDAG-NEXT: ; %bb.1: ; %udiv-bb15 ; SDAG-NEXT: v_add_i32_e32 v18, vcc, 1, v12 @@ -2395,28 +2364,23 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, 0x5e, v0 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v8, vcc -; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v0 ; SDAG-NEXT: v_subb_u32_e32 v14, vcc, 0, v15, vcc ; SDAG-NEXT: v_cmp_lt_u64_e64 s[6:7], s[6:7], v[0:1] -; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[6:7] ; SDAG-NEXT: v_subb_u32_e32 v15, vcc, 0, v15, vcc -; SDAG-NEXT: v_or_b32_e32 v8, v8, v14 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] -; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; SDAG-NEXT: v_or_b32_e32 v9, v1, v15 +; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] -; SDAG-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc -; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] -; SDAG-NEXT: v_and_b32_e32 v8, 1, v12 -; SDAG-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v8 -; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] +; SDAG-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc +; SDAG-NEXT: v_and_b32_e32 v8, 1, v8 +; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 +; SDAG-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; SDAG-NEXT: v_cndmask_b32_e64 v9, v7, 0, s[4:5] -; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1 +; SDAG-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v6, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v12, v5, 0, s[4:5] ; SDAG-NEXT: v_cndmask_b32_e64 v13, v4, 0, s[4:5] -; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; SDAG-NEXT: s_cbranch_execz .LBB3_12 ; SDAG-NEXT: ; %bb.7: ; %udiv-bb1 ; SDAG-NEXT: v_add_i32_e32 v18, vcc, 1, v0 @@ -2548,38 +2512,30 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: v_add_i32_e32 v13, vcc, 32, v13 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, 32, v15 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; GISEL-NEXT: v_min_u32_e32 v2, v12, v13 -; GISEL-NEXT: v_min_u32_e32 v3, v14, v15 -; GISEL-NEXT: v_add_i32_e32 v2, vcc, 64, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-NEXT: v_min_u32_e32 v3, v12, v13 +; GISEL-NEXT: v_min_u32_e32 v12, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 64, v3 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v16, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v16, v3 ; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, vcc ; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_subb_u32_e64 v13, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[10:11] -; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] -; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v2, v2, v12 -; GISEL-NEXT: v_or_b32_e32 v3, v15, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13] -; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v3, v17, v10 -; GISEL-NEXT: v_and_b32_e32 v10, 1, v3 -; GISEL-NEXT: v_or_b32_e32 v2, v3, v2 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc +; GISEL-NEXT: v_or_b32_e32 v2, v2, v3 +; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc -; GISEL-NEXT: v_and_b32_e32 v16, 1, v2 +; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: v_cndmask_b32_e64 v11, v1, 0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, 0, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB3_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -2710,27 +2666,19 @@ define <2 x i128> @v_udiv_v2i128_v_pow2k(<2 x i128> %lhs) { ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] -; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v8, v8, v0 -; GISEL-NEXT: v_or_b32_e32 v9, v15, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] -; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; GISEL-NEXT: v_or_b32_e32 v9, v17, v12 -; GISEL-NEXT: v_and_b32_e32 v12, 1, v9 -; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc +; GISEL-NEXT: v_or_b32_e32 v8, v17, v8 +; GISEL-NEXT: v_and_b32_e32 v8, 1, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; GISEL-NEXT: v_cndmask_b32_e64 v12, v4, 0, vcc -; GISEL-NEXT: v_and_b32_e32 v16, 1, v8 +; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: v_cndmask_b32_e64 v13, v5, 0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v8, v6, 0, vcc ; GISEL-NEXT: v_cndmask_b32_e64 v9, v7, 0, vcc -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 -; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB3_12 ; GISEL-NEXT: ; %bb.7: ; %udiv-bb1 diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index fdb20f372ab8d..d3a027f99947b 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -1275,12 +1275,11 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_addc_u32 s11, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[10:11], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[10:11], 63 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[12:13], exec +; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[8:9] +; GCN-IR-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1462,13 +1461,11 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, 24, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1653,14 +1650,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc -; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] -; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] +; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1755,12 +1750,10 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[0:1] ; GCN-IR-NEXT: v_mov_b32_e32 v9, v8 ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[0:1] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v5, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v4, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB13_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 02d2e6c1473ab..3bee2fa7da49a 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -1414,73 +1414,72 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-LABEL: s_test_srem_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 -; GCN-IR-NEXT: s_mov_b64 s[6:7], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_ashr_i32 s8, s3, 31 -; GCN-IR-NEXT: s_mov_b32 s9, s8 -; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s4, s2, s8 -; GCN-IR-NEXT: s_subb_u32 s5, s3, s8 +; GCN-IR-NEXT: s_ashr_i32 s6, s3, 31 +; GCN-IR-NEXT: s_mov_b32 s7, s6 +; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s4, s2, s6 +; GCN-IR-NEXT: s_subb_u32 s5, s3, s6 ; GCN-IR-NEXT: s_flbit_i32_b64 s14, s[4:5] -; GCN-IR-NEXT: s_add_u32 s2, s14, 0xffffffc5 -; GCN-IR-NEXT: s_addc_u32 s3, 0, -1 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[4:5], 0 -; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[2:3], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 63 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-IR-NEXT: s_and_b64 s[8:9], s[10:11], exec -; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_mov_b32 s9, 0 +; GCN-IR-NEXT: s_add_u32 s8, s14, 0xffffffc5 +; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[4:5], 0 +; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 +; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] +; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 +; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s8, s2, 1 -; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 -; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 -; GCN-IR-NEXT: s_sub_i32 s2, 63, s2 -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] -; GCN-IR-NEXT: s_lshl_b64 s[2:3], 24, s2 +; GCN-IR-NEXT: s_add_u32 s10, s8, 1 +; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 +; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 +; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s8 +; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10 ; GCN-IR-NEXT: s_add_u32 s12, s4, -1 ; GCN-IR-NEXT: s_addc_u32 s13, s5, -1 ; GCN-IR-NEXT: s_sub_u32 s14, 58, s14 ; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 -; GCN-IR-NEXT: s_mov_b32 s7, 0 +; GCN-IR-NEXT: s_mov_b32 s3, 0 ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 -; GCN-IR-NEXT: s_lshr_b32 s6, s3, 31 -; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[6:7] -; GCN-IR-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s6, s12, s10 -; GCN-IR-NEXT: s_subb_u32 s6, s13, s11 -; GCN-IR-NEXT: s_ashr_i32 s8, s6, 31 +; GCN-IR-NEXT: s_lshr_b32 s2, s7, 31 +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 +; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[2:3] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s2, s12, s10 +; GCN-IR-NEXT: s_subb_u32 s2, s13, s11 +; GCN-IR-NEXT: s_ashr_i32 s8, s2, 31 ; GCN-IR-NEXT: s_mov_b32 s9, s8 -; GCN-IR-NEXT: s_and_b32 s6, s8, 1 +; GCN-IR-NEXT: s_and_b32 s2, s8, 1 ; GCN-IR-NEXT: s_and_b64 s[16:17], s[8:9], s[4:5] ; GCN-IR-NEXT: s_sub_u32 s10, s10, s16 ; GCN-IR-NEXT: s_subb_u32 s11, s11, s17 ; GCN-IR-NEXT: s_add_u32 s14, s14, 1 ; GCN-IR-NEXT: s_addc_u32 s15, s15, 0 ; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 -; GCN-IR-NEXT: s_mov_b64 s[8:9], s[6:7] +; GCN-IR-NEXT: s_mov_b64 s[8:9], s[2:3] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-IR-NEXT: .LBB10_4: ; %Flow6 -; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[6:7], s[2:3] +; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 +; GCN-IR-NEXT: s_or_b64 s[6:7], s[2:3], s[6:7] ; GCN-IR-NEXT: .LBB10_5: ; %udiv-end -; GCN-IR-NEXT: v_mov_b32_e32 v0, s8 +; GCN-IR-NEXT: v_mov_b32_e32 v0, s6 ; GCN-IR-NEXT: v_mul_hi_u32 v0, s4, v0 -; GCN-IR-NEXT: s_mul_i32 s6, s4, s9 -; GCN-IR-NEXT: s_mul_i32 s5, s5, s8 -; GCN-IR-NEXT: s_mul_i32 s4, s4, s8 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s6, v0 +; GCN-IR-NEXT: s_mul_i32 s7, s4, s7 +; GCN-IR-NEXT: s_mul_i32 s5, s5, s6 +; GCN-IR-NEXT: s_mul_i32 s4, s4, s6 +; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, s7, v0 ; GCN-IR-NEXT: v_add_i32_e32 v1, vcc, s5, v0 ; GCN-IR-NEXT: v_sub_i32_e64 v0, vcc, 24, s4 ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 @@ -1612,12 +1611,10 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, 24, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1801,13 +1798,11 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1908,12 +1903,10 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB13_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 1c50f930facba..3f5be80b1efbd 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -912,12 +912,11 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] +; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1083,13 +1082,11 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[4:5] ; GCN-IR-NEXT: v_mov_b32_e32 v3, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1173,12 +1170,10 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB10_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4 @@ -1277,13 +1272,12 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1372,12 +1366,10 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 28e6627b87413..b6608b9f48a7a 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -926,12 +926,11 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec +; GCN-IR-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] +; GCN-IR-NEXT: s_and_b64 s[6:7], s[6:7], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1042,13 +1041,12 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec -; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 +; GCN-IR-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GCN-IR-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] -; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1219,13 +1217,11 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[2:3] ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc +; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB8_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 @@ -1310,22 +1306,20 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_add_i32_e64 v2, s[4:5], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 ; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 48, v8 -; GCN-IR-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5] +; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 48, v8 +; GCN-IR-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, s[4:5] ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3] +; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] -; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 -; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v1, 0, s[4:5] -; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v0, 0, s[4:5] -; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc -; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v1, 0, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[8:9], s[4:5], -1 +; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[4:5] +; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[0:1], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 @@ -1369,11 +1363,11 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: .LBB9_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 -; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 +; GCN-IR-NEXT: v_or_b32_e32 v3, v5, v3 +; GCN-IR-NEXT: v_or_b32_e32 v2, v4, v2 ; GCN-IR-NEXT: .LBB9_6: ; %Flow5 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[4:5], 15 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 15 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; GCN-IR-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/PowerPC/add_cmp.ll b/llvm/test/CodeGen/PowerPC/add_cmp.ll index cbe16a498a538..c5cc071e0183d 100644 --- a/llvm/test/CodeGen/PowerPC/add_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/add_cmp.ll @@ -30,27 +30,27 @@ entry: define zeroext i1 @addiCmpiUnsignedOverflow(i32 zeroext %x) { entry: - %add = add nuw i32 110, %x - %cmp = icmp ugt i32 %add, 100 + %add = add nuw i32 110, %x + %cmp = icmp ugt i32 %add, 200 ret i1 %cmp ; CHECK: === addiCmpiUnsignedOverflow ; CHECK: Optimized lowered selection DAG: %bb.0 'addiCmpiUnsignedOverflow:entry' ; CHECK: [[REG1:t[0-9]+]]: i32 = truncate {{t[0-9]+}} ; CHECK: [[REG2:t[0-9]+]]: i32 = add nuw [[REG1]], Constant:i32<110> -; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<100>, setugt:ch +; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i32<200>, setugt:ch } define zeroext i1 @addiCmpiSignedOverflow(i16 signext %x) { entry: - %add = add nsw i16 16, %x - %cmp = icmp sgt i16 %add, -32767 + %add = add nsw i16 16, %x + %cmp = icmp sgt i16 %add, 30 ret i1 %cmp ; CHECK: === addiCmpiSignedOverflow ; CHECK: Optimized lowered selection DAG: %bb.0 'addiCmpiSignedOverflow:entry' ; CHECK: [[REG1:t[0-9]+]]: i16 = truncate {{t[0-9]+}} ; CHECK: [[REG2:t[0-9]+]]: i16 = add nsw [[REG1]], Constant:i16<16> -; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<-32767>, setgt:ch +; CHECK: {{t[0-9]+}}: i1 = setcc [[REG2]], Constant:i16<30>, setgt:ch } diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index 38f8a829cf419..2e6fa20b86954 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -888,29 +888,13 @@ define dso_local i64 @select_int2ptr_bitcast_ptr2int(i32 %a) local_unnamed_addr ; TUNIT-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int ; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5 -; TUNIT-NEXT: [[DOT:%.*]] = select i1 [[CMP]], i32 1, i32 2 -; TUNIT-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 10 -; TUNIT-NEXT: [[Y_0_V:%.*]] = select i1 [[CMP1]], i32 1, i32 2 -; TUNIT-NEXT: [[Y_0:%.*]] = add nuw nsw i32 [[DOT]], [[Y_0_V]] -; TUNIT-NEXT: [[CMP6:%.*]] = icmp eq i32 [[Y_0]], 5 -; TUNIT-NEXT: [[I2P:%.*]] = inttoptr i1 [[CMP6]] to ptr -; TUNIT-NEXT: [[P2I:%.*]] = ptrtoint ptr [[I2P]] to i64 -; TUNIT-NEXT: ret i64 [[P2I]] +; TUNIT-NEXT: ret i64 0 ; ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int ; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5 -; CGSCC-NEXT: [[DOT:%.*]] = select i1 [[CMP]], i32 1, i32 2 -; CGSCC-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 10 -; CGSCC-NEXT: [[Y_0_V:%.*]] = select i1 [[CMP1]], i32 1, i32 2 -; CGSCC-NEXT: [[Y_0:%.*]] = add nuw nsw i32 [[DOT]], [[Y_0_V]] -; CGSCC-NEXT: [[CMP6:%.*]] = icmp eq i32 [[Y_0]], 5 -; CGSCC-NEXT: [[I2P:%.*]] = inttoptr i1 [[CMP6]] to ptr -; CGSCC-NEXT: [[P2I:%.*]] = ptrtoint ptr [[I2P]] to i64 -; CGSCC-NEXT: ret i64 [[P2I]] +; CGSCC-NEXT: ret i64 0 ; entry: %cmp = icmp sgt i32 %a, 5 diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index aa68dfb540064..9d19ff1d37c26 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -3274,9 +3274,7 @@ define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) { define <2 x i32> @dec_zext_add_nonzero_vec_poison2(<2 x i8> %x) { ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison2( ; CHECK-NEXT: [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8) -; CHECK-NEXT: [[A:%.*]] = add nsw <2 x i8> [[O]], splat (i8 -1) -; CHECK-NEXT: [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = add nuw nsw <2 x i32> [[B]], <i32 1, i32 poison> +; CHECK-NEXT: [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[C]] ; %o = or <2 x i8> %x, <i8 8, i8 8> diff --git a/llvm/test/Transforms/InstCombine/fls.ll b/llvm/test/Transforms/InstCombine/fls.ll index 68bc0a2fc8a1d..ea757268259f5 100644 --- a/llvm/test/Transforms/InstCombine/fls.ll +++ b/llvm/test/Transforms/InstCombine/fls.ll @@ -33,7 +33,7 @@ define i32 @flsnotconst(i64 %z) { ; CHECK-LABEL: @flsnotconst( ; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[Z:%.*]], i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[CTLZ]] to i32 -; CHECK-NEXT: [[GOO:%.*]] = sub nsw i32 64, [[TMP1]] +; CHECK-NEXT: [[GOO:%.*]] = sub nuw nsw i32 64, [[TMP1]] ; CHECK-NEXT: ret i32 [[GOO]] ; %goo = call i32 @flsl(i64 %z) diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 85d01b1786cc9..486da0bd2b4d5 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -3160,7 +3160,8 @@ define i1 @icmp_add_constant_with_constant_ult_to_slt_neg2(i8 range(i8 -4, 120) } ; Negative test: C2 is negative -define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -4, 10) %x) { +; Prevent constant fold by using the range [-10, 10). +define i1 @icmp_add_constant_with_constant_ult_to_slt_neg3(i32 range(i32 -10, 10) %x) { ; CHECK-LABEL: @icmp_add_constant_with_constant_ult_to_slt_neg3( ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], -6 diff --git a/llvm/test/Transforms/InstCombine/pr80597.ll b/llvm/test/Transforms/InstCombine/pr80597.ll index 148da056486f9..bf536b9ecd133 100644 --- a/llvm/test/Transforms/InstCombine/pr80597.ll +++ b/llvm/test/Transforms/InstCombine/pr80597.ll @@ -5,14 +5,9 @@ define i64 @pr80597(i1 %cond) { ; CHECK-LABEL: define i64 @pr80597( ; CHECK-SAME: i1 [[COND:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = select i1 [[COND]], i64 0, i64 -12884901888 -; CHECK-NEXT: [[SEXT1:%.*]] = add nsw i64 [[ADD]], 8836839514384105472 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[SEXT1]], -34359738368 -; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.else: -; CHECK-NEXT: [[SEXT2:%.*]] = ashr exact i64 [[ADD]], 1 -; CHECK-NEXT: [[ASHR:%.*]] = or disjoint i64 [[SEXT2]], 4418419761487020032 -; CHECK-NEXT: ret i64 [[ASHR]] +; CHECK-NEXT: ret i64 poison ; CHECK: if.then: ; CHECK-NEXT: ret i64 0 ; diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll index 6afb77d975b8c..3143d4addecc1 100644 --- a/llvm/test/Transforms/InstCombine/sadd_sat.ll +++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -824,11 +824,11 @@ entry: define i16 @or(i8 %X, i16 %Y) { ; CHECK-LABEL: @or( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[Y:%.*]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], -16 -; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[TMP2]]) -; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3]] to i16 -; CHECK-NEXT: ret i16 [[L12]] +; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3:%.*]] to i16 +; CHECK-NEXT: [[CONV14:%.*]] = or i16 [[Y:%.*]], -16 +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i16 [[L12]], [[CONV14]] +; CHECK-NEXT: [[L13:%.*]] = call i16 @llvm.smin.i16(i16 [[SUB]], i16 127) +; CHECK-NEXT: ret i16 [[L13]] ; %conv10 = sext i8 %X to i16 %conv14 = or i16 %Y, 65520 diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index efa89db4af61a..dff1f09213864 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1111,8 +1111,7 @@ define <3 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1_poison(<3 x i8> %a) { ; Can be optimized if the add nuw RHS constant range handles non-splat vectors. define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2( -; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 9> -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> splat (i8 9)) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], <i8 1, i8 0> ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, <i8 10, i8 9> @@ -1188,7 +1187,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8> ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2( ; CHECK-NEXT: [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 8> ; CHECK-NEXT: [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7) -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]]) +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %aa = add nsw <2 x i8> %a, <i8 7, i8 8> diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 2ee45dccc6595..b872bbb28bf7e 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -3486,6 +3486,21 @@ TEST_F(ValueTrackingTest, ComputeConstantRange) { EXPECT_EQ(CR.getSignedMin().getSExtValue(), -3); EXPECT_EQ(CR.getSignedMax().getSExtValue(), 0); } + { + auto M = parseModule(R"( + define i32 @test(i8 %x, i8 %y) { + %ext.x = zext i8 %x to i32 + %ext.y = zext i8 %y to i32 + %or = or disjoint i32 %ext.x, %ext.y + ret i32 %or + })"); + Function *F = M->getFunction("test"); + AssumptionCache AC(*F); + Instruction *Or = &findInstructionByName(F, "or"); + ConstantRange CR = computeConstantRange(Or, false, true, &AC, Or); + EXPECT_EQ(CR.getUnsignedMin().getZExtValue(), 0u); + EXPECT_EQ(CR.getUnsignedMax().getZExtValue(), 510u); + } } struct FindAllocaForValueTestParams { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
