https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/183373
>From 618e4f59e6ccdc663680253a9962f58abfb275e3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Wed, 25 Feb 2026 15:45:08 +0100 Subject: [PATCH 1/4] ValueTracking: Special case fmul by llvm.amdgcn.trig.preop This is another instance of the logic from #183159. If we know one source is not-infinity, and the other source is less than or equal to 1, this cannot overflow. Special case llvm.amdgcn.trig.preop, as a substitute for proper range tracking. This almost enables pruning edge case handling in trig function implementations, if not for the recursion depth limit (but that's a problem for another day). --- llvm/lib/Analysis/ValueTracking.cpp | 37 ++++-- .../AMDGPU/nofpclass-amdgcn-trig-preop.ll | 113 ++++++++++++++++++ 2 files changed, 138 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 2684b41cd1e5d..5eb048d1fd8ff 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4965,6 +4965,12 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) { } } +/// \return true if this is a floating point value that is known to have a +/// magintude smaller than 1. i.e., fabs(X) <=1.0 +static bool isAbsoluteValueLessEqualOne(const Value *V) { + return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value())); +} + void computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, KnownFPClass &Known, const SimplifyQuery &Q, unsigned Depth) { @@ -5574,37 +5580,44 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, Op->getType()->getScalarType()->getFltSemantics()) : DenormalMode::getDynamic(); + Value *LHS = Op->getOperand(0); + Value *RHS = Op->getOperand(1); // X * X is always non-negative or a NaN. 
// FIXME: Should check isGuaranteedNotToBeUndef - if (Op->getOperand(0) == Op->getOperand(1)) { + if (LHS == RHS) { KnownFPClass KnownSrc; - computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownSrc, - Q, Depth + 1); + computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownSrc, Q, + Depth + 1); Known = KnownFPClass::square(KnownSrc, Mode); break; } + KnownFPClass KnownLHS, KnownRHS; + const APFloat *CRHS; - if (match(Op->getOperand(1), m_APFloat(CRHS))) { - KnownFPClass KnownLHS; + if (match(RHS, m_APFloat(CRHS))) { computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS, Q, Depth + 1); - Known = KnownFPClass::fmul(KnownLHS, *CRHS, Mode); } else { - KnownFPClass KnownLHS, KnownRHS; - - computeKnownFPClass(Op->getOperand(1), DemandedElts, fcAllFlags, KnownRHS, - Q, Depth + 1); + computeKnownFPClass(RHS, DemandedElts, fcAllFlags, KnownRHS, Q, + Depth + 1); // TODO: Improve accuracy in unfused FMA pattern. We can prove an // additional not-nan if the addend is known-not negative infinity if the // multiply is known-not infinity. - computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS, - Q, Depth + 1); + computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownLHS, Q, + Depth + 1); Known = KnownFPClass::fmul(KnownLHS, KnownRHS, Mode); } + /// Propagate no-infs if the other source is known smaller than one, such + /// that this cannot introduce overflow. 
+ if (KnownLHS.isKnownNever(fcInf) && isAbsoluteValueLessEqualOne(RHS)) + Known.knownNot(fcInf); + else if (KnownRHS.isKnownNever(fcInf) && isAbsoluteValueLessEqualOne(LHS)) + Known.knownNot(fcInf); + break; } case Instruction::FDiv: diff --git a/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll b/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll index fe6e939664161..4cb217de2b1eb 100644 --- a/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll +++ b/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll @@ -10,3 +10,116 @@ define double @ret_trig_preop_f64(double %x, i32 %n) { %ret = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n) ret double %ret } + +define double @ret_not_inf__fmul__trig_preop(double nofpclass(inf) %not.inf, double %x, i32 %n) { +; CHECK-LABEL: define nofpclass(inf) double @ret_not_inf__fmul__trig_preop( +; CHECK-SAME: double nofpclass(inf) [[NOT_INF:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]] +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[NOT_INF]], [[TRIG_PREOP]] +; CHECK-NEXT: ret double [[MUL]] +; + %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n) + %mul = fmul double %not.inf, %trig.preop + ret double %mul +} + +define double @ret_trig_preop__fmul__not_inf(double nofpclass(inf) %not.inf, double %x, i32 %n) { +; CHECK-LABEL: define nofpclass(inf) double @ret_trig_preop__fmul__not_inf( +; CHECK-SAME: double nofpclass(inf) [[NOT_INF:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]] +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TRIG_PREOP]], [[NOT_INF]] +; CHECK-NEXT: ret double [[MUL]] +; + %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n) + %mul = fmul double %trig.preop, %not.inf + ret 
double %mul +} + +define double @ret_not_nan__fmul__trig_preop(double nofpclass(nan) %not.nan, double %x, i32 %n) { +; CHECK-LABEL: define double @ret_not_nan__fmul__trig_preop( +; CHECK-SAME: double nofpclass(nan) [[NOT_NAN:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]] +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[NOT_NAN]], [[TRIG_PREOP]] +; CHECK-NEXT: ret double [[MUL]] +; + %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n) + %mul = fmul double %not.nan, %trig.preop + ret double %mul +} + +define double @ret_trig_preop__fmul__not_nan(double nofpclass(nan) %not.nan, double %x, i32 %n) { +; CHECK-LABEL: define double @ret_trig_preop__fmul__not_nan( +; CHECK-SAME: double nofpclass(nan) [[NOT_NAN:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]] +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TRIG_PREOP]], [[NOT_NAN]] +; CHECK-NEXT: ret double [[MUL]] +; + %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n) + %mul = fmul double %trig.preop, %not.nan + ret double %mul +} + +; Extraction from __ocmlpriv_trigredlarge_f64. This should be able to +; propagate no-nans to the return. 
+define double @trig_preop_propagate_nonan(double noundef nofpclass(inf nan) %x){ +; CHECK-LABEL: define noundef nofpclass(nan) double @trig_preop_propagate_nonan( +; CHECK-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[I2:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 0) #[[ATTR2]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[X]], 0x7B00000000000000 +; CHECK-NEXT: [[I9:%.*]] = fmul double [[X]], 0x37F0000000000000 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[I9]], double [[X]] +; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[I2]], [[COND]] +; CHECK-NEXT: [[FNEG13:%.*]] = fneg double [[MUL11]] +; CHECK-NEXT: [[I32:%.*]] = tail call noundef nofpclass(nan) double @llvm.fma.f64(double noundef [[I2]], double noundef [[COND]], double noundef [[FNEG13]]) #[[ATTR2]] +; CHECK-NEXT: ret double [[I32]] +; +entry: + %i2 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 0) + %i4 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 1) + %cmp = fcmp oge double %x, 0x7B00000000000000 + %i9 = fmul double %x, 0x37F0000000000000 + %cond = select i1 %cmp, double %i9, double %x + %mul4 = fmul double %i4, %cond + %mul11 = fmul double %i2, %cond + %fneg13 = fneg double %mul11 + %i32 = tail call double @llvm.fma.f64(double %i2, double %cond, double %fneg13) + ret double %i32 +} + +; Superset of trig_preop_propagate_nonan. We would like to be able to +; propagate nonans to the return, but this hits the recursion depth +; limit. 
+define double @trig_preop_propagate_nonan_full(double noundef nofpclass(inf nan) %x) { +; CHECK-LABEL: define double @trig_preop_propagate_nonan_full( +; CHECK-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[I2:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 0) #[[ATTR2]] +; CHECK-NEXT: [[I4:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 1) #[[ATTR2]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[X]], 0x7B00000000000000 +; CHECK-NEXT: [[I9:%.*]] = fmul double [[X]], 0x37F0000000000000 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[I9]], double [[X]] +; CHECK-NEXT: [[MUL4:%.*]] = fmul double [[I4]], [[COND]] +; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[I2]], [[COND]] +; CHECK-NEXT: [[FNEG13:%.*]] = fneg double [[MUL11]] +; CHECK-NEXT: [[I32:%.*]] = tail call double @llvm.fma.f64(double noundef [[I2]], double noundef [[COND]], double noundef [[FNEG13]]) #[[ATTR2]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL4]], [[I32]] +; CHECK-NEXT: [[ADD57:%.*]] = fadd double [[MUL11]], [[ADD]] +; CHECK-NEXT: [[I108:%.*]] = fmul double [[ADD57]], 2.500000e-01 +; CHECK-NEXT: ret double [[I108]] +; +entry: + %i2 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 0) + %i4 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 1) + %cmp = fcmp oge double %x, 0x7B00000000000000 + %i9 = fmul double %x, 0x37F0000000000000 + %cond = select i1 %cmp, double %i9, double %x + %mul4 = fmul double %i4, %cond + %mul11 = fmul double %i2, %cond + %fneg13 = fneg double %mul11 + %i32 = tail call double @llvm.fma.f64(double %i2, double %cond, double %fneg13) + %add = fadd double %mul4, %i32 + %add57 = fadd double %mul11, %add + %i108 = fmul double %add57, 2.500000e-01 + ret double %i108 +} >From 7c86ea1c29e6322cdfd1e3ff8ff56d801df01f82 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email 
protected]> Date: Thu, 26 Feb 2026 16:34:12 +0100 Subject: [PATCH 2/4] cleanup --- llvm/lib/Analysis/ValueTracking.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5eb048d1fd8ff..dba5ea13fa26d 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4968,6 +4968,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) { /// \return true if this is a floating point value that is known to have a /// magintude smaller than 1. i.e., fabs(X) <=1.0 static bool isAbsoluteValueLessEqualOne(const Value *V) { + // TODO: Handle frexp and x - floor(x)? return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value())); } @@ -5596,8 +5597,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, const APFloat *CRHS; if (match(RHS, m_APFloat(CRHS))) { - computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS, - Q, Depth + 1); + computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownLHS, Q, + Depth + 1); Known = KnownFPClass::fmul(KnownLHS, *CRHS, Mode); } else { computeKnownFPClass(RHS, DemandedElts, fcAllFlags, KnownRHS, Q, >From a90c0d24966bcd30fcf4203848640d2e137a1646 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Thu, 26 Feb 2026 16:48:52 +0100 Subject: [PATCH 3/4] Typo fix --- llvm/lib/Analysis/ValueTracking.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index dba5ea13fa26d..c743f421026e2 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4966,7 +4966,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) { } /// \return true if this is a floating point value that is known to have a -/// magintude smaller than 1. i.e., fabs(X) <=1.0 +/// magnitude smaller than 1. 
i.e., fabs(X) <=1.0 static bool isAbsoluteValueLessEqualOne(const Value *V) { // TODO: Handle frexp and x - floor(x)? return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value())); >From fd63deffdeee8c3e281a9561d5df1801697b4c25 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Thu, 26 Feb 2026 16:49:10 +0100 Subject: [PATCH 4/4] Typo fix --- llvm/lib/Analysis/ValueTracking.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index c743f421026e2..ce8e27fef5e8a 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4966,7 +4966,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) { } /// \return true if this is a floating point value that is known to have a -/// magnitude smaller than 1. i.e., fabs(X) <=1.0 +/// magnitude less than or equal to 1, i.e., fabs(X) <= 1.0 static bool isAbsoluteValueLessEqualOne(const Value *V) { // TODO: Handle frexp and x - floor(x)? return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value())); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
