https://github.com/guy-david updated https://github.com/llvm/llvm-project/pull/164503
>From 5f6506ab04cb11f547d20e7d013f9e63e15b5c3b Mon Sep 17 00:00:00 2001 From: Guy David <[email protected]> Date: Wed, 22 Oct 2025 00:07:57 +0300 Subject: [PATCH 1/2] [DAGCombiner] Relax nsz constraint with fp->int->fp optimizations --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 51 ++++++++++++++-- llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll | 59 +++++++++++++++++++ 2 files changed, 104 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1ad35b6c04d4d..121a740973d4f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18871,6 +18871,37 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { return SDValue(); } +/// Check if a use of a floating-point operation doesn't care about the sign of +/// zero. This allows us to optimize (sitofp (fptosi x)) -> ftrunc(x) even +/// without NoSignedZerosFPMath, as long as all uses are sign-insensitive. +static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo, + SelectionDAG &DAG) { + switch (Use->getOpcode()) { + case ISD::SETCC: + // Comparisons: IEEE 754 specifies +0.0 == -0.0. + case ISD::FABS: + // fabs always produces +0.0. + return true; + case ISD::FADD: + case ISD::FSUB: { + // Arithmetic with non-zero constants fixes the uncertainty around the sign + // bit. + SDValue Other = Use->getOperand(1 - OperandNo); + return DAG.isKnownNeverZeroFloat(Other); + } + default: + return false; + } +} + +/// Check if all uses of a value are insensitive to the sign of zero. 
+static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) { + return all_of(V->uses(), [&](SDUse &Use) { + SDNode *User = Use.getUser(); + unsigned OperandNo = Use.getOperandNo(); + return isSignInsensitiveUse(User, OperandNo, DAG); + }); +} static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -18892,12 +18923,13 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP; assert(IsSigned || IsUnsigned); - bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath; + bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath || + allUsesSignInsensitive(SDValue(N, 0), DAG); // For signed conversions: The optimization changes signed zero behavior. if (IsSigned && !IsSignedZeroSafe) return SDValue(); // For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0 - // (unless NoSignedZerosFPMath is set). + // (unless outputting a signed zero is OK). if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT)) return SDValue(); @@ -19376,10 +19408,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't // know it was called from a context with a nsz flag if the input fsub does // not. - if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() && - N0.hasOneUse()) { - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), - N0.getOperand(0)); + if (N0.getOpcode() == ISD::FSUB && N0.hasOneUse()) { + SDValue X = N0.getOperand(0); + SDValue Y = N0.getOperand(1); + + // -(X - Y) and (Y - X) differ only when X == Y: the former is -0.0, the + // latter +0.0. A non-zero X or Y does not rule out X == Y (for example + // X = Y = 1.0), so this fold is valid only when the sign of zero may be + // ignored, i.e. when the fneg carries the nsz flag.
+ if (N->getFlags().hasNoSignedZeros()) { + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, Y, X); + } } if (SimplifyDemandedBits(SDValue(N, 0))) diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll index 9a8c555953611..6f61e22203620 100644 --- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll @@ -134,7 +134,66 @@ entry: ret float %f } +define i1 @test_fcmp(float %x) { +; CHECK-LABEL: test_fcmp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +; +; NO-SIGNED-ZEROS-LABEL: test_fcmp: +; NO-SIGNED-ZEROS: // %bb.0: +; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 +; NO-SIGNED-ZEROS-NEXT: fcmp s0, #0.0 +; NO-SIGNED-ZEROS-NEXT: cset w0, eq +; NO-SIGNED-ZEROS-NEXT: ret + %conv1 = fptosi float %x to i32 + %conv2 = sitofp i32 %conv1 to float + %cmp = fcmp oeq float %conv2, 0.0 + ret i1 %cmp +} + +define float @test_fadd(float %x) { +; CHECK-LABEL: test_fadd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: fmov s1, #1.00000000 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret +; +; NO-SIGNED-ZEROS-LABEL: test_fadd: +; NO-SIGNED-ZEROS: // %bb.0: +; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 +; NO-SIGNED-ZEROS-NEXT: fmov s1, #1.00000000 +; NO-SIGNED-ZEROS-NEXT: fadd s0, s0, s1 +; NO-SIGNED-ZEROS-NEXT: ret + %conv1 = fptosi float %x to i32 + %conv2 = sitofp i32 %conv1 to float + %add = fadd float %conv2, 1.0 + ret float %add +} + +define float @test_fabs(float %x) { +; CHECK-LABEL: test_fabs: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: fabs s0, s0 +; CHECK-NEXT: ret +; +; NO-SIGNED-ZEROS-LABEL: test_fabs: +; NO-SIGNED-ZEROS: // %bb.0: +; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 +; NO-SIGNED-ZEROS-NEXT: fabs s0, s0 +; NO-SIGNED-ZEROS-NEXT: ret + %conv1 = fptosi float %x to i32 + %conv2 = sitofp i32 %conv1 to float + %abs = call
float @llvm.fabs.f32(float %conv2) + ret float %abs +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) declare i32 @llvm.umax.i32(i32, i32) +declare float @llvm.fabs.f32(float) >From 0df2dcc4766a3589996081a96699b53c3bb9cdbc Mon Sep 17 00:00:00 2001 From: Guy David <[email protected]> Date: Wed, 22 Oct 2025 12:16:34 +0300 Subject: [PATCH 2/2] Addressed comments --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 ++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +---------- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 29 +++++++++ llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll | 59 ++++++++++++++++--- 4 files changed, 86 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index df6ce0fe1b037..a4ab3ef1de30c 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -2322,6 +2322,10 @@ class SelectionDAG { /// +nan are considered positive, -0.0, -inf and -nan are not. LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const; + /// Check if all uses of a floating-point value are insensitive to signed + /// zeros. + LLVM_ABI bool allUsesSignedZeroInsensitive(SDValue Op) const; + /// Test whether two SDValues are known to compare equal. This /// is true if they are the same value, or if one is negative zero and the /// other positive zero. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 121a740973d4f..2781491dd0668 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18871,37 +18871,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { return SDValue(); } -/// Check if a use of a floating-point operation doesn't care about the sign of -/// zero. 
This allows us to optimize (sitofp (fptosi x)) -> ftrunc(x) even -/// without NoSignedZerosFPMath, as long as all uses are sign-insensitive. -static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo, - SelectionDAG &DAG) { - switch (Use->getOpcode()) { - case ISD::SETCC: - // Comparisons: IEEE 754 specifies +0.0 == -0.0. - case ISD::FABS: - // fabs always produces +0.0. - return true; - case ISD::FADD: - case ISD::FSUB: { - // Arithmetic with non-zero constants fixes the uncertainty around the sign - // bit. - SDValue Other = Use->getOperand(1 - OperandNo); - return DAG.isKnownNeverZeroFloat(Other); - } - default: - return false; - } -} - -/// Check if all uses of a value are insensitive to the sign of zero. -static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) { - return all_of(V->uses(), [&](SDUse &Use) { - SDNode *User = Use.getUser(); - unsigned OperandNo = Use.getOperandNo(); - return isSignInsensitiveUse(User, OperandNo, DAG); - }); -} static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -18924,7 +18893,7 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, assert(IsSigned || IsUnsigned); bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath || - allUsesSignInsensitive(SDValue(N, 0), DAG); + DAG.allUsesSignedZeroInsensitive(SDValue(N, 0)); // For signed conversions: The optimization changes signed zero behavior. 
if (IsSigned && !IsSignedZeroSafe) return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 90edaf3ef5471..5af4b8b29cc81 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6075,6 +6075,35 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { Op, [](ConstantFPSDNode *C) { return !C->isZero(); }); } +bool SelectionDAG::allUsesSignedZeroInsensitive(SDValue Op) const { + assert(Op.getValueType().isFloatingPoint()); + return all_of(Op->uses(), [&](SDUse &Use) { + SDNode *User = Use.getUser(); + unsigned OperandNo = Use.getOperandNo(); + + // Decide per user opcode whether +0.0 and -0.0 are interchangeable here. + switch (User->getOpcode()) { + case ISD::SETCC: + // IEEE-754 compares +0.0 and -0.0 as equal; shares the return below. + case ISD::FABS: + // fabs clears the sign bit, so both zeros become +0.0. + return true; + case ISD::FCOPYSIGN: + // copysign overwrites operand 0's sign bit; only that operand is safe. + return OperandNo == 0; + case ISD::FADD: + case ISD::FSUB: { + // If the other operand is known never zero, the result is the same for + // either sign of zero. + SDValue Other = User->getOperand(1 - OperandNo); + return isKnownNeverZeroFloat(Other); + } + default: + return false; + } + }); +} + bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { if (Depth >= MaxRecursionDepth) return false; // Limit search depth.
diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll index 6f61e22203620..cac155e256572 100644 --- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll @@ -154,6 +154,48 @@ define i1 @test_fcmp(float %x) { ret i1 %cmp } +define float @test_fabs(float %x) { +; CHECK-LABEL: test_fabs: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: fabs s0, s0 +; CHECK-NEXT: ret +; +; NO-SIGNED-ZEROS-LABEL: test_fabs: +; NO-SIGNED-ZEROS: // %bb.0: +; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 +; NO-SIGNED-ZEROS-NEXT: fabs s0, s0 +; NO-SIGNED-ZEROS-NEXT: ret + %conv1 = fptosi float %x to i32 + %conv2 = sitofp i32 %conv1 to float + %abs = call float @llvm.fabs.f32(float %conv2) + ret float %abs +} + +define float @test_copysign(float %x, float %y) { +; CHECK-LABEL: test_copysign: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz s0, s0 +; CHECK-NEXT: mvni v2.4s, #128, lsl #24 +; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: ret +; +; NO-SIGNED-ZEROS-LABEL: test_copysign: +; NO-SIGNED-ZEROS: // %bb.0: +; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 +; NO-SIGNED-ZEROS-NEXT: mvni v2.4s, #128, lsl #24 +; NO-SIGNED-ZEROS-NEXT: // kill: def $s1 killed $s1 def $q1 +; NO-SIGNED-ZEROS-NEXT: bif v0.16b, v1.16b, v2.16b +; NO-SIGNED-ZEROS-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-SIGNED-ZEROS-NEXT: ret + %conv1 = fptosi float %x to i32 + %conv2 = sitofp i32 %conv1 to float + %combine = call float @llvm.copysign.f32(float %conv2, float %y) + ret float %combine +} + define float @test_fadd(float %x) { ; CHECK-LABEL: test_fadd: ; CHECK: // %bb.0: @@ -174,22 +216,24 @@ define float @test_fadd(float %x) { ret float %add } -define float @test_fabs(float %x) { -; CHECK-LABEL: test_fabs: +define float @test_fsub(float %x) { +; CHECK-LABEL: test_fsub: ; CHECK: // %bb.0: ; CHECK-NEXT: frintz s0, s0 
-; CHECK-NEXT: fabs s0, s0 +; CHECK-NEXT: fmov s1, #-1.00000000 +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret ; -; NO-SIGNED-ZEROS-LABEL: test_fabs: +; NO-SIGNED-ZEROS-LABEL: test_fsub: ; NO-SIGNED-ZEROS: // %bb.0: ; NO-SIGNED-ZEROS-NEXT: frintz s0, s0 -; NO-SIGNED-ZEROS-NEXT: fabs s0, s0 +; NO-SIGNED-ZEROS-NEXT: fmov s1, #-1.00000000 +; NO-SIGNED-ZEROS-NEXT: fadd s0, s0, s1 ; NO-SIGNED-ZEROS-NEXT: ret %conv1 = fptosi float %x to i32 %conv2 = sitofp i32 %conv1 to float - %abs = call float @llvm.fabs.f32(float %conv2) - ret float %abs + %sub = fsub float %conv2, 1.0 + ret float %sub } declare i32 @llvm.smin.i32(i32, i32) @@ -197,3 +241,4 @@ declare i32 @llvm.smax.i32(i32, i32) declare i32 @llvm.umin.i32(i32, i32) declare i32 @llvm.umax.i32(i32, i32) declare float @llvm.fabs.f32(float) +declare float @llvm.copysign.f32(float, float) _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
