fp optimizations (PR #164503)

Guy David via llvm-branch-commits Wed, 22 Oct 2025 02:17:02 -0700

https://github.com/guy-david updated 
https://github.com/llvm/llvm-project/pull/164503


>From 5f6506ab04cb11f547d20e7d013f9e63e15b5c3b Mon Sep 17 00:00:00 2001
From: Guy David <[email protected]>
Date: Wed, 22 Oct 2025 00:07:57 +0300
Subject: [PATCH 1/2] [DAGCombiner] Relax nsz constraint with fp->int->fp
 optimizations

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 51 ++++++++++++++--
 llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll  | 59 +++++++++++++++++++
 2 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1ad35b6c04d4d..121a740973d4f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18871,6 +18871,37 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
 
   return SDValue();
 }
+/// Check if a use of a floating-point operation doesn't care about the sign of
+/// zero. This allows us to optimize (sitofp (fptosi x)) -> ftrunc(x) even
+/// without NoSignedZerosFPMath, as long as all uses are sign-insensitive.
+static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo,
+                                 SelectionDAG &DAG) {
+  switch (Use->getOpcode()) {
+  case ISD::SETCC:
+    // Comparisons: IEEE 754 specifies +0.0 == -0.0.
+  case ISD::FABS:
+    // fabs always produces +0.0.
+    return true;
+  case ISD::FADD:
+  case ISD::FSUB: {
+    // Arithmetic with non-zero constants fixes the uncertainty around the sign
+    // bit.
+    SDValue Other = Use->getOperand(1 - OperandNo);
+    return DAG.isKnownNeverZeroFloat(Other);
+  }
+  default:
+    return false;
+  }
+}
+
+/// Check if all uses of a value are insensitive to the sign of zero.
+static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) {
+  return all_of(V->uses(), [&](SDUse &Use) {
+    SDNode *User = Use.getUser();
+    unsigned OperandNo = Use.getOperandNo();
+    return isSignInsensitiveUse(User, OperandNo, DAG);
+  });
+}
 
 static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
                                const TargetLowering &TLI) {
@@ -18892,12 +18923,13 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc 
&DL, SelectionDAG &DAG,
   bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
   assert(IsSigned || IsUnsigned);
 
-  bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath;
+  bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
+                          allUsesSignInsensitive(SDValue(N, 0), DAG);
   // For signed conversions: The optimization changes signed zero behavior.
   if (IsSigned && !IsSignedZeroSafe)
     return SDValue();
   // For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
-  // (unless NoSignedZerosFPMath is set).
+  // (unless outputting a signed zero is OK).
   if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
     return SDValue();
 
@@ -19376,10 +19408,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost 
doesn't
   // know it was called from a context with a nsz flag if the input fsub does
   // not.
-  if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
-      N0.hasOneUse()) {
-    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
-                       N0.getOperand(0));
+  if (N0.getOpcode() == ISD::FSUB && N0.hasOneUse()) {
+    SDValue X = N0.getOperand(0);
+    SDValue Y = N0.getOperand(1);
+
+    // (fneg (fsub X, Y)) -> (fsub Y, X) is only valid under nsz: when X == Y,
+    // -(X - Y) is -0.0 but Y - X is +0.0. Knowing that X or Y is never zero
+    // does not rule out X == Y (e.g. X = Y = 1.0), so that fact cannot be used
+    // to relax the nsz requirement here.
+    if (N->getFlags().hasNoSignedZeros()) {
+      return DAG.getNode(ISD::FSUB, SDLoc(N), VT, Y, X);
+    }
   }
 
   if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll 
b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
index 9a8c555953611..6f61e22203620 100644
--- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
@@ -134,7 +134,66 @@ entry:
   ret float %f
 }
 
+define i1 @test_fcmp(float %x) {
+; CHECK-LABEL: test_fcmp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fcmp:
+; NO-SIGNED-ZEROS:       // %bb.0:
+; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT:    fcmp s0, #0.0
+; NO-SIGNED-ZEROS-NEXT:    cset w0, eq
+; NO-SIGNED-ZEROS-NEXT:    ret
+  %conv1 = fptosi float %x to i32
+  %conv2 = sitofp i32 %conv1 to float
+  %cmp = fcmp oeq float %conv2, 0.0
+  ret i1 %cmp
+}
+
+define float @test_fadd(float %x) {
+; CHECK-LABEL: test_fadd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    fmov s1, #1.00000000
+; CHECK-NEXT:    fadd s0, s0, s1
+; CHECK-NEXT:    ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fadd:
+; NO-SIGNED-ZEROS:       // %bb.0:
+; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT:    fmov s1, #1.00000000
+; NO-SIGNED-ZEROS-NEXT:    fadd s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT:    ret
+  %conv1 = fptosi float %x to i32
+  %conv2 = sitofp i32 %conv1 to float
+  %add = fadd float %conv2, 1.0
+  ret float %add
+}
+
+define float @test_fabs(float %x) {
+; CHECK-LABEL: test_fabs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    fabs s0, s0
+; CHECK-NEXT:    ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fabs:
+; NO-SIGNED-ZEROS:       // %bb.0:
+; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT:    fabs s0, s0
+; NO-SIGNED-ZEROS-NEXT:    ret
+  %conv1 = fptosi float %x to i32
+  %conv2 = sitofp i32 %conv1 to float
+  %abs = call float @llvm.fabs.f32(float %conv2)
+  ret float %abs
+}
+
 declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.umin.i32(i32, i32)
 declare i32 @llvm.umax.i32(i32, i32)
+declare float @llvm.fabs.f32(float)

>From 0df2dcc4766a3589996081a96699b53c3bb9cdbc Mon Sep 17 00:00:00 2001
From: Guy David <[email protected]>
Date: Wed, 22 Oct 2025 12:16:34 +0300
Subject: [PATCH 2/2] Addressed comments

---
 llvm/include/llvm/CodeGen/SelectionDAG.h      |  4 ++
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +----------
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 29 +++++++++
 llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll  | 59 ++++++++++++++++---
 4 files changed, 86 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index df6ce0fe1b037..a4ab3ef1de30c 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2322,6 +2322,10 @@ class SelectionDAG {
   /// +nan are considered positive, -0.0, -inf and -nan are not.
   LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const;
 
+  /// Check if all uses of a floating-point value are insensitive to signed
+  /// zeros.
+  LLVM_ABI bool allUsesSignedZeroInsensitive(SDValue Op) const;
+
   /// Test whether two SDValues are known to compare equal. This
   /// is true if they are the same value, or if one is negative zero and the
   /// other positive zero.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 121a740973d4f..2781491dd0668 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18871,37 +18871,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
 
   return SDValue();
 }
-/// Check if a use of a floating-point operation doesn't care about the sign of
-/// zero. This allows us to optimize (sitofp (fptosi x)) -> ftrunc(x) even
-/// without NoSignedZerosFPMath, as long as all uses are sign-insensitive.
-static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo,
-                                 SelectionDAG &DAG) {
-  switch (Use->getOpcode()) {
-  case ISD::SETCC:
-    // Comparisons: IEEE 754 specifies +0.0 == -0.0.
-  case ISD::FABS:
-    // fabs always produces +0.0.
-    return true;
-  case ISD::FADD:
-  case ISD::FSUB: {
-    // Arithmetic with non-zero constants fixes the uncertainty around the sign
-    // bit.
-    SDValue Other = Use->getOperand(1 - OperandNo);
-    return DAG.isKnownNeverZeroFloat(Other);
-  }
-  default:
-    return false;
-  }
-}
-
-/// Check if all uses of a value are insensitive to the sign of zero.
-static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) {
-  return all_of(V->uses(), [&](SDUse &Use) {
-    SDNode *User = Use.getUser();
-    unsigned OperandNo = Use.getOperandNo();
-    return isSignInsensitiveUse(User, OperandNo, DAG);
-  });
-}
 
 static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
                                const TargetLowering &TLI) {
@@ -18924,7 +18893,7 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc 
&DL, SelectionDAG &DAG,
   assert(IsSigned || IsUnsigned);
 
   bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
-                          allUsesSignInsensitive(SDValue(N, 0), DAG);
+                          DAG.allUsesSignedZeroInsensitive(SDValue(N, 0));
   // For signed conversions: The optimization changes signed zero behavior.
   if (IsSigned && !IsSignedZeroSafe)
     return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 90edaf3ef5471..5af4b8b29cc81 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6075,6 +6075,35 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) 
const {
       Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
 }
 
+bool SelectionDAG::allUsesSignedZeroInsensitive(SDValue Op) const {
+  assert(Op.getValueType().isFloatingPoint());
+  return all_of(Op->uses(), [&](SDUse &Use) {
+    SDNode *User = Use.getUser();
+    unsigned OperandNo = Use.getOperandNo();
+
+    // Accept only users known to give the same result for +0.0 and -0.0.
+    switch (User->getOpcode()) {
+    case ISD::SETCC:
+      // Comparisons: IEEE-754 specifies +0.0 == -0.0.
+    case ISD::FABS:
+      // fabs maps both +0.0 and -0.0 to +0.0.
+      return true;
+    case ISD::FCOPYSIGN:
+      // copysign overwrites operand 0's sign but reads operand 1's sign.
+      return OperandNo == 0;
+    case ISD::FADD:
+    case ISD::FSUB: {
+      // Adding or subtracting a known non-zero value gives the same result
+      // for +0.0 and -0.0, so the sign of zero is not observable.
+      SDValue Other = User->getOperand(1 - OperandNo);
+      return isKnownNeverZeroFloat(Other);
+    }
+    default:
+      return false; // Conservatively assume the sign of zero matters.
+    }
+  });
+}
+
 bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
   if (Depth >= MaxRecursionDepth)
     return false; // Limit search depth.
diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll 
b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
index 6f61e22203620..cac155e256572 100644
--- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
@@ -154,6 +154,48 @@ define i1 @test_fcmp(float %x) {
   ret i1 %cmp
 }
 
+define float @test_fabs(float %x) {
+; CHECK-LABEL: test_fabs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    fabs s0, s0
+; CHECK-NEXT:    ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_fabs:
+; NO-SIGNED-ZEROS:       // %bb.0:
+; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT:    fabs s0, s0
+; NO-SIGNED-ZEROS-NEXT:    ret
+  %conv1 = fptosi float %x to i32
+  %conv2 = sitofp i32 %conv1 to float
+  %abs = call float @llvm.fabs.f32(float %conv2)
+  ret float %abs
+}
+
+define float @test_copysign(float %x, float %y) {
+; CHECK-LABEL: test_copysign:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    mvni v2.4s, #128, lsl #24
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_copysign:
+; NO-SIGNED-ZEROS:       // %bb.0:
+; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT:    mvni v2.4s, #128, lsl #24
+; NO-SIGNED-ZEROS-NEXT:    // kill: def $s1 killed $s1 def $q1
+; NO-SIGNED-ZEROS-NEXT:    bif v0.16b, v1.16b, v2.16b
+; NO-SIGNED-ZEROS-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-SIGNED-ZEROS-NEXT:    ret
+  %conv1 = fptosi float %x to i32
+  %conv2 = sitofp i32 %conv1 to float
+  %combine = call float @llvm.copysign.f32(float %conv2, float %y)
+  ret float %combine
+}
+
 define float @test_fadd(float %x) {
 ; CHECK-LABEL: test_fadd:
 ; CHECK:       // %bb.0:
@@ -174,22 +216,24 @@ define float @test_fadd(float %x) {
   ret float %add
 }
 
-define float @test_fabs(float %x) {
-; CHECK-LABEL: test_fabs:
+define float @test_fsub(float %x) {
+; CHECK-LABEL: test_fsub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintz s0, s0
-; CHECK-NEXT:    fabs s0, s0
+; CHECK-NEXT:    fmov s1, #-1.00000000
+; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
 ;
-; NO-SIGNED-ZEROS-LABEL: test_fabs:
+; NO-SIGNED-ZEROS-LABEL: test_fsub:
 ; NO-SIGNED-ZEROS:       // %bb.0:
 ; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT:    fabs s0, s0
+; NO-SIGNED-ZEROS-NEXT:    fmov s1, #-1.00000000
+; NO-SIGNED-ZEROS-NEXT:    fadd s0, s0, s1
 ; NO-SIGNED-ZEROS-NEXT:    ret
   %conv1 = fptosi float %x to i32
   %conv2 = sitofp i32 %conv1 to float
-  %abs = call float @llvm.fabs.f32(float %conv2)
-  ret float %abs
+  %sub = fsub float %conv2, 1.0
+  ret float %sub
 }
 
 declare i32 @llvm.smin.i32(i32, i32)
@@ -197,3 +241,4 @@ declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.umin.i32(i32, i32)
 declare i32 @llvm.umax.i32(i32, i32)
 declare float @llvm.fabs.f32(float)
+declare float @llvm.copysign.f32(float, float)

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DAGCombiner] Relax nsz constraint with fp->int->fp optimizations (PR #164503)

Reply via email to