work206-submit)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Michael Meissner via Gcc-cvs Wed, 28 May 2025 23:41:33 -0700

https://gcc.gnu.org/g:2ecab381b9eb2bc22073f96d5fc97e620646fae1


commit 2ecab381b9eb2bc22073f96d5fc97e620646fae1
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Thu May 29 02:40:48 2025 -0400

    Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
    
    In bug PR target/118541 on power9, power10, and power11 systems, for the
    function:
    
            extern double __ieee754_acos (double);
    
            double
            __acospi (double x)
            {
              double ret = __ieee754_acos (x) / 3.14;
              return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
            }
    
    GCC currently generates the following code:
    
            Power9                          Power10 and Power11
            ======                          ===================
            bl __ieee754_acos               bl __ieee754_acos@notoc
            nop                             plfd 0,.LC0@pcrel
            addis 9,2,.LC2@toc@ha           xxspltidp 12,1065353216
            addi 1,1,32                     addi 1,1,32
            lfd 0,.LC2@toc@l(9)             ld 0,16(1)
            addis 9,2,.LC0@toc@ha           fdiv 0,1,0
            ld 0,16(1)                      mtlr 0
            lfd 12,.LC0@toc@l(9)            xscmpgtdp 1,0,12
            fdiv 0,1,0                      xxsel 1,0,12,1
            mtlr 0                          blr
            xscmpgtdp 1,0,12
            xxsel 1,0,12,1
            blr
    
    This is because ifcvt.c optimizes the conditional floating point move to 
use the
    XSCMPGTDP instruction.
    
    However, the XSCMPGTDP instruction traps if one of the arguments is a 
signaling
    NaN.  This patch disables generating XSCMP{EQ,GT,GE}{DP,QP} instructions 
unless
    -ffinite-math-only is in effect so that we do not get a trap.
    
    2025-05-28  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            PR target/118541
            * config/rs6000/rs6000.cc (have_compare_and_set_mask): Don't do 
compare
            and set mask operations unless -ffinite-math-only.
            * config/rs6000/rs6000.md (mov<SFDF:mode><SFDF2:mode>cc_p9): Disable
            generating XSCMP{EQ,GT,GE}{DP,QP} unless -ffinite-math-only is in
            effect.
            (mov<SFDF:mode><SFDF2:mode>cc_invert_p9): Likewise.
            (fpmask<mode>, SFDF iterator): Likewise.
            (xxsel<mode>, SFDF iterator): Likewise.
            (mov<mode>cc, IEEE128 iterator): Likewise.
            (mov<mode>cc_p10): Likewise.
            (mov<mode>cc_invert_p10): Likewise.
            (fpmask<mode>, IEEE128 iterator): Likewise.
            (xxsel<mode>, IEEE128 iterator): Likewise.
    
    gcc/testsuite/
    
            PR target/118541
            * gcc.target/powerpc/float128-cmove.c: Change optimization flag to
            -Ofast instead of -O2.
            * gcc.target/powerpc/float128-minmax-3.: Likewise.
            * gcc.target/powerpc/p9-minmax-2.c: Delete test, the code is no 
longer
            valid unless NaNs are not handled.
            * gcc.target/powerpc/pr118541-1.c: New test.
            * gcc.target/powerpc/pr118541-2.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000.cc                        |   8 +-
 gcc/config/rs6000/rs6000.md                        |  27 ++-
 gcc/testsuite/gcc.target/powerpc/float128-cmove.c  |   6 +-
 .../gcc.target/powerpc/float128-minmax-3.c         |   6 +-
 gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c     | 190 ---------------------
 gcc/testsuite/gcc.target/powerpc/pr118541-1.c      |  28 +++
 gcc/testsuite/gcc.target/powerpc/pr118541-2.c      |  26 +++
 7 files changed, 89 insertions(+), 202 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 7ea377781034..0ef509f06230 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -16409,11 +16409,17 @@ rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx 
true_cond, rtx false_cond)
 /* Helper function to return true if the target has instructions to do a
    compare and set mask instruction that can be used with XXSEL to implement a
    conditional move.  It is also assumed that such a target also supports the
-   "C" minimum and maximum instructions. */
+   "C" minimum and maximum instructions.
+
+   However, these instructions will trap if given a signaling NaN, so we can
+   only use them if NaNs are not expected.  */
 
 static bool
 have_compare_and_set_mask (machine_mode mode)
 {
+  if (!flag_finite_math_only)
+    return false;
+
   switch (mode)
     {
     case E_SFmode:
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9c718ca2a226..c13101eb4318 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5653,6 +5653,10 @@
   "fsel %0,%1,%2,%3"
   [(set_attr "type" "fp")])
 
+;; On power9, we can generate XSCMP{EQ,GT,GE}DP and XXSEL to do a floating
+;; point conditional move.  However, these instructions trap if one of the
+;; arguments is a signalling NaN.  Therefore we can only do this optimize if
+;; NaNs are not expected in the code.
 (define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_p9"
   [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa")
        (if_then_else:SFDF
@@ -5662,7 +5666,7 @@
         (match_operand:SFDF 4 "vsx_register_operand" "wa,wa")
         (match_operand:SFDF 5 "vsx_register_operand" "wa,wa")))
    (clobber (match_scratch:V2DI 6 "=0,&wa"))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5694,7 +5698,7 @@
         (match_operand:SFDF 4 "vsx_register_operand" "wa,wa")
         (match_operand:SFDF 5 "vsx_register_operand" "wa,wa")))
    (clobber (match_scratch:V2DI 6 "=0,&wa"))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5729,7 +5733,7 @@
                 (match_operand:SFDF 3 "vsx_register_operand" "wa")])
         (match_operand:V2DI 4 "all_ones_constant" "")
         (match_operand:V2DI 5 "zero_constant" "")))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "xscmp%V1dp %x0,%x2,%x3"
   [(set_attr "type" "fpcompare")])
 
@@ -5739,18 +5743,23 @@
                               (match_operand:V2DI 2 "zero_constant" ""))
                           (match_operand:SFDF 3 "vsx_register_operand" "wa")
                           (match_operand:SFDF 4 "vsx_register_operand" "wa")))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "xxsel %x0,%x4,%x3,%x1"
   [(set_attr "type" "vecmove")])
 
 ;; Support for ISA 3.1 IEEE 128-bit conditional move.  The mode used in the
 ;; comparison must be the same as used in the move.
+;;
+;; On power10, we can generate XSCMP{EQ,GT,GE}QP and XXSEL to do a floating
+;; point conditional move for IEEE 128-bit values.  However, these instructions
+;; trap if one of the arguments is a signalling NaN.  Therefore we can only do
+;; this optimize if NaNs are not expected in the code.
 (define_expand "mov<mode>cc"
    [(set (match_operand:IEEE128 0 "gpc_reg_operand")
         (if_then_else:IEEE128 (match_operand 1 "comparison_operator")
                               (match_operand:IEEE128 2 "gpc_reg_operand")
                               (match_operand:IEEE128 3 "gpc_reg_operand")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
 {
   if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
     DONE;
@@ -5767,7 +5776,7 @@
         (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
         (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
    (clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5799,7 +5808,7 @@
         (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
         (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
    (clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5834,7 +5843,7 @@
                 (match_operand:IEEE128 3 "altivec_register_operand" "v")])
         (match_operand:V2DI 4 "all_ones_constant" "")
         (match_operand:V2DI 5 "zero_constant" "")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "xscmp%V1qp %0,%2,%3"
   [(set_attr "type" "fpcompare")])
 
@@ -5845,7 +5854,7 @@
             (match_operand:V2DI 2 "zero_constant" ""))
         (match_operand:IEEE128 3 "altivec_register_operand" "v")
         (match_operand:IEEE128 4 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "xxsel %x0,%x4,%x3,%x1"
   [(set_attr "type" "vecmove")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c 
b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
index 2fae8dc23bcf..496fe29740c8 100644
--- a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
+++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
@@ -1,7 +1,11 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target ppc_float128_hw } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -Ofast" } */
+
+/* The XSCMP{EQ,GT,GE}QP instructions will trap if a signaling NaN is one of
+   the arguments, so this code is now only generated if -Ofast or
+   -ffinite-math-only is used.  */
 
 #ifndef TYPE
 #ifdef __LONG_DOUBLE_IEEE128__
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c 
b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
index 6f7627c0f2a1..9c7474911981 100644
--- a/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
@@ -1,6 +1,10 @@
 /* { dg-require-effective-target ppc_float128_hw } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -Ofast" } */
+
+/* The XS{MAX,MIN}}CQP instructions will trap if a signaling NaN is one of the
+   arguments, so this code is now only generated if -Ofast or
+   -ffinite-math-only is used.  */
 
 #ifndef TYPE
 #define TYPE _Float128
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c 
b/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c
deleted file mode 100644
index 0684eb501c56..000000000000
--- a/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-mdejagnu-cpu=power9 -mvsx -O2 -mpower9-minmax" } */
-/* { dg-require-effective-target powerpc_vsx } */
-/* { dg-final { scan-assembler-not "fsel"      } } */
-/* { dg-final { scan-assembler     "xscmpeqdp" } } */
-/* { dg-final { scan-assembler     "xscmpgtdp" } } */
-/* { dg-final { scan-assembler-not "xscmpodp"  } } */
-/* { dg-final { scan-assembler-not "xscmpudp"  } } */
-/* { dg-final { scan-assembler     "xsmaxcdp"  } } */
-/* { dg-final { scan-assembler-not "xsmaxdp"   } } */
-/* { dg-final { scan-assembler     "xsmincdp"  } } */
-/* { dg-final { scan-assembler-not "xsmindp"   } } */
-/* { dg-final { scan-assembler     "xxsel"     } } */
-
-/* Due to NaN support, <= and >= are not handled presently unless -ffast-math
-   is used.  At some point this will be fixed and the xscmpgedp instruction can
-   be generated normally. The <= and >= tests are bracketed with
-   #ifdef DO_GE_LE.  */
-
-#ifdef DO_GE_LE
-double
-dbl_max1 (double a, double b)
-{
-  return (a >= b) ? a : b;
-}
-#endif
-
-double
-dbl_max2 (double a, double b)
-{
-  return (a > b) ? a : b;
-}
-
-double
-dbl_min1 (double a, double b)
-{
-  return (a < b) ? a : b;
-}
-
-#ifdef DO_GE_LE
-double
-dbl_min2 (double a, double b)
-{
-  return (a <= b) ? a : b;
-}
-#endif
-
-double
-dbl_cmp_eq (double a, double b, double c, double d)
-{
-  return (a == b) ? c : d;
-}
-
-double
-dbl_cmp_ne (double a, double b, double c, double d)
-{
-  return (a != b) ? c : d;
-}
-
-double
-dbl_cmp_gt (double a, double b, double c, double d)
-{
-  return (a > b) ? c : d;
-}
-
-#ifdef DO_GE_LE
-double
-dbl_cmp_ge (double a, double b, double c, double d)
-{
-  return (a >= b) ? c : d;
-}
-#endif
-
-double
-dbl_cmp_lt (double a, double b, double c, double d)
-{
-  return (a < b) ? c : d;
-}
-
-#ifdef DO_GE_LE
-double
-dbl_cmp_le (double a, double b, double c, double d)
-{
-  return (a <= b) ? c : d;
-}
-#endif
-
-#ifdef DO_GE_LE
-float
-flt_max1 (float a, float b)
-{
-  return (a >= b) ? a : b;
-}
-#endif
-
-float
-flt_max2 (float a, float b)
-{
-  return (a > b) ? a : b;
-}
-
-float
-flt_min1 (float a, float b)
-{
-  return (a < b) ? a : b;
-}
-
-#ifdef DO_GE_LE
-float
-flt_min2 (float a, float b)
-{
-  return (a <= b) ? a : b;
-}
-#endif
-
-float
-flt_cmp_eq (float a, float b, float c, float d)
-{
-  return (a == b) ? c : d;
-}
-
-float
-flt_cmp_ne (float a, float b, float c, float d)
-{
-  return (a != b) ? c : d;
-}
-
-float
-flt_cmp_gt (float a, float b, float c, float d)
-{
-  return (a > b) ? c : d;
-}
-
-#ifdef DO_GE_LE
-float
-flt_cmp_ge (float a, float b, float c, float d)
-{
-  return (a >= b) ? c : d;
-}
-#endif
-
-float
-flt_cmp_lt (float a, float b, float c, float d)
-{
-  return (a < b) ? c : d;
-}
-
-#ifdef DO_GE_LE
-float
-flt_cmp_le (float a, float b, float c, float d)
-{
-  return (a <= b) ? c : d;
-}
-#endif
-
-double
-dbl_flt_max1 (float a, float b)
-{
-  return (a > b) ? a : b;
-}
-
-double
-dbl_flt_max2 (double a, float b)
-{
-  return (a > b) ? a : b;
-}
-
-double
-dbl_flt_max3 (float a, double b)
-{
-  return (a > b) ? a : b;
-}
-
-double
-dbl_flt_min1 (float a, float b)
-{
-  return (a < b) ? a : b;
-}
-
-double
-dbl_flt_min2 (double a, float b)
-{
-  return (a < b) ? a : b;
-}
-
-double
-dbl_flt_min3 (float a, double b)
-{
-  return (a < b) ? a : b;
-}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr118541-1.c
new file mode 100644
index 000000000000..d5690dd7e38a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr118541-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* PR target/118541 says that the ordered comparison functions like isgreater
+   should not optimize floating point conditional moves to use
+   x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause traps
+   if one of the arguments is a signaling NaN.  */
+
+/* Verify isgreater does not generate xscmpgtdp when NaNs are allowed.  */
+
+double
+ordered_compare (double a, double b, double c, double d)
+{
+  /*
+   * fcmpu 0,1,2
+   * fmr   1,4
+   * bnglr 0
+   * fmr   1,3
+   * blr
+   */
+
+  return __builtin_isgreater (a, b) ? c : d;
+}
+
+/* { dg-final { scan-assembler-not {\mxscmpg[te]dp\M}       } } */
+/* { dg-final { scan-assembler-not {\mxxsel\M}              } } */
+/* { dg-final { scan-assembler     {\mxscmpudp\M|\mfcmpu\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541-2.c 
b/gcc/testsuite/gcc.target/powerpc/pr118541-2.c
new file mode 100644
index 000000000000..5e1d83daedad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr118541-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power9 -Ofast" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* PR target/118541 says that the ordered comparison functions like isgreater
+   should not optimize floating point conditional moves to use
+   x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause traps
+   if one of the arguments is a signaling NaN.  */
+
+/* Verify isgreater does generate xscmpgtdp when NaNs are not allowed.  */
+
+double
+ordered_compare (double a, double b, double c, double d)
+{
+  /*
+   * xscmpgtdp 1,1,2
+   * xxsel     1,4,3,1
+   * blr
+   */
+
+  return __builtin_isgreater (a, b) ? c : d;
+}
+
+/* { dg-final { scan-assembler     {\mxscmpg[te]dp\M}       } } */
+/* { dg-final { scan-assembler     {\mxxsel\M}              } } */
+/* { dg-final { scan-assembler-not {\mxscmpudp\M|\mfcmpu\M} } } */

[gcc(refs/users/meissner/heads/work206-submit)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Reply via email to