https://gcc.gnu.org/g:416996ae6b1e1cff86c64593a86b35d0ab5e2e44
commit 416996ae6b1e1cff86c64593a86b35d0ab5e2e44 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Tue Jul 29 23:42:22 2025 -0400 Fix PR 118541, do not generate floating point cmoves for IEEE compares. In bug PR target/118541 on power9, power10, and power11 systems, for the function: extern double __ieee754_acos (double); double __acospi (double x) { double ret = __ieee754_acos (x) / 3.14; return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; } GCC currently generates the following code: Power9 Power10 and Power11 ====== =================== bl __ieee754_acos bl __ieee754_acos@notoc nop plfd 0,.LC0@pcrel addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 addi 1,1,32 addi 1,1,32 lfd 0,.LC2@toc@l(9) ld 0,16(1) addis 9,2,.LC0@toc@ha fdiv 0,1,0 ld 0,16(1) mtlr 0 lfd 12,.LC0@toc@l(9) xscmpgtdp 1,0,12 fdiv 0,1,0 xxsel 1,0,12,1 mtlr 0 blr xscmpgtdp 1,0,12 xxsel 1,0,12,1 blr This is because ifcvt.cc optimizes the conditional floating point move to use the XSCMPGTDP instruction. However, the XSCMPGTDP instruction will generate an interrupt if one of the arguments is a signalling NaN and signalling NaNs can generate an interrupt. The IEEE comparison functions (isgreater, etc.) require that the comparison not raise an interrupt. The following patch changes the PowerPC back end so that ifcvt.cc will not change the if/then test and move into a conditional move if the comparison is one of the comparisons that do not raise an error with signalling NaNs and -Ofast is not used. If a normal comparison is used or -Ofast is used, GCC will continue to generate XSCMPGTDP and XXSEL. For the following code: #include <math.h> double ieee_compare (double a, double b, double c, double d) { return isgreater (a, b) ? c : d; } /* Verify normal > does generate xscmpgtdp. */ double normal_compare (double a, double b, double c, double d) { return a > b ? c : d; } with the following patch, GCC generates the following for power9, power10, and power11: ieee_compare: fcmpu 0,1,2 fmr 1,4 bnglr 0 fmr 1,3 blr normal_compare: xscmpgtdp 1,1,2 xxsel 1,4,3,1 blr 2025-07-29 Michael Meissner <meiss...@linux.ibm.com> gcc/ PR target/118541 * config/rs6000/predicates.md (fpmask_comparison_operator): Add NE, LT, LE. (invert_fpmask_comparison_operator): Delete. * config/rs6000/rs6000-protos.h (enum cond_reverse_fp): New enumeration. (rs6000_reverse_condition): Add argument. * config/rs6000/rs6000.cc (rs6000_reverse_condition): Add argument which controls whether floating point comparisons can be reversed for conditional moves that involve IEEE comparison functions. The IEEE comparison functions are required to not trap on signaling NaNs. If this is a jump, we allow the comparison to be reversed. (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call to not allow reversing floating point comparisons that involve IEEE comparison functions. * config/rs6000/rs6000.h (REVERSE_CONDITION): Likewise. * config/rs6000/rs6000.md (mov<SFDF:mode><SFDF2:mode>cc_p9): Add support to convert NE into EQ by swapping the true/false values during the split. (mov<SFDF:mode><SFDF2:mode>cc_invert_p9): Delete insn. (fpmask<mode>, SFDF iterator): Add support for LT and LE. (mov<mode>cc_p10): Add support to convert NE into EQ by swapping the true/false values during the split. (mov<mode>cc_invert_p10): Delete insn. (fpmask<mode>, IEEE128 iterator): Add support for LT and LE. (reverse_branch_comparison): Name the insn. Adjust the rs6000_reverse_condition calls. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/pr118541-1.c: New test. * gcc.target/powerpc/pr118541-2.c: Likewise. Diff: --- gcc/config/rs6000/predicates.md | 11 +- gcc/config/rs6000/rs6000-protos.h | 24 +++- gcc/config/rs6000/rs6000.cc | 37 ++++-- gcc/config/rs6000/rs6000.h | 10 +- gcc/config/rs6000/rs6000.md | 158 ++++++++++++-------------- gcc/testsuite/gcc.target/powerpc/pr118541-1.c | 28 +++++ gcc/testsuite/gcc.target/powerpc/pr118541-2.c | 26 +++++ 7 files changed, 187 insertions(+), 107 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..7087b69080d8 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1460,14 +1460,11 @@ ;; Return 1 if OP is a comparison operator suitable for floating point ;; vector/scalar comparisons that generate a -1/0 mask. +;; +;; Do not match UNEQ, UNLT, UNLE, UNGT, or UNGE since the fpmask +;; instructions can trap on signaling NaNs. (define_predicate "fpmask_comparison_operator" - (match_code "eq,gt,ge")) - -;; Return 1 if OP is a comparison operator suitable for vector/scalar -;; comparisons that generate a 0/-1 mask (i.e. the inverse of -;; fpmask_comparison_operator). -(define_predicate "invert_fpmask_comparison_operator" - (match_code "ne,unlt,unle")) + (match_code "eq,ne,gt,ge,lt,le")) ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar ;; comparisons that generate a -1/0 mask. diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4619142d197b..2eea755285fe 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -114,8 +114,30 @@ extern const char *rs6000_sibcall_template (rtx *, unsigned int); extern const char *rs6000_indirect_call_template (rtx *, unsigned int); extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int); extern const char *rs6000_pltseq_template (rtx *, int); + +/* Whether we can reverse the sense of a floating point comparison. + + If the comparison involves UNEQ, LTGT, UNGT, UNGE, UNLT, and UNLE + comparisons we cannot generate instructions that could generate a trap with + a signaling NaN. These comprisons are created using the floating point + relational tests (like isgreater, isless, etc.), and these comparisons must + not trap on any value like a signaling NaN. The fpmask instructions + (XSCMPEQDP, XSCMPGTQP, etc.) that do the comparison and produce 1's or 0's + to allow the XXSEL instruction to do a conditional move. + + However, if we are not generating fpmask instructions, we have to allow + doing the reversals. We need this reversal to handle conditional jumps that + are too far away to jump to directly and we have to reverse the jump, so we + can generate a jump around an unconditional jump. */ + +enum class cond_reverse_fp { + reverse_ok, + no_reverse +}; + extern enum rtx_code rs6000_reverse_condition (machine_mode, - enum rtx_code); + enum rtx_code, + enum cond_reverse_fp); extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx); extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx); extern void rs6000_emit_sCOND (machine_mode, rtx[]); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index f4f0ad80e35e..652b76af8e67 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -15282,17 +15282,27 @@ rs6000_print_patchable_function_entry (FILE *file, } enum rtx_code -rs6000_reverse_condition (machine_mode mode, enum rtx_code code) +rs6000_reverse_condition (machine_mode mode, + enum rtx_code code, + enum cond_reverse_fp do_fp_reverse) { - /* Reversal of FP compares takes care -- an ordered compare - becomes an unordered compare and vice versa. */ + /* Do not allow reversing comparisons if this is an IEEE comparison + (i.e. isgreater, etc.) that must not trap. + + We do allow the comparsion to be reversed for explicit jumps when + cond_reverse_fp::reverse_ok is passed. */ if (mode == CCFPmode - && (!flag_finite_math_only - || code == UNLT || code == UNLE || code == UNGT || code == UNGE + && (code == UNLT || code == UNLE || code == UNGT || code == UNGE || code == UNEQ || code == LTGT)) - return reverse_condition_maybe_unordered (code); - else - return reverse_condition (code); + { + if (!flag_finite_math_only + && do_fp_reverse == cond_reverse_fp::no_reverse) + return UNKNOWN; + + return reverse_condition_maybe_unordered (code); + } + + return reverse_condition (code); } /* Check if C (as 64bit integer) can be rotated to a constant which constains @@ -15900,14 +15910,17 @@ rs6000_emit_sCOND (machine_mode mode, rtx operands[]) || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE) { rtx not_result = gen_reg_rtx (CCEQmode); - rtx not_op, rev_cond_rtx; + rtx not_op, rev_cmp_rtx; machine_mode cc_mode; + rtx_code rev; cc_mode = GET_MODE (XEXP (condition_rtx, 0)); - rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code), - SImode, XEXP (condition_rtx, 0), const0_rtx); - not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); + rev = rs6000_reverse_condition (cc_mode, cond_code, + cond_reverse_fp::no_reverse); + rev_cmp_rtx = gen_rtx_fmt_ee (rev, SImode, XEXP (condition_rtx, 0), + const0_rtx); + not_op = gen_rtx_COMPARE (CCEQmode, rev_cmp_rtx, const0_rtx); emit_insn (gen_rtx_SET (not_result, not_op)); condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx); } diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index cffe2750ba9a..52b2f7bcb95f 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1804,11 +1804,17 @@ extern scalar_int_mode rs6000_pmode; /* Can the condition code MODE be safely reversed? This is safe in all cases on this port, because at present it doesn't use the - trapping FP comparisons (fcmpo). */ + trapping FP comparisons (fcmpo). + + However, this is not safe for IEEE comparisons (i.e. isgreater, etc.) + starting with the power9 because ifcvt.cc will want to create a fp cmove, + and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of the + arguments is a signalling NaN. */ #define REVERSIBLE_CC_MODE(MODE) 1 /* Given a condition code and a mode, return the inverse condition. */ -#define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) +#define REVERSE_CONDITION(CODE, MODE) \ + rs6000_reverse_condition (MODE, CODE, cond_reverse_fp::no_reverse) /* Target cpu costs. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 5a56ad79a9ee..eeb377d426ae 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5761,7 +5761,7 @@ "#" "&& 1" [(set (match_dup 6) - (if_then_else:V2DI (match_dup 1) + (if_then_else:V2DI (match_dup 9) (match_dup 7) (match_dup 8))) (set (match_dup 0) @@ -5770,48 +5770,20 @@ (match_dup 4) (match_dup 5)))] { - if (GET_CODE (operands[6]) == SCRATCH) - operands[6] = gen_reg_rtx (V2DImode); - - operands[7] = CONSTM1_RTX (V2DImode); - operands[8] = CONST0_RTX (V2DImode); -} - [(set_attr "length" "8") - (set_attr "type" "vecperm")]) - -;; Handle inverting the fpmask comparisons. -(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_invert_p9" - [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") - (if_then_else:SFDF - (match_operator:CCFP 1 "invert_fpmask_comparison_operator" - [(match_operand:SFDF2 2 "vsx_register_operand" "wa,wa") - (match_operand:SFDF2 3 "vsx_register_operand" "wa,wa")]) - (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") - (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) - (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX" - "#" - "&& 1" - [(set (match_dup 6) - (if_then_else:V2DI (match_dup 9) - (match_dup 7) - (match_dup 8))) - (set (match_dup 0) - (if_then_else:SFDF (ne (match_dup 6) - (match_dup 8)) - (match_dup 5) - (match_dup 4)))] -{ - rtx op1 = operands[1]; - enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + /* Fix NE to swap true/false values. */ + if (GET_CODE (operands[1]) != NE) + operands[9] = operands[1]; + else + { + operands[9] = gen_rtx_fmt_ee (EQ, CCFPmode, operands[2], operands[3]); + std::swap (operands[4], operands[5]); + } if (GET_CODE (operands[6]) == SCRATCH) operands[6] = gen_reg_rtx (V2DImode); operands[7] = CONSTM1_RTX (V2DImode); operands[8] = CONST0_RTX (V2DImode); - - operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); } [(set_attr "length" "8") (set_attr "type" "vecperm")]) @@ -5825,7 +5797,26 @@ (match_operand:V2DI 4 "all_ones_constant" "") (match_operand:V2DI 5 "zero_constant" "")))] "TARGET_P9_MINMAX" - "xscmp%V1dp %x0,%x2,%x3" +{ + /* Note, NE should not show up due to the previous define_insn_and_split + converting it into EQ.. */ + switch (GET_CODE (operands[1])) + { + case EQ: + case GT: + case GE: + return "xscmp%V1dp %x0,%x2,%x3"; + + case LT: + return "xscmpgedp %x0,%x3,%x2"; + + case LE: + return "xscmpgtdp %x0,%x3,%x2"; + + default: + gcc_unreachable (); + } +} [(set_attr "type" "fpcompare")]) (define_insn "*xxsel<mode>" @@ -5866,7 +5857,7 @@ "#" "&& 1" [(set (match_dup 6) - (if_then_else:V2DI (match_dup 1) + (if_then_else:V2DI (match_dup 9) (match_dup 7) (match_dup 8))) (set (match_dup 0) @@ -5875,48 +5866,20 @@ (match_dup 4) (match_dup 5)))] { - if (GET_CODE (operands[6]) == SCRATCH) - operands[6] = gen_reg_rtx (V2DImode); - - operands[7] = CONSTM1_RTX (V2DImode); - operands[8] = CONST0_RTX (V2DImode); -} - [(set_attr "length" "8") - (set_attr "type" "vecperm")]) - -;; Handle inverting the fpmask comparisons. -(define_insn_and_split "*mov<mode>cc_invert_p10" - [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") - (if_then_else:IEEE128 - (match_operator:CCFP 1 "invert_fpmask_comparison_operator" - [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") - (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) - (match_operand:IEEE128 4 "altivec_register_operand" "v,v") - (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) - (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW" - "#" - "&& 1" - [(set (match_dup 6) - (if_then_else:V2DI (match_dup 9) - (match_dup 7) - (match_dup 8))) - (set (match_dup 0) - (if_then_else:IEEE128 (ne (match_dup 6) - (match_dup 8)) - (match_dup 5) - (match_dup 4)))] -{ - rtx op1 = operands[1]; - enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + /* Fix NE to swap true/false values. */ + if (GET_CODE (operands[1]) != NE) + operands[9] = operands[1]; + else + { + operands[9] = gen_rtx_fmt_ee (EQ, CCFPmode, operands[2], operands[3]); + std::swap (operands[4], operands[5]); + } if (GET_CODE (operands[6]) == SCRATCH) operands[6] = gen_reg_rtx (V2DImode); operands[7] = CONSTM1_RTX (V2DImode); operands[8] = CONST0_RTX (V2DImode); - - operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); } [(set_attr "length" "8") (set_attr "type" "vecperm")]) @@ -5930,7 +5893,26 @@ (match_operand:V2DI 4 "all_ones_constant" "") (match_operand:V2DI 5 "zero_constant" "")))] "TARGET_POWER10 && TARGET_FLOAT128_HW" - "xscmp%V1qp %0,%2,%3" +{ + /* Note, NE should not show up due to the previous define_insn_and_split + converting it into EQ.. */ + switch (GET_CODE (operands[1])) + { + case EQ: + case GT: + case GE: + return "xscmp%V1qp %0,%2,%3"; + + case LT: + return "xscmpgeqp %0,%3,%2"; + + case LE: + return "xscmpgtqp %0,%3,%2"; + + default: + gcc_unreachable (); + } +} [(set_attr "type" "fpcompare")]) (define_insn "*xxsel<mode>" @@ -13546,7 +13528,7 @@ ;; If we are comparing the result of two comparisons, this can be done ;; using creqv or crxor. -(define_insn_and_split "" +(define_insn_and_split "*reverse_branch_comparison" [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y") (compare:CCEQ (match_operator 1 "branch_comparison_operator" [(match_operand 2 "cc_reg_operand" "y") @@ -13568,19 +13550,25 @@ GET_MODE (operands[3])); if (! positive_1) - operands[1] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[2]), - GET_CODE (operands[1])), - SImode, - operands[2], const0_rtx); + { + rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[2]), + GET_CODE (operands[1]), + cond_reverse_fp::reverse_ok); + gcc_assert (rev != UNKNOWN); + operands[1] = gen_rtx_fmt_ee (rev, SImode, operands[2], const0_rtx); + } else if (GET_MODE (operands[1]) != SImode) operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode, operands[2], const0_rtx); if (! positive_2) - operands[3] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE (operands[4]), - GET_CODE (operands[3])), - SImode, - operands[4], const0_rtx); + { + rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[4]), + GET_CODE (operands[3]), + cond_reverse_fp::reverse_ok); + gcc_assert (rev != UNKNOWN); + operands[3] = gen_rtx_fmt_ee (rev, SImode, operands[4], const0_rtx); + } else if (GET_MODE (operands[3]) != SImode) operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode, operands[4], const0_rtx); diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541-1.c b/gcc/testsuite/gcc.target/powerpc/pr118541-1.c new file mode 100644 index 000000000000..30901cd1e41d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr118541-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_vsx } */ + +/* PR target/118541 says that the IEEE comparison functions like isgreater + should not optimize floating point conditional moves to use + x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause traps + if one of the arguments is a signaling NaN. */ + +/* Verify isgreater does not generate xscmpgtdp when NaNs are allowed. */ + +double +ieee_compare (double a, double b, double c, double d) +{ + /* + * fcmpu 0,1,2 + * fmr 1,4 + * bnglr 0 + * fmr 1,3 + * blr + */ + + return __builtin_isgreater (a, b) ? c : d; +} + +/* { dg-final { scan-assembler-not {\mxscmpg[te]dp\M} } } */ +/* { dg-final { scan-assembler-not {\mxxsel\M} } } */ +/* { dg-final { scan-assembler {\mxscmpudp\M|\mfcmpu\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541-2.c b/gcc/testsuite/gcc.target/powerpc/pr118541-2.c new file mode 100644 index 000000000000..1f36890b775f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr118541-2.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_vsx } */ + +/* PR target/118541 says that the IEEE comparison functions like isgreater + should not optimize floating point conditional moves to use + x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause traps + if one of the arguments is a signaling NaN. */ + +/* Verify normal > does generate xscmpgtdp when NaNs are allowed. */ + +double +normal_compare (double a, double b, double c, double d) +{ + /* + * xscmpgtdp 1,1,2 + * xxsel 1,4,3,1 + * blr + */ + + return a > b ? c : d; +} + +/* { dg-final { scan-assembler {\mxscmpg[te]dp\M} } } */ +/* { dg-final { scan-assembler {\mxxsel\M} } } */ +/* { dg-final { scan-assembler-not {\mxscmpudp\M|\mfcmpu\M} } } */