On Fri, Jan 31, 2025 at 3:55 AM Michael Meissner <meiss...@linux.ibm.com> wrote: > > Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. > > In bug PR target/118541 on power9, power10, and power11 systems, for the > function: > > extern double __ieee754_acos (double); > > double > __acospi (double x) > { > double ret = __ieee754_acos (x) / 3.14; > return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; > } > > GCC currently generates the following code: > > Power9 Power10 and Power11 > ====== =================== > bl __ieee754_acos bl __ieee754_acos@notoc > nop plfd 0,.LC0@pcrel > addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 > addi 1,1,32 addi 1,1,32 > lfd 0,.LC2@toc@l(9) ld 0,16(1) > addis 9,2,.LC0@toc@ha fdiv 0,1,0 > ld 0,16(1) mtlr 0 > lfd 12,.LC0@toc@l(9) xscmpgtdp 1,0,12 > fdiv 0,1,0 xxsel 1,0,12,1 > mtlr 0 blr > xscmpgtdp 1,0,12 > xxsel 1,0,12,1 > blr > > This is because ifcvt.c optimizes the conditional floating point move to use > the > XSCMPGTDP instruction. > > However, the XSCMPGTDP instruction will generate an interrupt if one of the > arguments is a signalling NaN and signalling NaNs can generate an interrupt. > The IEEE comparison functions (isgreater, etc.) require that the comparison > not > raise an interrupt. > > The following patch changes the PowerPC back end so that ifcvt.c will not > change > the if/then test and move into a conditional move if the comparison is one of > the comparisons that do not raise an error with signalling NaNs and -Ofast is > not used. If a normal comparison is used or -Ofast is used, GCC will continue > to generate XSCMPGTDP and XXSEL. > > For the following code: > > double > ordered_compare (double a, double b, double c, double d) > { > return __builtin_isgreater (a, b) ? c : d; > } > > /* Verify normal > does generate xscmpgtdp. */ > > double > normal_compare (double a, double b, double c, double d) > { > return a > b ? c : d; > } > > with the following patch, GCC generates the following for power9, power10, and > power11: > > ordered_compare: > fcmpu 0,1,2 > fmr 1,4 > bnglr 0 > fmr 1,3 > blr > > normal_compare: > xscmpgtdp 1,1,2 > xxsel 1,4,3,1 > blr > > I have built bootstrap compilers on big endian power9 systems and little > endian > power9/power10 systems and there were no regressions. Can I check this patch > into the GCC trunk, and after a waiting period, can I check this into the > active > older branches? > > 2025-01-30 Michael Meissner <meiss...@linux.ibm.com> > > gcc/ > > PR target/118541 > * config/rs6000/rs6000-protos.h (REVERSE_COND_ORDERED_OK): New macro. > (REVERSE_COND_NO_ORDERED): Likewise. > (rs6000_reverse_condition): Add argument. > * config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow > ordered comparisons to be reversed for floating point cmoves. > (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call. > * config/rs6000/rs6000.h (REVERSE_CONDITION): Likewise. > * config/rs6000/rs6000.md (reverse_branch_comparison): Name insn. > Adjust rs6000_reverse_condition call. > > gcc/testsuite/ > > PR target/118541 > * gcc.target/powerpc/pr118541.c: New test. > --- > gcc/config/rs6000/rs6000-protos.h | 6 ++- > gcc/config/rs6000/rs6000.cc | 23 ++++++++--- > gcc/config/rs6000/rs6000.h | 10 ++++- > gcc/config/rs6000/rs6000.md | 24 +++++++----- > gcc/testsuite/gcc.target/powerpc/pr118541.c | 43 +++++++++++++++++++++ > 5 files changed, 89 insertions(+), 17 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr118541.c > > diff --git a/gcc/config/rs6000/rs6000-protos.h > b/gcc/config/rs6000/rs6000-protos.h > index 4619142d197..112332660d3 100644 > --- a/gcc/config/rs6000/rs6000-protos.h > +++ b/gcc/config/rs6000/rs6000-protos.h > @@ -114,8 +114,12 @@ extern const char *rs6000_sibcall_template (rtx *, > unsigned int); > extern const char *rs6000_indirect_call_template (rtx *, unsigned int); > extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int); > extern const char *rs6000_pltseq_template (rtx *, int); > + > +#define REVERSE_COND_ORDERED_OK false > +#define REVERSE_COND_NO_ORDERED true > + > extern enum rtx_code rs6000_reverse_condition (machine_mode, > - enum rtx_code); > + enum rtx_code, bool); > extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx); > extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx); > extern void rs6000_emit_sCOND (machine_mode, rtx[]); > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index f9f9a0b931d..eaf79435ec3 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -15360,15 +15360,25 @@ rs6000_print_patchable_function_entry (FILE *file, > } > > enum rtx_code > -rs6000_reverse_condition (machine_mode mode, enum rtx_code code) > +rs6000_reverse_condition (machine_mode mode, > + enum rtx_code code, > + bool no_ordered) > { > /* Reversal of FP compares takes care -- an ordered compare > - becomes an unordered compare and vice versa. */ > + becomes an unordered compare and vice versa. > + > + However, this is not safe for ordered comparisons (i.e. for isgreater, > + etc.) starting with the power9 because ifcvt.cc will want to create a > fp > + cmove, and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one > of > + the arguments is a signalling NaN. */ > + > if (mode == CCFPmode > && (!flag_finite_math_only
I fail to see where -Ofast is allowed, but only see the pre-existing flag check above which checks for no NaN/Inf rather than sNaN - the latter would be checked with HONOR_SNANS (mode). I guess you really want > || code == UNLT || code == UNLE || code == UNGT || code == UNGE > || code == UNEQ || code == LTGT)) > - return reverse_condition_maybe_unordered (code); > + return (no_ordered return (no_ordered && HONOR_SNANS (mode) no? > + ? UNKNOWN > + : reverse_condition_maybe_unordered (code)); > else > return reverse_condition (code); > } > @@ -15980,11 +15990,14 @@ rs6000_emit_sCOND (machine_mode mode, rtx > operands[]) > rtx not_result = gen_reg_rtx (CCEQmode); > rtx not_op, rev_cond_rtx; > machine_mode cc_mode; > + enum rtx_code rev; > > cc_mode = GET_MODE (XEXP (condition_rtx, 0)); > > - rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, > cond_code), > - SImode, XEXP (condition_rtx, 0), > const0_rtx); > + rev = rs6000_reverse_condition (cc_mode, cond_code, > + REVERSE_COND_ORDERED_OK); > + rev_cond_rtx = gen_rtx_fmt_ee (rev, SImode, XEXP (condition_rtx, 0), > + const0_rtx); > not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); > emit_insn (gen_rtx_SET (not_result, not_op)); > condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx); > diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h > index ec08c96d0f6..c595d7138bc 100644 > --- a/gcc/config/rs6000/rs6000.h > +++ b/gcc/config/rs6000/rs6000.h > @@ -1812,11 +1812,17 @@ extern scalar_int_mode rs6000_pmode; > > /* Can the condition code MODE be safely reversed? This is safe in > all cases on this port, because at present it doesn't use the > - trapping FP comparisons (fcmpo). */ > + trapping FP comparisons (fcmpo). > + > + However, this is not safe for ordered comparisons (i.e. for isgreater, > etc.) > + starting with the power9 because ifcvt.cc will want to create a fp cmove, > + and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of the > + arguments is a signalling NaN. */ > #define REVERSIBLE_CC_MODE(MODE) 1 > > /* Given a condition code and a mode, return the inverse condition. */ > -#define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) > +#define REVERSE_CONDITION(CODE, MODE) \ > + rs6000_reverse_condition (MODE, CODE, REVERSE_COND_NO_ORDERED) > > > /* Target cpu costs. */ > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 65da0c65330..5f7ff36617d 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -13497,7 +13497,7 @@ (define_insn "@cceq_rev_compare_<mode>" > ;; If we are comparing the result of two comparisons, this can be done > ;; using creqv or crxor. > > -(define_insn_and_split "" > +(define_insn_and_split "*reverse_branch_comparison" > [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y") > (compare:CCEQ (match_operator 1 "branch_comparison_operator" > [(match_operand 2 "cc_reg_operand" "y") > @@ -13519,19 +13519,25 @@ (define_insn_and_split "" > GET_MODE (operands[3])); > > if (! positive_1) > - operands[1] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE > (operands[2]), > - GET_CODE > (operands[1])), > - SImode, > - operands[2], const0_rtx); > + { > + enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[2]), > + GET_CODE (operands[1]), > + REVERSE_COND_ORDERED_OK); > + gcc_assert (rev != UNKNOWN); > + operands[1] = gen_rtx_fmt_ee (rev, SImode, operands[2], const0_rtx); > + } > else if (GET_MODE (operands[1]) != SImode) > operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode, > operands[2], const0_rtx); > > if (! positive_2) > - operands[3] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE > (operands[4]), > - GET_CODE > (operands[3])), > - SImode, > - operands[4], const0_rtx); > + { > + enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[4]), > + GET_CODE (operands[3]), > + REVERSE_COND_ORDERED_OK); > + gcc_assert (rev != UNKNOWN); > + operands[3] = gen_rtx_fmt_ee (rev, SImode, operands[4], const0_rtx); > + } > else if (GET_MODE (operands[3]) != SImode) > operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode, > operands[4], const0_rtx); > diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541.c > b/gcc/testsuite/gcc.target/powerpc/pr118541.c > new file mode 100644 > index 00000000000..6ef67a2cf76 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr118541.c > @@ -0,0 +1,43 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ > +/* { dg-require-effective-target powerpc_vsx } */ > + > +/* PR target/118541 says that the ordered comparison functions like isgreater > + should not optimize floating point conditional moves to use > + x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause > traps > + if one of the arguments is a signaling NaN. */ > + > +/* Verify isgreater does not generate xscmpgtdp. */ > + > +double > +ordered_compare (double a, double b, double c, double d) > +{ > + /* > + * fcmpu 0,1,2 > + * fmr 1,4 > + * bnglr 0 > + * fmr 1,3 > + * blr > + */ > + > + return __builtin_isgreater (a, b) ? c : d; > +} > + > +/* Verify normal > does generate xscmpgtdp. */ > + > +double > +normal_compare (double a, double b, double c, double d) > +{ > + /* > + * xscmpgtdp 1,1,2 > + * xxsel 1,4,3,1 > + * blr > + */ > + > + return a > b ? c : d; > +} > + > +/* { dg-final { scan-assembler-times {\mxscmpg[te]dp\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mxxsel\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mxscmpudp\M|\mfcmpu\M} 1 } } */ > + > -- > 2.48.1 > > > -- > Michael Meissner, IBM > PO Box 98, Ayer, Massachusetts, USA, 01432 > email: meiss...@linux.ibm.com