https://gcc.gnu.org/g:c8946223276b6ef3ab1b0514bffdbb0d5bb2e275
commit c8946223276b6ef3ab1b0514bffdbb0d5bb2e275 Author: Michael Meissner <[email protected]> Date: Tue Jan 6 13:09:36 2026 -0500 Revert changes Diff: --- gcc/ChangeLog.bugs | 76 +-------- gcc/config/rs6000/predicates.md | 11 ++ gcc/config/rs6000/rs6000.cc | 108 ++++++++++++- gcc/config/rs6000/rs6000.md | 210 +++++++++++++++++++++++-- gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c | 175 ++++++++++++++++++++- 5 files changed, 478 insertions(+), 102 deletions(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index 746ebc4fafde..fe831e13f7dc 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -1,78 +1,4 @@ -==================== Branch work233-bugs, patch #305 ==================== - -Fix PR 118541, do not generate floating point cmoves for IEEE compares. - -In bug PR target/118541 on power9, power10, and power11 systems, for the -function: - - extern double __ieee754_acos (double); - - double - __acospi (double x) - { - double ret = __ieee754_acos (x) / 3.14; - return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; - } - -GCC currently generates the following code: - - Power9 Power10 and Power11 - ====== =================== - bl __ieee754_acos bl __ieee754_acos@notoc - nop plfd 0,.LC0@pcrel - addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 - addi 1,1,32 addi 1,1,32 - lfd 0,.LC2@toc@l(9) ld 0,16(1) - addis 9,2,.LC0@toc@ha fdiv 0,1,0 - ld 0,16(1) mtlr 0 - lfd 12,.LC0@toc@l(9) xscmpgtdp 1,0,12 - fdiv 0,1,0 xxsel 1,0,12,1 - mtlr 0 blr - xscmpgtdp 1,0,12 - xxsel 1,0,12,1 - blr - -This is because ifcvt.cc optimizes the conditional floating point move to use the -XSCMPGTDP instruction. - -However, the XSCMPGTDP instruction will generate an interrupt if one of the -arguments is a signalling NaN and signalling NaNs can generate an interrupt. -The IEEE comparison functions (isgreater, etc.) require that the comparison not -raise an interrupt. - -This patch just eliminates the generation of XSCMPEQDP, XSCMPEQQP, XSCMPGTDP, -XSCMPGTQP, XSCMPGEDP, and XSCMPGEQP instructions. - -2025-12-08 Michael Meissner <[email protected]> - -gcc/ - - PR target/118541 - * config/rs6000/predicates.md (fpmask_comparison_operator): Delete - predicate. - (invert_fpmask_comparison_operator): Likewise. - * config/rs6000/rs6000.cc (rs6000_maybe_emit_fp_cmove): Delete function. - (have_fp_minc_maxc): Rename from have_compare_and_set_mask since we no - longer generate the floating point compare and set mask instructions, - but we also need the test for floating point min/max. - (rs6000_emit_cmove): Delete support for generating floating point - compare and set mask instructions since those instructions can raise - NaN exceptions. - * config/rs6000/rs6000.md (mov<SFDF:mode><SFDF2:mode>cc_p9): Delete - insn. - (fpmask<mode>, SFDF iterator): Likewise. - (xxsel<mode>): Likewise. - (mov<mode>cc_p10): Likewise. - (mov<mode>cc_invert_p10): Likewise. - (fpmask<mode>, IEEE128 iterator): Likewise. - -gcc/testsuite/ - - PR target/118541 - * gcc.target/powerpc/p9-minmax-2.c: Remove tests for floating point - compare and mask instructions. - * gcc.target/powerpc/float128-cmove.c: Delete test. - * gcc.target/powerpc/p9-minmax-1.c: Likewise. +==================== Branch work233-bugs, patch #305 was reverted ==================== ==================== Branch work233-bugs, patch #304 ==================== diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 56e65162b158..54dbc8bcc952 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1458,6 +1458,17 @@ (and (match_operand 0 "branch_comparison_operator") (match_code "ne,le,ge,leu,geu,ordered"))) +;; Return 1 if OP is a comparison operator suitable for floating point +;; vector/scalar comparisons that generate a -1/0 mask. +(define_predicate "fpmask_comparison_operator" + (match_code "eq,gt,ge")) + +;; Return 1 if OP is a comparison operator suitable for vector/scalar +;; comparisons that generate a 0/-1 mask (i.e. the inverse of +;; fpmask_comparison_operator). +(define_predicate "invert_fpmask_comparison_operator" + (match_code "ne,unlt,unle")) + ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar ;; comparisons that generate a -1/0 mask. (define_predicate "vecint_comparison_operator" diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index cdba8dd4d57e..cc7bd5ee7bf9 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -16338,11 +16338,98 @@ rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond) return true; } -/* Helper function to return true if the target supports the "C" minimum and - maximum instructions. */ +/* Possibly emit a floating point conditional move by generating a compare that + sets a mask instruction and a XXSEL select instruction. + + Move TRUE_COND to DEST if OP of the operands of the last comparison is + nonzero/true, FALSE_COND if it is zero/false. + + Return false if the operation cannot be generated, and true if we could + generate the instruction. */ + +static bool +rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) +{ + enum rtx_code code = GET_CODE (op); + rtx op0 = XEXP (op, 0); + rtx op1 = XEXP (op, 1); + machine_mode compare_mode = GET_MODE (op0); + machine_mode result_mode = GET_MODE (dest); + rtx compare_rtx; + rtx cmove_rtx; + rtx clobber_rtx; + + if (!can_create_pseudo_p ()) + return 0; + + /* We allow the comparison to be either SFmode/DFmode and the true/false + condition to be either SFmode/DFmode. I.e. we allow: + + float a, b; + double c, d, r; + + r = (a == b) ? c : d; + + and: + + double a, b; + float c, d, r; + + r = (a == b) ? c : d; + + but we don't allow intermixing the IEEE 128-bit floating point types with + the 32/64-bit scalar types. */ + + if (!(compare_mode == result_mode + || (compare_mode == SFmode && result_mode == DFmode) + || (compare_mode == DFmode && result_mode == SFmode))) + return false; + + switch (code) + { + case EQ: + case GE: + case GT: + break; + + case NE: + case LT: + case LE: + code = swap_condition (code); + std::swap (op0, op1); + break; + + default: + return false; + } + + /* Generate: [(parallel [(set (dest) + (if_then_else (op (cmp1) (cmp2)) + (true) + (false))) + (clobber (scratch))])]. */ + + compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1); + cmove_rtx = gen_rtx_SET (dest, + gen_rtx_IF_THEN_ELSE (result_mode, + compare_rtx, + true_cond, + false_cond)); + + clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, cmove_rtx, clobber_rtx))); + + return true; +} + +/* Helper function to return true if the target has instructions to do a + compare and set mask instruction that can be used with XXSEL to implement a + conditional move. It is also assumed that such a target also supports the + "C" minimum and maximum instructions. */ static bool -have_fp_minc_maxc (machine_mode mode) +have_compare_and_set_mask (machine_mode mode) { switch (mode) { @@ -16387,10 +16474,17 @@ rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) if (GET_MODE (false_cond) != result_mode) return false; - /* See if we can use the "C" minimum, "C" maximum instructions. */ - if (have_fp_minc_maxc (compare_mode) && compare_mode == result_mode - && rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond)) - return true; + /* See if we can use the "C" minimum, "C" maximum, and compare and set mask + instructions. */ + if (have_compare_and_set_mask (compare_mode) + && have_compare_and_set_mask (result_mode)) + { + if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond)) + return true; + + if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond)) + return true; + } /* Don't allow using floating point comparisons for integer results for now. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 7555065ec555..dbdc2eef2df2 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5645,19 +5645,6 @@ FAIL; }) -(define_expand "mov<mode>cc" - [(set (match_operand:IEEE128 0 "gpc_reg_operand") - (if_then_else:IEEE128 (match_operand 1 "comparison_operator") - (match_operand:IEEE128 2 "gpc_reg_operand") - (match_operand:IEEE128 3 "gpc_reg_operand")))] - "TARGET_POWER10 && TARGET_FLOAT128_HW" -{ - if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) - DONE; - else - FAIL; -}) - ;; We use the BASE_REGS for the isel input operands because, if rA is ;; 0, the value of 0 is placed in rD upon truth. Similarly for rB ;; because we may switch the operands and rB may end up being rA. @@ -5760,6 +5747,203 @@ "TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT" "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) + +(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_p9" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") + (if_then_else:SFDF + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:SFDF2 2 "vsx_register_operand" "wa,wa") + (match_operand:SFDF2 3 "vsx_register_operand" "wa,wa")]) + (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") + (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) + (clobber (match_scratch:V2DI 6 "=0,&wa"))] + "TARGET_P9_MINMAX" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 1) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:SFDF (ne (match_dup 6) + (match_dup 8)) + (match_dup 4) + (match_dup 5)))] +{ + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Handle inverting the fpmask comparisons. +(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_invert_p9" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") + (if_then_else:SFDF + (match_operator:CCFP 1 "invert_fpmask_comparison_operator" + [(match_operand:SFDF2 2 "vsx_register_operand" "wa,wa") + (match_operand:SFDF2 3 "vsx_register_operand" "wa,wa")]) + (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") + (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) + (clobber (match_scratch:V2DI 6 "=0,&wa"))] + "TARGET_P9_MINMAX" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 9) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:SFDF (ne (match_dup 6) + (match_dup 8)) + (match_dup 5) + (match_dup 4)))] +{ + rtx op1 = operands[1]; + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); + + operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +(define_insn "*fpmask<mode>" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (if_then_else:V2DI + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:SFDF 2 "vsx_register_operand" "wa") + (match_operand:SFDF 3 "vsx_register_operand" "wa")]) + (match_operand:V2DI 4 "all_ones_constant" "") + (match_operand:V2DI 5 "zero_constant" "")))] + "TARGET_P9_MINMAX" + "xscmp%V1dp %x0,%x2,%x3" + [(set_attr "type" "fpcompare")]) + +(define_insn "*xxsel<mode>" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (if_then_else:SFDF (ne (match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "zero_constant" "")) + (match_operand:SFDF 3 "vsx_register_operand" "wa") + (match_operand:SFDF 4 "vsx_register_operand" "wa")))] + "TARGET_P9_MINMAX" + "xxsel %x0,%x4,%x3,%x1" + [(set_attr "type" "vecmove")]) + +;; Support for ISA 3.1 IEEE 128-bit conditional move. The mode used in the +;; comparison must be the same as used in the move. +(define_expand "mov<mode>cc" + [(set (match_operand:IEEE128 0 "gpc_reg_operand") + (if_then_else:IEEE128 (match_operand 1 "comparison_operator") + (match_operand:IEEE128 2 "gpc_reg_operand") + (match_operand:IEEE128 3 "gpc_reg_operand")))] + "TARGET_POWER10 && TARGET_FLOAT128_HW" +{ + if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*mov<mode>cc_p10" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") + (if_then_else:IEEE128 + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) + (match_operand:IEEE128 4 "altivec_register_operand" "v,v") + (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) + (clobber (match_scratch:V2DI 6 "=0,&v"))] + "TARGET_POWER10 && TARGET_FLOAT128_HW" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 1) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:IEEE128 (ne (match_dup 6) + (match_dup 8)) + (match_dup 4) + (match_dup 5)))] +{ + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Handle inverting the fpmask comparisons. +(define_insn_and_split "*mov<mode>cc_invert_p10" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") + (if_then_else:IEEE128 + (match_operator:CCFP 1 "invert_fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") + (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) + (match_operand:IEEE128 4 "altivec_register_operand" "v,v") + (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) + (clobber (match_scratch:V2DI 6 "=0,&v"))] + "TARGET_POWER10 && TARGET_FLOAT128_HW" + "#" + "&& 1" + [(set (match_dup 6) + (if_then_else:V2DI (match_dup 9) + (match_dup 7) + (match_dup 8))) + (set (match_dup 0) + (if_then_else:IEEE128 (ne (match_dup 6) + (match_dup 8)) + (match_dup 5) + (match_dup 4)))] +{ + rtx op1 = operands[1]; + enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); + + if (GET_CODE (operands[6]) == SCRATCH) + operands[6] = gen_reg_rtx (V2DImode); + + operands[7] = CONSTM1_RTX (V2DImode); + operands[8] = CONST0_RTX (V2DImode); + + operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +(define_insn "*fpmask<mode>" + [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") + (if_then_else:V2DI + (match_operator:CCFP 1 "fpmask_comparison_operator" + [(match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "v")]) + (match_operand:V2DI 4 "all_ones_constant" "") + (match_operand:V2DI 5 "zero_constant" "")))] + "TARGET_POWER10 && TARGET_FLOAT128_HW" + "xscmp%V1qp %0,%2,%3" + [(set_attr "type" "fpcompare")]) + +(define_insn "*xxsel<mode>" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (if_then_else:IEEE128 + (ne (match_operand:V2DI 1 "altivec_register_operand" "v") + (match_operand:V2DI 2 "zero_constant" "")) + (match_operand:IEEE128 3 "altivec_register_operand" "v") + (match_operand:IEEE128 4 "altivec_register_operand" "v")))] + "TARGET_POWER10 && TARGET_FLOAT128_HW" + "xxsel %x0,%x4,%x3,%x1" + [(set_attr "type" "vecmove")]) + ;; Conversions to and from floating-point. diff --git a/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c b/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c index b93056f78aea..0684eb501c56 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c @@ -1,29 +1,190 @@ /* { dg-do compile } */ -/* { dg-options "-mdejagnu-cpu=power9 -mvsx -O2" } */ +/* { dg-options "-mdejagnu-cpu=power9 -mvsx -O2 -mpower9-minmax" } */ /* { dg-require-effective-target powerpc_vsx } */ -/* { dg-final { scan-assembler "xsmaxcdp" } } */ -/* { dg-final { scan-assembler "xsmincdp" } } */ +/* { dg-final { scan-assembler-not "fsel" } } */ +/* { dg-final { scan-assembler "xscmpeqdp" } } */ +/* { dg-final { scan-assembler "xscmpgtdp" } } */ +/* { dg-final { scan-assembler-not "xscmpodp" } } */ +/* { dg-final { scan-assembler-not "xscmpudp" } } */ +/* { dg-final { scan-assembler "xsmaxcdp" } } */ +/* { dg-final { scan-assembler-not "xsmaxdp" } } */ +/* { dg-final { scan-assembler "xsmincdp" } } */ +/* { dg-final { scan-assembler-not "xsmindp" } } */ +/* { dg-final { scan-assembler "xxsel" } } */ + +/* Due to NaN support, <= and >= are not handled presently unless -ffast-math + is used. At some point this will be fixed and the xscmpgedp instruction can + be generated normally. The <= and >= tests are bracketed with + #ifdef DO_GE_LE. */ + +#ifdef DO_GE_LE +double +dbl_max1 (double a, double b) +{ + return (a >= b) ? a : b; +} +#endif double -dbl_max (double a, double b) +dbl_max2 (double a, double b) { return (a > b) ? a : b; } double -dbl_min (double a, double b) +dbl_min1 (double a, double b) { return (a < b) ? a : b; } +#ifdef DO_GE_LE +double +dbl_min2 (double a, double b) +{ + return (a <= b) ? a : b; +} +#endif + +double +dbl_cmp_eq (double a, double b, double c, double d) +{ + return (a == b) ? c : d; +} + +double +dbl_cmp_ne (double a, double b, double c, double d) +{ + return (a != b) ? c : d; +} + +double +dbl_cmp_gt (double a, double b, double c, double d) +{ + return (a > b) ? c : d; +} + +#ifdef DO_GE_LE +double +dbl_cmp_ge (double a, double b, double c, double d) +{ + return (a >= b) ? c : d; +} +#endif + +double +dbl_cmp_lt (double a, double b, double c, double d) +{ + return (a < b) ? c : d; +} + +#ifdef DO_GE_LE +double +dbl_cmp_le (double a, double b, double c, double d) +{ + return (a <= b) ? c : d; +} +#endif + +#ifdef DO_GE_LE +float +flt_max1 (float a, float b) +{ + return (a >= b) ? a : b; +} +#endif + float -flt_max (float a, float b) +flt_max2 (float a, float b) { return (a > b) ? a : b; } float -flt_min (float a, float b) +flt_min1 (float a, float b) +{ + return (a < b) ? a : b; +} + +#ifdef DO_GE_LE +float +flt_min2 (float a, float b) +{ + return (a <= b) ? a : b; +} +#endif + +float +flt_cmp_eq (float a, float b, float c, float d) +{ + return (a == b) ? c : d; +} + +float +flt_cmp_ne (float a, float b, float c, float d) +{ + return (a != b) ? c : d; +} + +float +flt_cmp_gt (float a, float b, float c, float d) +{ + return (a > b) ? c : d; +} + +#ifdef DO_GE_LE +float +flt_cmp_ge (float a, float b, float c, float d) +{ + return (a >= b) ? c : d; +} +#endif + +float +flt_cmp_lt (float a, float b, float c, float d) +{ + return (a < b) ? c : d; +} + +#ifdef DO_GE_LE +float +flt_cmp_le (float a, float b, float c, float d) +{ + return (a <= b) ? c : d; +} +#endif + +double +dbl_flt_max1 (float a, float b) +{ + return (a > b) ? a : b; +} + +double +dbl_flt_max2 (double a, float b) +{ + return (a > b) ? a : b; +} + +double +dbl_flt_max3 (float a, double b) +{ + return (a > b) ? a : b; +} + +double +dbl_flt_min1 (float a, float b) +{ + return (a < b) ? a : b; +} + +double +dbl_flt_min2 (double a, float b) +{ + return (a < b) ? a : b; +} + +double +dbl_flt_min3 (float a, double b) { return (a < b) ? a : b; }
