This patch rewrites the SVE FP comparisons so that they always use unspecs and carry an additional operand that indicates whether the predicate is known to be a PTRUE. It's part of a series that rewrites the SVE FP patterns so that they can cope with non-PTRUE predicates.
Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf. Applied as r274421. Richard 2019-08-14 Richard Sandiford <richard.sandif...@arm.com> gcc/ * config/aarch64/iterators.md (UNSPEC_COND_FCMUO): New unspec. (cmp_op): Handle it. (SVE_COND_FP_CMP): Rename to... (SVE_COND_FP_CMP_I0): ...this. (SVE_FP_CMP): Remove. * config/aarch64/aarch64-sve.md (*fcm<SVE_FP_CMP:cmp_op><SVE_F:mode>): Replace with... (*fcm<SVE_COND_FP_CMP_I0:cmp_op><SVE_F:mode>): ...this new pattern, using unspecs to represent the comparison. (*fcmuo<SVE_F:mode>): Use UNSPEC_COND_FCMUO. (*fcm<cmp_op><mode>_and_combine, *fcmuo<mode>_and_combine): Update accordingly. * config/aarch64/aarch64.c (aarch64_emit_sve_ptrue_op): Delete. (aarch64_unspec_cond_code): Move after integer code. Handle UNORDERED. (aarch64_emit_sve_predicated_cond): Replace with... (aarch64_emit_sve_fp_cond): ...this new function. (aarch64_emit_sve_or_conds): Replace with... (aarch64_emit_sve_or_fp_conds): ...this new function. (aarch64_emit_sve_inverted_cond): Replace with... (aarch64_emit_sve_invert_fp_cond): ...this new function. (aarch64_expand_sve_vec_cmp_float): Update accordingly. Index: gcc/config/aarch64/iterators.md =================================================================== --- gcc/config/aarch64/iterators.md 2019-08-14 09:25:49.689451157 +0100 +++ gcc/config/aarch64/iterators.md 2019-08-14 09:29:14.195939545 +0100 @@ -479,6 +479,7 @@ (define_c_enum "unspec" UNSPEC_COND_FCMLE ; Used in aarch64-sve.md. UNSPEC_COND_FCMLT ; Used in aarch64-sve.md. UNSPEC_COND_FCMNE ; Used in aarch64-sve.md. + UNSPEC_COND_FCMUO ; Used in aarch64-sve.md. UNSPEC_COND_FDIV ; Used in aarch64-sve.md. UNSPEC_COND_FMAXNM ; Used in aarch64-sve.md. UNSPEC_COND_FMINNM ; Used in aarch64-sve.md. @@ -1273,9 +1274,6 @@ (define_code_iterator SVE_UNPRED_FP_BINA ;; SVE integer comparisons. (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) -;; SVE floating-point comparisons. 
-(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt]) - ;; ------------------------------------------------------------------- ;; Code Attributes ;; ------------------------------------------------------------------- @@ -1663,12 +1661,13 @@ (define_int_iterator SVE_COND_FP_TERNARY UNSPEC_COND_FNMLA UNSPEC_COND_FNMLS]) -(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_FCMEQ - UNSPEC_COND_FCMGE - UNSPEC_COND_FCMGT - UNSPEC_COND_FCMLE - UNSPEC_COND_FCMLT - UNSPEC_COND_FCMNE]) +;; SVE FP comparisons that accept #0.0. +(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ + UNSPEC_COND_FCMGE + UNSPEC_COND_FCMGT + UNSPEC_COND_FCMLE + UNSPEC_COND_FCMLT + UNSPEC_COND_FCMNE]) (define_int_iterator FCADD [UNSPEC_FCADD90 UNSPEC_FCADD270]) @@ -1955,7 +1954,8 @@ (define_int_attr cmp_op [(UNSPEC_COND_FC (UNSPEC_COND_FCMGT "gt") (UNSPEC_COND_FCMLE "le") (UNSPEC_COND_FCMLT "lt") - (UNSPEC_COND_FCMNE "ne")]) + (UNSPEC_COND_FCMNE "ne") + (UNSPEC_COND_FCMUO "uo")]) (define_int_attr sve_int_op [(UNSPEC_ANDV "andv") (UNSPEC_IORV "orv") Index: gcc/config/aarch64/aarch64-sve.md =================================================================== --- gcc/config/aarch64/aarch64-sve.md 2019-08-14 09:25:49.685451187 +0100 +++ gcc/config/aarch64/aarch64-sve.md 2019-08-14 09:29:14.191939575 +0100 @@ -3136,15 +3136,15 @@ (define_expand "vec_cmp<mode><vpred>" } ) -;; Floating-point comparisons predicated with a PTRUE. +;; Predicated floating-point comparisons. 
(define_insn "*fcm<cmp_op><mode>" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") (unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (SVE_FP_CMP:<VPRED> - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] - UNSPEC_MERGE_PTRUE))] + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] + SVE_COND_FP_CMP_I0))] "TARGET_SVE" "@ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 @@ -3156,10 +3156,10 @@ (define_insn "*fcmuo<mode>" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") (unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand" "Upl") - (unordered:<VPRED> - (match_operand:SVE_F 2 "register_operand" "w") - (match_operand:SVE_F 3 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE))] + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] + UNSPEC_COND_FCMUO))] "TARGET_SVE" "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" ) @@ -3177,20 +3177,21 @@ (define_insn_and_split "*fcm<cmp_op><mod (and:<VPRED> (unspec:<VPRED> [(match_operand:<VPRED> 1) - (SVE_FP_CMP - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] - UNSPEC_MERGE_PTRUE) + (const_int SVE_KNOWN_PTRUE) + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] + SVE_COND_FP_CMP_I0) (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] "TARGET_SVE" "#" "&& 1" [(set (match_dup 0) - (and:<VPRED> - (SVE_FP_CMP:<VPRED> - (match_dup 2) - (match_dup 3)) - (match_dup 4)))] + (unspec:<VPRED> + [(match_dup 4) + (const_int SVE_MAYBE_NOT_PTRUE) + (match_dup 2) + (match_dup 3)] + SVE_COND_FP_CMP_I0))] ) ;; Same for unordered comparisons. 
@@ -3199,62 +3200,21 @@ (define_insn_and_split "*fcmuo<mode>_and (and:<VPRED> (unspec:<VPRED> [(match_operand:<VPRED> 1) - (unordered - (match_operand:SVE_F 2 "register_operand" "w") - (match_operand:SVE_F 3 "register_operand" "w"))] - UNSPEC_MERGE_PTRUE) + (const_int SVE_KNOWN_PTRUE) + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] + UNSPEC_COND_FCMUO) (match_operand:<VPRED> 4 "register_operand" "Upl")))] "TARGET_SVE" "#" "&& 1" [(set (match_dup 0) - (and:<VPRED> - (unordered:<VPRED> - (match_dup 2) - (match_dup 3)) - (match_dup 4)))] -) - -;; Unpredicated floating-point comparisons, with the results ANDed with -;; another predicate. This is a valid fold for the same reasons as above. -(define_insn "*fcm<cmp_op><mode>_and" - [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") - (and:<VPRED> - (SVE_FP_CMP:<VPRED> - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) - (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))] - "TARGET_SVE" - "@ - fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 - fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" -) - -;; Same for unordered comparisons. -(define_insn "*fcmuo<mode>_and" - [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") - (and:<VPRED> - (unordered:<VPRED> - (match_operand:SVE_F 2 "register_operand" "w") - (match_operand:SVE_F 3 "register_operand" "w")) - (match_operand:<VPRED> 1 "register_operand" "Upl")))] - "TARGET_SVE" - "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" -) - -;; Predicated floating-point comparisons. We don't need a version -;; of this for unordered comparisons. 
-(define_insn "*pred_fcm<cmp_op><mode>" - [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") (unspec:<VPRED> - [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (match_operand:SVE_F 2 "register_operand" "w, w") - (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] - SVE_COND_FP_CMP))] - "TARGET_SVE" - "@ - fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 - fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" + [(match_dup 4) + (const_int SVE_MAYBE_NOT_PTRUE) + (match_dup 2) + (match_dup 3)] + UNSPEC_COND_FCMUO))] ) ;; ------------------------------------------------------------------------- Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2019-08-14 09:15:57.617827961 +0100 +++ gcc/config/aarch64/aarch64.c 2019-08-14 09:29:14.195939545 +0100 @@ -17700,28 +17700,35 @@ aarch64_sve_cmp_operand_p (rtx_code op_c (set TARGET OP) - given that PTRUE is an all-true predicate of the appropriate mode. */ + given that PTRUE is an all-true predicate of the appropriate mode + and that the instruction clobbers the condition codes. */ static void -aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op) +aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op) { rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), gen_rtvec (2, ptrue, op), UNSPEC_MERGE_PTRUE); - rtx_insn *insn = emit_set_insn (target, unspec); + rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec)); set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); } -/* Likewise, but also clobber the condition codes. */ +/* Expand an SVE integer comparison using the SVE equivalent of: -static void -aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op) + (set TARGET (CODE OP0 OP1)). 
*/ + +void +aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) { - rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), - gen_rtvec (2, ptrue, op), - UNSPEC_MERGE_PTRUE); - rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec)); - set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); + machine_mode pred_mode = GET_MODE (target); + machine_mode data_mode = GET_MODE (op0); + + if (!aarch64_sve_cmp_operand_p (code, op1)) + op1 = force_reg (data_mode, op1); + + rtx ptrue = aarch64_ptrue_reg (pred_mode); + rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); + aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond); } /* Return the UNSPEC_COND_* code for comparison CODE. */ @@ -17743,6 +17750,8 @@ aarch64_unspec_cond_code (rtx_code code) return UNSPEC_COND_FCMLE; case GE: return UNSPEC_COND_FCMGE; + case UNORDERED: + return UNSPEC_COND_FCMUO; default: gcc_unreachable (); } @@ -17750,78 +17759,58 @@ aarch64_unspec_cond_code (rtx_code code) /* Emit: - (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>)) + (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>)) - where <X> is the operation associated with comparison CODE. This form - of instruction is used when (and (CODE OP0 OP1) PRED) would have different - semantics, such as when PRED might not be all-true and when comparing - inactive lanes could have side effects. */ + where <X> is the operation associated with comparison CODE. + KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. 
*/ static void -aarch64_emit_sve_predicated_cond (rtx target, rtx_code code, - rtx pred, rtx op0, rtx op1) +aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred, + bool known_ptrue_p, rtx op0, rtx op1) { + rtx flag = gen_int_mode (known_ptrue_p, SImode); rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred), - gen_rtvec (3, pred, op0, op1), + gen_rtvec (4, pred, flag, op0, op1), aarch64_unspec_cond_code (code)); emit_set_insn (target, unspec); } -/* Expand an SVE integer comparison using the SVE equivalent of: - - (set TARGET (CODE OP0 OP1)). */ - -void -aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) -{ - machine_mode pred_mode = GET_MODE (target); - machine_mode data_mode = GET_MODE (op0); - - if (!aarch64_sve_cmp_operand_p (code, op1)) - op1 = force_reg (data_mode, op1); - - rtx ptrue = aarch64_ptrue_reg (pred_mode); - rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); - aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond); -} - /* Emit the SVE equivalent of: - (set TMP1 (CODE1 OP0 OP1)) - (set TMP2 (CODE2 OP0 OP1)) + (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>)) + (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>)) (set TARGET (ior:PRED_MODE TMP1 TMP2)) - PTRUE is an all-true predicate with the same mode as TARGET. */ + where <Xi> is the operation associated with comparison CODEi. + KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. 
*/ static void -aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2, - rtx ptrue, rtx op0, rtx op1) +aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2, + rtx pred, bool known_ptrue_p, rtx op0, rtx op1) { - machine_mode pred_mode = GET_MODE (ptrue); + machine_mode pred_mode = GET_MODE (pred); rtx tmp1 = gen_reg_rtx (pred_mode); - aarch64_emit_sve_ptrue_op (tmp1, ptrue, - gen_rtx_fmt_ee (code1, pred_mode, op0, op1)); + aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1); rtx tmp2 = gen_reg_rtx (pred_mode); - aarch64_emit_sve_ptrue_op (tmp2, ptrue, - gen_rtx_fmt_ee (code2, pred_mode, op0, op1)); + aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1); aarch64_emit_binop (target, ior_optab, tmp1, tmp2); } /* Emit the SVE equivalent of: - (set TMP (CODE OP0 OP1)) + (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>)) (set TARGET (not TMP)) - PTRUE is an all-true predicate with the same mode as TARGET. */ + where <X> is the operation associated with comparison CODE. + KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */ static void -aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code, - rtx op0, rtx op1) +aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred, + bool known_ptrue_p, rtx op0, rtx op1) { - machine_mode pred_mode = GET_MODE (ptrue); + machine_mode pred_mode = GET_MODE (pred); rtx tmp = gen_reg_rtx (pred_mode); - aarch64_emit_sve_ptrue_op (tmp, ptrue, - gen_rtx_fmt_ee (code, pred_mode, op0, op1)); + aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1); aarch64_emit_unop (target, one_cmpl_optab, tmp); } @@ -17854,14 +17843,13 @@ aarch64_expand_sve_vec_cmp_float (rtx ta case NE: { /* There is native support for the comparison. 
*/ - rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); - aarch64_emit_sve_ptrue_op (target, ptrue, cond); + aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); return false; } case LTGT: /* This is a trapping operation (LT or GT). */ - aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1); + aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1); return false; case UNEQ: @@ -17869,7 +17857,8 @@ aarch64_expand_sve_vec_cmp_float (rtx ta { /* This would trap for signaling NaNs. */ op1 = force_reg (data_mode, op1); - aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1); + aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ, + ptrue, true, op0, op1); return false; } /* fall through */ @@ -17882,7 +17871,8 @@ aarch64_expand_sve_vec_cmp_float (rtx ta /* Work out which elements are ordered. */ rtx ordered = gen_reg_rtx (pred_mode); op1 = force_reg (data_mode, op1); - aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1); + aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED, + ptrue, true, op0, op1); /* Test the opposite condition for the ordered elements, then invert the result. 
*/ @@ -17892,13 +17882,12 @@ aarch64_expand_sve_vec_cmp_float (rtx ta code = reverse_condition_maybe_unordered (code); if (can_invert_p) { - aarch64_emit_sve_predicated_cond (target, code, - ordered, op0, op1); + aarch64_emit_sve_fp_cond (target, code, + ordered, false, op0, op1); return true; } - rtx tmp = gen_reg_rtx (pred_mode); - aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1); - aarch64_emit_unop (target, one_cmpl_optab, tmp); + aarch64_emit_sve_invert_fp_cond (target, code, + ordered, false, op0, op1); return false; } break; @@ -17916,11 +17905,10 @@ aarch64_expand_sve_vec_cmp_float (rtx ta code = reverse_condition_maybe_unordered (code); if (can_invert_p) { - rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); - aarch64_emit_sve_ptrue_op (target, ptrue, cond); + aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); return true; } - aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1); + aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1); return false; }