This patch rewrites the SVE FP comparisons so that they always use
unspecs and have an additional operand that indicates whether the
predicate is known to be a PTRUE.  It's part of a series that rewrites
the SVE FP patterns so that they can cope with non-PTRUE predicates.
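
As a rough sketch (not part of the patch itself; constraints and
instruction alternatives omitted), the rewritten ordered comparisons
now have the form:

    (set (match_operand:<VPRED> 0 "register_operand")
         (unspec:<VPRED>
           [(match_operand:<VPRED> 1 "register_operand")   ; governing predicate
            (match_operand:SI 4 "aarch64_sve_ptrue_flag")  ; SVE_KNOWN_PTRUE or SVE_MAYBE_NOT_PTRUE
            (match_operand:SVE_F 2 "register_operand")
            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]
           SVE_COND_FP_CMP_I0))

rather than wrapping a plain comparison rtx in UNSPEC_MERGE_PTRUE.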

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274421.

Richard


2019-08-14  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * config/aarch64/iterators.md (UNSPEC_COND_FCMUO): New unspec.
        (cmp_op): Handle it.
        (SVE_COND_FP_CMP): Rename to...
        (SVE_COND_FP_CMP_I0): ...this.
        (SVE_FP_CMP): Remove.
        * config/aarch64/aarch64-sve.md
        (*fcm<SVE_FP_CMP:cmp_op><SVE_F:mode>): Replace with...
        (*fcm<SVE_COND_FP_CMP_I0:cmp_op><SVE_F:mode>): ...this new pattern,
        using unspecs to represent the comparison.
        (*fcmuo<SVE_F:mode>): Use UNSPEC_COND_FCMUO.
        (*fcm<cmp_op><mode>_and_combine, *fcmuo<mode>_and_combine): Update
        accordingly.
        * config/aarch64/aarch64.c (aarch64_emit_sve_ptrue_op): Delete.
        (aarch64_unspec_cond_code): Move after integer code.  Handle
        UNORDERED.
        (aarch64_emit_sve_predicated_cond): Replace with...
        (aarch64_emit_sve_fp_cond): ...this new function.
        (aarch64_emit_sve_or_conds): Replace with...
        (aarch64_emit_sve_or_fp_conds): ...this new function.
        (aarch64_emit_sve_inverted_cond): Replace with...
        (aarch64_emit_sve_invert_fp_cond): ...this new function.
        (aarch64_expand_sve_vec_cmp_float): Update accordingly.

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md     2019-08-14 09:25:49.689451157 +0100
+++ gcc/config/aarch64/iterators.md     2019-08-14 09:29:14.195939545 +0100
@@ -479,6 +479,7 @@ (define_c_enum "unspec"
     UNSPEC_COND_FCMLE  ; Used in aarch64-sve.md.
     UNSPEC_COND_FCMLT  ; Used in aarch64-sve.md.
     UNSPEC_COND_FCMNE  ; Used in aarch64-sve.md.
+    UNSPEC_COND_FCMUO  ; Used in aarch64-sve.md.
     UNSPEC_COND_FDIV   ; Used in aarch64-sve.md.
     UNSPEC_COND_FMAXNM ; Used in aarch64-sve.md.
     UNSPEC_COND_FMINNM ; Used in aarch64-sve.md.
@@ -1273,9 +1274,6 @@ (define_code_iterator SVE_UNPRED_FP_BINA
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
-;; SVE floating-point comparisons.
-(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt])
-
 ;; -------------------------------------------------------------------
 ;; Code Attributes
 ;; -------------------------------------------------------------------
@@ -1663,12 +1661,13 @@ (define_int_iterator SVE_COND_FP_TERNARY
                                          UNSPEC_COND_FNMLA
                                          UNSPEC_COND_FNMLS])
 
-(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_FCMEQ
-                                     UNSPEC_COND_FCMGE
-                                     UNSPEC_COND_FCMGT
-                                     UNSPEC_COND_FCMLE
-                                     UNSPEC_COND_FCMLT
-                                     UNSPEC_COND_FCMNE])
+;; SVE FP comparisons that accept #0.0.
+(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ
+                                        UNSPEC_COND_FCMGE
+                                        UNSPEC_COND_FCMGT
+                                        UNSPEC_COND_FCMLE
+                                        UNSPEC_COND_FCMLT
+                                        UNSPEC_COND_FCMNE])
 
 (define_int_iterator FCADD [UNSPEC_FCADD90
                            UNSPEC_FCADD270])
@@ -1955,7 +1954,8 @@ (define_int_attr cmp_op [(UNSPEC_COND_FC
                         (UNSPEC_COND_FCMGT "gt")
                         (UNSPEC_COND_FCMLE "le")
                         (UNSPEC_COND_FCMLT "lt")
-                        (UNSPEC_COND_FCMNE "ne")])
+                        (UNSPEC_COND_FCMNE "ne")
+                        (UNSPEC_COND_FCMUO "uo")])
 
 (define_int_attr sve_int_op [(UNSPEC_ANDV "andv")
                             (UNSPEC_IORV "orv")
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md   2019-08-14 09:25:49.685451187 +0100
+++ gcc/config/aarch64/aarch64-sve.md   2019-08-14 09:29:14.191939575 +0100
@@ -3136,15 +3136,15 @@ (define_expand "vec_cmp<mode><vpred>"
   }
 )
 
-;; Floating-point comparisons predicated with a PTRUE.
+;; Predicated floating-point comparisons.
 (define_insn "*fcm<cmp_op><mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-          (SVE_FP_CMP:<VPRED>
-            (match_operand:SVE_F 2 "register_operand" "w, w")
-            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
-         UNSPEC_MERGE_PTRUE))]
+          (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+          (match_operand:SVE_F 2 "register_operand" "w, w")
+          (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+         SVE_COND_FP_CMP_I0))]
   "TARGET_SVE"
   "@
    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
@@ -3156,10 +3156,10 @@ (define_insn "*fcmuo<mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
-          (unordered:<VPRED>
-            (match_operand:SVE_F 2 "register_operand" "w")
-            (match_operand:SVE_F 3 "register_operand" "w"))]
-         UNSPEC_MERGE_PTRUE))]
+          (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+          (match_operand:SVE_F 2 "register_operand" "w")
+          (match_operand:SVE_F 3 "register_operand" "w")]
+         UNSPEC_COND_FCMUO))]
   "TARGET_SVE"
   "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
 )
@@ -3177,20 +3177,21 @@ (define_insn_and_split "*fcm<cmp_op><mod
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
-            (SVE_FP_CMP
-              (match_operand:SVE_F 2 "register_operand" "w, w")
-              (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
-           UNSPEC_MERGE_PTRUE)
+            (const_int SVE_KNOWN_PTRUE)
+            (match_operand:SVE_F 2 "register_operand" "w, w")
+            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+           SVE_COND_FP_CMP_I0)
          (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
   "TARGET_SVE"
   "#"
   "&& 1"
   [(set (match_dup 0)
-       (and:<VPRED>
-         (SVE_FP_CMP:<VPRED>
-           (match_dup 2)
-           (match_dup 3))
-         (match_dup 4)))]
+       (unspec:<VPRED>
+         [(match_dup 4)
+          (const_int SVE_MAYBE_NOT_PTRUE)
+          (match_dup 2)
+          (match_dup 3)]
+         SVE_COND_FP_CMP_I0))]
 )
 
 ;; Same for unordered comparisons.
@@ -3199,62 +3200,21 @@ (define_insn_and_split "*fcmuo<mode>_and
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
-            (unordered
-              (match_operand:SVE_F 2 "register_operand" "w")
-              (match_operand:SVE_F 3 "register_operand" "w"))]
-           UNSPEC_MERGE_PTRUE)
+            (const_int SVE_KNOWN_PTRUE)
+            (match_operand:SVE_F 2 "register_operand" "w")
+            (match_operand:SVE_F 3 "register_operand" "w")]
+           UNSPEC_COND_FCMUO)
          (match_operand:<VPRED> 4 "register_operand" "Upl")))]
   "TARGET_SVE"
   "#"
   "&& 1"
   [(set (match_dup 0)
-       (and:<VPRED>
-         (unordered:<VPRED>
-           (match_dup 2)
-           (match_dup 3))
-         (match_dup 4)))]
-)
-
-;; Unpredicated floating-point comparisons, with the results ANDed with
-;; another predicate.  This is a valid fold for the same reasons as above.
-(define_insn "*fcm<cmp_op><mode>_and"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
-       (and:<VPRED>
-         (SVE_FP_CMP:<VPRED>
-           (match_operand:SVE_F 2 "register_operand" "w, w")
-           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
-         (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
-  "TARGET_SVE"
-  "@
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Same for unordered comparisons.
-(define_insn "*fcmuo<mode>_and"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
-       (and:<VPRED>
-         (unordered:<VPRED>
-           (match_operand:SVE_F 2 "register_operand" "w")
-           (match_operand:SVE_F 3 "register_operand" "w"))
-         (match_operand:<VPRED> 1 "register_operand" "Upl")))]
-  "TARGET_SVE"
-  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Predicated floating-point comparisons.  We don't need a version
-;; of this for unordered comparisons.
-(define_insn "*pred_fcm<cmp_op><mode>"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
-         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-          (match_operand:SVE_F 2 "register_operand" "w, w")
-          (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
-         SVE_COND_FP_CMP))]
-  "TARGET_SVE"
-  "@
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+         [(match_dup 4)
+          (const_int SVE_MAYBE_NOT_PTRUE)
+          (match_dup 2)
+          (match_dup 3)]
+         UNSPEC_COND_FCMUO))]
 )
 
 ;; -------------------------------------------------------------------------
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c        2019-08-14 09:15:57.617827961 +0100
+++ gcc/config/aarch64/aarch64.c        2019-08-14 09:29:14.195939545 +0100
@@ -17700,28 +17700,35 @@ aarch64_sve_cmp_operand_p (rtx_code op_c
 
      (set TARGET OP)
 
-   given that PTRUE is an all-true predicate of the appropriate mode.  */
+   given that PTRUE is an all-true predicate of the appropriate mode
+   and that the instruction clobbers the condition codes.  */
 
 static void
-aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
+aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
 {
   rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
                               gen_rtvec (2, ptrue, op),
                               UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_set_insn (target, unspec);
+  rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
   set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
 }
 
-/* Likewise, but also clobber the condition codes.  */
+/* Expand an SVE integer comparison using the SVE equivalent of:
 
-static void
-aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
+     (set TARGET (CODE OP0 OP1)).  */
+
+void
+aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
 {
-  rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
-                              gen_rtvec (2, ptrue, op),
-                              UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
-  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
+  machine_mode pred_mode = GET_MODE (target);
+  machine_mode data_mode = GET_MODE (op0);
+
+  if (!aarch64_sve_cmp_operand_p (code, op1))
+    op1 = force_reg (data_mode, op1);
+
+  rtx ptrue = aarch64_ptrue_reg (pred_mode);
+  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
+  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
 }
 
 /* Return the UNSPEC_COND_* code for comparison CODE.  */
@@ -17743,6 +17750,8 @@ aarch64_unspec_cond_code (rtx_code code)
       return UNSPEC_COND_FCMLE;
     case GE:
       return UNSPEC_COND_FCMGE;
+    case UNORDERED:
+      return UNSPEC_COND_FCMUO;
     default:
       gcc_unreachable ();
     }
@@ -17750,78 +17759,58 @@ aarch64_unspec_cond_code (rtx_code code)
 
 /* Emit:
 
-      (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
+      (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
 
-   where <X> is the operation associated with comparison CODE.  This form
-   of instruction is used when (and (CODE OP0 OP1) PRED) would have different
-   semantics, such as when PRED might not be all-true and when comparing
-   inactive lanes could have side effects.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
-                                 rtx pred, rtx op0, rtx op1)
+aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
+                         bool known_ptrue_p, rtx op0, rtx op1)
 {
+  rtx flag = gen_int_mode (known_ptrue_p, SImode);
   rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
-                              gen_rtvec (3, pred, op0, op1),
+                              gen_rtvec (4, pred, flag, op0, op1),
                               aarch64_unspec_cond_code (code));
   emit_set_insn (target, unspec);
 }
 
-/* Expand an SVE integer comparison using the SVE equivalent of:
-
-     (set TARGET (CODE OP0 OP1)).  */
-
-void
-aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
-{
-  machine_mode pred_mode = GET_MODE (target);
-  machine_mode data_mode = GET_MODE (op0);
-
-  if (!aarch64_sve_cmp_operand_p (code, op1))
-    op1 = force_reg (data_mode, op1);
-
-  rtx ptrue = aarch64_ptrue_reg (pred_mode);
-  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
-}
-
 /* Emit the SVE equivalent of:
 
-      (set TMP1 (CODE1 OP0 OP1))
-      (set TMP2 (CODE2 OP0 OP1))
+      (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
+      (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
       (set TARGET (ior:PRED_MODE TMP1 TMP2))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <Xi> is the operation associated with comparison CODEi.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
-                          rtx ptrue, rtx op0, rtx op1)
+aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
+                             rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp1 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp1, ptrue,
-                            gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
   rtx tmp2 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp2, ptrue,
-                            gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
   aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
 }
 
 /* Emit the SVE equivalent of:
 
-      (set TMP (CODE OP0 OP1))
+      (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
       (set TARGET (not TMP))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
-                               rtx op0, rtx op1)
+aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
+                                bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp, ptrue,
-                            gen_rtx_fmt_ee (code, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
   aarch64_emit_unop (target, one_cmpl_optab, tmp);
 }
 
@@ -17854,14 +17843,13 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
     case NE:
       {
        /* There is native support for the comparison.  */
-       rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-       aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+       aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
        return false;
       }
 
     case LTGT:
       /* This is a trapping operation (LT or GT).  */
-      aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
+      aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
       return false;
 
     case UNEQ:
@@ -17869,7 +17857,8 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
        {
          /* This would trap for signaling NaNs.  */
          op1 = force_reg (data_mode, op1);
-         aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
+         aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
+                                       ptrue, true, op0, op1);
          return false;
        }
       /* fall through */
@@ -17882,7 +17871,8 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
          /* Work out which elements are ordered.  */
          rtx ordered = gen_reg_rtx (pred_mode);
          op1 = force_reg (data_mode, op1);
-         aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
+         aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
+                                          ptrue, true, op0, op1);
 
          /* Test the opposite condition for the ordered elements,
             then invert the result.  */
@@ -17892,13 +17882,12 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
            code = reverse_condition_maybe_unordered (code);
          if (can_invert_p)
            {
-             aarch64_emit_sve_predicated_cond (target, code,
-                                               ordered, op0, op1);
+             aarch64_emit_sve_fp_cond (target, code,
+                                       ordered, false, op0, op1);
              return true;
            }
-         rtx tmp = gen_reg_rtx (pred_mode);
-         aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
-         aarch64_emit_unop (target, one_cmpl_optab, tmp);
+         aarch64_emit_sve_invert_fp_cond (target, code,
+                                          ordered, false, op0, op1);
          return false;
        }
       break;
@@ -17916,11 +17905,10 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
   code = reverse_condition_maybe_unordered (code);
   if (can_invert_p)
     {
-      rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-      aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+      aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
       return true;
     }
-  aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
+  aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
   return false;
 }
 
