https://gcc.gnu.org/g:45fd943eabfe8e71aeecf001e9200f4d52748610

commit r16-2629-g45fd943eabfe8e71aeecf001e9200f4d52748610
Author: Spencer Abson <spencer.ab...@arm.com>
Date:   Wed Jul 30 08:58:50 2025 +0000

    aarch64: Add support for unpacked SVE FP conditional ternary arithmetic
    
    This patch extends the expander for fma, fnma, fms, and fnms to support
    partial SVE FP modes.
    
    We add the missing BF16 tests, which can now be triggered because the
    conditional expander has been implemented.
    
    We also add tests for the 'merging with multiplicand' case, which this
    expander canonicalizes (albeit under SVE_STRICT_GP).
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-sve.md (@cond_<optab><mode>): Extend
            to support partial FP modes.
            (*cond_<optab><mode>_2_strict): Extend from SVE_FULL_F to SVE_F,
            use aarch64_predicate_operand.
            (*cond_<optab><mode>_4_strict): Extend from SVE_FULL_F_B16B16 to
            SVE_F_B16B16, use aarch64_predicate_operand.
            (*cond_<optab><mode>_any_strict): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/unpacked_cond_fmla_1.c: Add test cases
            for merging with multiplicand.
            * gcc.target/aarch64/sve/unpacked_cond_fmls_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fmla_2.c: New test.
            * gcc.target/aarch64/sve/unpacked_cond_fmls_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c: Likewise.
            * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C: Likewise.
            * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md                  | 60 +++++++++++-----------
 .../aarch64/sve/unpacked_cond_ternary_bf16_1.C     | 35 +++++++++++++
 .../aarch64/sve/unpacked_cond_ternary_bf16_2.C     | 14 +++++
 .../gcc.target/aarch64/sve/unpacked_cond_fmla_1.c  | 10 ++--
 .../gcc.target/aarch64/sve/unpacked_cond_fmla_2.c  | 22 ++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fmls_1.c  | 10 ++--
 .../gcc.target/aarch64/sve/unpacked_cond_fmls_2.c  | 22 ++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c | 10 ++--
 .../gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c | 22 ++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c | 10 ++--
 .../gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c | 22 ++++++++
 11 files changed, 196 insertions(+), 41 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 815dec97d872..80a32889f8cf 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -7634,17 +7634,17 @@
 
 ;; Predicated floating-point ternary operations with merging.
 (define_expand "@cond_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")
+             (match_operand:SVE_F_B16B16 4 "register_operand")]
             SVE_COND_FP_TERNARY)
-          (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
 {
@@ -7652,6 +7652,8 @@
      second of the two.  */
   if (rtx_equal_p (operands[3], operands[5]))
     std::swap (operands[2], operands[3]);
+
+  operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
 })
 
 ;; Predicated floating-point ternary operations, merging with the
@@ -7681,15 +7683,15 @@
 )
 
 (define_insn "*cond_<optab><mode>_2_strict"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "register_operand")
-             (match_operand:SVE_FULL_F 4 "register_operand")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "register_operand")
+             (match_operand:SVE_F 4 "register_operand")]
             SVE_COND_FP_TERNARY)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -7727,15 +7729,15 @@
 )
 
 (define_insn "*cond_<optab><mode>_4_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")
+             (match_operand:SVE_F_B16B16 4 "register_operand")]
             SVE_COND_FP_TERNARY)
           (match_dup 4)]
          UNSPEC_SEL))]
@@ -7795,17 +7797,17 @@
 )
 
 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
-         [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+          (unspec:SVE_F_B16B16
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")
+             (match_operand:SVE_F_B16B16 4 "register_operand")]
             SVE_COND_FP_TERNARY)
-          (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
new file mode 100644
index 000000000000..95cd698f1a61
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
@@ -0,0 +1,35 @@
+/* { dg-do compile }*/
+/* { dg-options "-O2  -fno-trapping-math -msve-vector-bits=2048 " } */
+
+#include <stdint.h>
+#pragma GCC target "arch=armv9-a+sve-b16b16"
+
+#define COND_BFMLA(TYPE, PRED_TYPE, MERGE)                               \
+  TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \
+  {return p ? a * b + c : MERGE; }
+
+#define COND_BFMLS(TYPE, PRED_TYPE, MERGE)                               \
+  TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \
+  {return p ? a * -b + c : MERGE; }
+
+#define TEST_OP(TYPE, PRED_TYPE, T) \
+  T (TYPE, PRED_TYPE, c)            \
+  T (TYPE, PRED_TYPE, 0)
+
+#define TEST(TYPE, PTYPE, SIZE)                                   \
+  typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE)));   \
+  typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \
+  TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA)                   \
+  TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS)
+
+TEST (__bf16, uint16_t, 128)
+
+TEST (__bf16, uint16_t, 64)
+
+/* { dg-final { scan-assembler-times {\tptrue} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
new file mode 100644
index 000000000000..c0d7c50756eb
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_ternary_bf16_1.C"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tand} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
index 8181c421fbc6..cae924225978 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
@@ -25,6 +25,8 @@
   }
 
 #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
 
@@ -34,14 +36,16 @@ TEST_ALL (FMLA (f16), _Float16, uint32_t, 64)
 
 TEST_ALL (FMLA (f32), float, uint64_t, 32)
 
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
new file mode 100644
index 000000000000..72e04a4958e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fmla_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
index 3755fdf3c433..db0f81804b97 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
@@ -25,6 +25,8 @@
   }
 
 #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
 
@@ -34,14 +36,16 @@ TEST_ALL (FMLS (f16), _Float16, uint32_t, 64)
 
 TEST_ALL (FMLS (f32), float, uint64_t, 32)
 
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
new file mode 100644
index 000000000000..30120527debd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fmls_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
index b83265304d01..07bab63d1503 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
@@ -25,6 +25,8 @@
   }
 
 #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
 
@@ -34,14 +36,16 @@ TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64)
 
 TEST_ALL (FNMLA (f32), float, uint64_t, 32)
 
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
new file mode 100644
index 000000000000..daef4e49fa90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fnmla_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
index a66af9e13342..5526378c5216 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
@@ -25,6 +25,8 @@
   }
 
 #define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i)  \
   TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)
 
@@ -34,14 +36,16 @@ TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64)
 
 TEST_ALL (FNMLS (f32), float, uint64_t, 32)
 
-/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
 
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
 
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
new file mode 100644
index 000000000000..8a8f34828fb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */
+
+#include "unpacked_cond_fnmls_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */

Reply via email to