On 7/24/25 08:19, Spencer Abson wrote: > External email: Use caution opening links or attachments > > > This patch extends the expander for conditional smax, smin, add, sub, mul, > min, max, and div to support partial SVE FP modes. > > If exceptions from undefined vector elements must be suppressed, this > expansion converts the container-level predicate to an element-level one, and > ensures that these elements are inactive for the operation. In practice, this > is a predicate AND with the existing mask and a container-size PTRUE.
Hi, This patch looks good to me (but I cannot approve as I am not a reviewer or maintainer). Remi > > gcc/ChangeLog: > > * config/aarch64/aarch64-protos.h (aarch64_sve_emit_masked_fp_pred): > Declare. > * config/aarch64/aarch64-sve.md (and<mode>3): Change this to... > (@and<mode>3): ...this, so that we can use gen_and3. > (@cond_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16, > use aarch64_predicate_operand. > (*cond_<optab><mode>_2_strict): Likewise. > (*cond_<optab><mode>_3_strict): Likewise. > (*cond_<optab><mode>_any_strict): Likewise. > (*cond_<optab><mode>_2_const_strict): Extend from SVE_FULL_F to > SVE_F, > use aarch64_predicate_operand. > (*cond_<optab><mode>_any_const_strict): Likewise. > (*cond_sub<mode>_3_const_strict): Likewise. > (*cond_sub<mode>_const_strict): Likewise. > (*vcond_mask_<mode><vpred>): Use aarch64_predicate_operand, and > update > the comment here. > * config/aarch64/aarch64.cc (aarch64_sve_emit_masked_fp_pred): New > function. Helper to mask the predicate in conditional expanders. > > gcc/testsuite/ChangeLog: > > * g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C: New test. > * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fadd_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fmul_2.c: Likewise. > * gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c: Likewise. 
> --- > gcc/config/aarch64/aarch64-protos.h | 1 + > gcc/config/aarch64/aarch64-sve.md | 152 +++++++++--------- > gcc/config/aarch64/aarch64.cc | 27 ++++ > .../aarch64/sve/unpacked_cond_binary_bf16_2.C | 18 +++ > .../sve/unpacked_cond_builtin_fmax_2.c | 24 +++ > .../sve/unpacked_cond_builtin_fmin_2.c | 24 +++ > .../aarch64/sve/unpacked_cond_fadd_2.c | 28 ++++ > .../aarch64/sve/unpacked_cond_fdiv_2.c | 22 +++ > .../aarch64/sve/unpacked_cond_fmaxnm_2.c | 24 +++ > .../aarch64/sve/unpacked_cond_fminnm_2.c | 24 +++ > .../aarch64/sve/unpacked_cond_fmul_2.c | 22 +++ > .../aarch64/sve/unpacked_cond_fsubr_2.c | 26 +++ > 12 files changed, 319 insertions(+), 73 deletions(-) > create mode 100644 > gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index e946e8da11d..38c307cdc3a 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode); > bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); > rtx aarch64_sve_packed_pred (machine_mode); > rtx aarch64_sve_fp_pred (machine_mode, rtx *); > +rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx); > void aarch64_emit_load_store_through_mode (rtx, rtx, 
machine_mode); > bool aarch64_expand_maskloadstore (rtx *, machine_mode); > void aarch64_emit_sve_pred_move (rtx, rtx, rtx); > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index b252eef411c..fe407f7e77f 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -5605,18 +5605,21 @@ > > ;; Predicated floating-point operations with merging. > (define_expand "@cond_<optab><mode>" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + (unspec:SVE_F_B16B16 > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") > - (match_operand:SVE_FULL_F_B16B16 3 > "<sve_pred_fp_rhs2_operand>")] > + (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") > + (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] > SVE_COND_FP_BINARY) > - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" > + { > + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); > + } > ) > > ;; Predicated floating-point operations, merging with the first input. 
> @@ -5644,14 +5647,14 @@ > ) > > (define_insn "*cond_<optab><mode>_2_strict" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F_B16B16 > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand")] > SVE_COND_FP_BINARY) > (match_dup 2)] > UNSPEC_SEL))] > @@ -5687,14 +5690,14 @@ > ) > > (define_insn "*cond_<optab><mode>_2_const_strict" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] > SVE_COND_FP_BINARY_I1) > (match_dup 2)] > UNSPEC_SEL))] > @@ -5730,14 +5733,14 @@ > ) > > (define_insn "*cond_<optab><mode>_3_strict" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F_B16B16 > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - 
(match_operand:SVE_FULL_F_B16B16 3 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand")] > SVE_COND_FP_BINARY) > (match_dup 3)] > UNSPEC_SEL))] > @@ -5794,16 +5797,16 @@ > ) > > (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" > - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F_B16B16 > + [(set (match_operand:SVE_F_B16B16 0 "register_operand") > + (unspec:SVE_F_B16B16 > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F_B16B16 > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") > - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] > + (match_operand:SVE_F_B16B16 2 "register_operand") > + (match_operand:SVE_F_B16B16 3 "register_operand")] > SVE_COND_FP_BINARY) > - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE > && (<supports_bf16> || !<is_bf16>) > @@ -5868,16 +5871,16 @@ > ) > > (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] > SVE_COND_FP_BINARY_I1) > - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && 
!rtx_equal_p (operands[2], operands[4])" > {@ [ cons: =0 , 1 , 2 , 4 ] > @@ -5953,14 +5956,14 @@ > ) > > (define_insn "*cond_add<mode>_2_const_strict" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > UNSPEC_COND_FADD) > (match_dup 2)] > UNSPEC_SEL))] > @@ -6015,16 +6018,16 @@ > ) > > (define_insn_and_rewrite "*cond_add<mode>_any_const_strict" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F 2 "register_operand") > - (match_operand:SVE_FULL_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > + (match_operand:SVE_F 2 "register_operand") > + (match_operand:SVE_F 3 > "aarch64_sve_float_arith_with_sub_immediate")] > UNSPEC_COND_FADD) > - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" > {@ [ cons: =0 , 1 , 2 , 3 , 4 ] > @@ -6266,14 +6269,14 @@ > ) > > (define_insn "*cond_sub<mode>_3_const_strict" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + [(set 
(match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") > - (match_operand:SVE_FULL_F 3 "register_operand")] > + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") > + (match_operand:SVE_F 3 "register_operand")] > UNSPEC_COND_FSUB) > (match_dup 3)] > UNSPEC_SEL))] > @@ -6323,16 +6326,16 @@ > ) > > (define_insn_and_rewrite "*cond_sub<mode>_const_strict" > - [(set (match_operand:SVE_FULL_F 0 "register_operand") > - (unspec:SVE_FULL_F > - [(match_operand:<VPRED> 1 "register_operand") > - (unspec:SVE_FULL_F > + [(set (match_operand:SVE_F 0 "register_operand") > + (unspec:SVE_F > + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") > + (unspec:SVE_F > [(match_dup 1) > (const_int SVE_STRICT_GP) > - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") > - (match_operand:SVE_FULL_F 3 "register_operand")] > + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") > + (match_operand:SVE_F 3 "register_operand")] > UNSPEC_COND_FSUB) > - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] > + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" > {@ [ cons: =0 , 1 , 3 , 4 ] > @@ -6913,7 +6916,7 @@ > ;; Predicate AND. We can reuse one of the inputs as the GP. > ;; Doubling the second operand is the preferred implementation > ;; of the MOV alias, so we use that instead of %1/z, %1, %2. > -(define_insn "and<mode>3" > +(define_insn "@and<mode>3" > [(set (match_operand:PRED_ALL 0 "register_operand") > (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand") > (match_operand:PRED_ALL 2 "register_operand")))] > @@ -8201,20 +8204,23 @@ > ;; > ;; For unpacked vectors, it doesn't really matter whether SEL uses the > ;; the container size or the element size. 
If SEL used the container size, > -;; it would ignore undefined bits of the predicate but would copy the > -;; upper (undefined) bits of each container along with the defined bits. > -;; If SEL used the element size, it would use undefined bits of the predicate > -;; to select between undefined elements in each input vector. Thus the only > -;; difference is whether the undefined bits in a container always come from > -;; the same input as the defined bits, or whether the choice can vary > -;; independently of the defined bits. > +;; it would copy the upper (undefined) bits of each container along > +;; with the corresponding defined bits. If SEL used the element size, > +;; it would use separate predicate bits to select between the undefined > +;; elements in each input vector; these separate predicate bits might > +;; themselves be undefined, depending on the mode of the predicate. > +;; > +;; Thus the only difference is whether the undefined bits in a container > +;; always come from the same input as the defined bits, or whether the > +;; choice can vary independently of the defined bits. > ;; > ;; For the other instructions, using the element size is more natural, > ;; so we do that for SEL as well. 
> +;; > (define_insn "*vcond_mask_<mode><vpred>" > [(set (match_operand:SVE_ALL 0 "register_operand") > (unspec:SVE_ALL > - [(match_operand:<VPRED> 3 "register_operand") > + [(match_operand:<VPRED> 3 "aarch64_predicate_operand") > (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") > (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] > UNSPEC_SEL))] > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 9e4a37bcaff..a06d34bf4ed 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -3931,6 +3931,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx > *strictness) > return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode)); > } > > +/* PRED is a predicate that governs an operation on DATA_MODE. If DATA_MODE > + is a partial vector mode, and if exceptions must be suppressed for its > + undefined elements, convert PRED from a container-level predicate to > + an element-level predicate and ensure that the undefined elements > + are inactive. Make no changes otherwise. > + > + Return the resultant predicate. */ > +rtx > +aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred) > +{ > + unsigned int vec_flags = aarch64_classify_vector_mode (data_mode); > + if (flag_trapping_math && (vec_flags & VEC_PARTIAL)) > + { > + /* Generate an element-level mask. */ > + rtx mask = aarch64_sve_packed_pred (data_mode); > + machine_mode pmode = GET_MODE (mask); > + > + /* Apply the existing predicate. */ > + rtx dst = gen_reg_rtx (pmode); > + emit_insn (gen_and3 (pmode, dst, mask, > + gen_lowpart (pmode, pred))); > + return dst; > + } > + > + return pred; > +} > + > /* Emit a comparison CMP between OP0 and OP1, both of which have mode > DATA_MODE, and return the result in a predicate of mode PRED_MODE. > Use TARGET as the target register if nonnull and convenient. 
*/ > diff --git > a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C > b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C > new file mode 100644 > index 00000000000..02880efa333 > --- /dev/null > +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C > @@ -0,0 +1,18 @@ > +/* { dg-do compile }*/ > +/* { dg-options "-O -ffinite-math-only -fno-signed-zeros > -msve-vector-bits=2048 " } */ > + > +#include "unpacked_cond_binary_bf16_1.C" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 15 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 15 } } */ > +/* { dg-final { scan-assembler-times {\tand} 30 } } */ > + > +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > + > +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > + > +// There's no BFSUBR. 
> +/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */ > diff --git > a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c > new file mode 100644 > index 00000000000..f84ded5ea3c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ > + > +#include "unpacked_cond_builtin_fmax_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ > +/* { dg-final { scan-assembler-times {\tand} 21 } } */ > + > +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git > a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c > 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c > new file mode 100644 > index 00000000000..bceddf9ef74 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ > + > +#include "unpacked_cond_builtin_fmin_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ > +/* { dg-final { scan-assembler-times {\tand} 21 } } */ > + > +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c > new file mode 100644 > index 00000000000..e59864b5e8f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c > @@ -0,0 
+1,28 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ > + > +#include "unpacked_cond_fadd_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 11 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 22 } } */ > +/* { dg-final { scan-assembler-times {\tand} 33 } } */ > + > +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 19 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 19 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 19 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 5 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 10 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c > 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c > new file mode 100644 > index 00000000000..1ca3dbf2242 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ > + > +#include "unpacked_cond_fdiv_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 3 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 6 } } */ > +/* { dg-final { scan-assembler-times {\tand} 9 } } */ > + > +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c > new file mode 100644 > index 00000000000..282f3ed0830 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 > -fno-signed-zeros -ffinite-math-only" } */ > + > +#include "unpacked_cond_fmaxnm_1.c" > + > +/* { dg-final { 
scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ > +/* { dg-final { scan-assembler-times {\tand} 21 } } */ > + > +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c > new file mode 100644 > index 00000000000..8226a6fadc4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 > -fno-signed-zeros -ffinite-math-only" } */ > + > +#include "unpacked_cond_fminnm_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ > +/* { dg-final { scan-assembler-times {\tand} 21 } } */ > + > 
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.0\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.0\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c > new file mode 100644 > index 00000000000..21713f58379 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ > + > +#include "unpacked_cond_fmul_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 5 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 10 } } */ > +/* { dg-final { scan-assembler-times {\tand} 15 } } */ > + > +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 10 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 10 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 10 } } */ > + > +/* { dg-final { 
scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c > new file mode 100644 > index 00000000000..cd7a0e16047 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ > + > +#include "unpacked_cond_fsubr_1.c" > + > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ > +/* { dg-final { scan-assembler-times {\tand} 21 } } */ > + > +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ > +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s\n} 3 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #0.5\n} 2 } } */ > +/* { dg-final { scan-assembler-times 
{\tfsubr\tz[0-9]+\.s, p[0-7]/m, > z[0-9]+\.s, #1.0\n} 2 } } */ > + > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h\n} 6 } } */ > +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #0.5\n} 4 } } */ > +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, > z[0-9]+\.h, #1.0\n} 4 } } */ > + > +/* { dg-final { scan-assembler-not {\tsel\t} } } */ > -- > 2.34.1 >