https://gcc.gnu.org/g:241380c6d632eb6d6595c0976663a29a9be47507
commit r16-2618-g241380c6d632eb6d6595c0976663a29a9be47507 Author: Spencer Abson <spencer.ab...@arm.com> Date: Tue Jul 29 16:37:26 2025 +0000 aarch64: Add support for unpacked SVE FP conditional binary arithmetic This patch extends the expander for conditional smax, smin, add, sub, mul, min, max, and div to support partial SVE FP modes. If exceptions from undefined vector elements must be suppressed, this expansion converts the container-level predicate to an element-level one, and ensures that these elements are inactive for the operation. In practice, this is a predicate AND with the existing mask and a container-size PTRUE. gcc/ChangeLog: * config/aarch64/aarch64-protos.h (aarch64_sve_emit_masked_fp_pred): Declare. * config/aarch64/aarch64-sve.md (and<mode>3): Change this to... (@and<mode>3): ...this, so that we can use gen_and3. (@cond_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16, use aarch64_predicate_operand. (*cond_<optab><mode>_2_strict): Likewise. (*cond_<optab><mode>_3_strict): Likewise. (*cond_<optab><mode>_any_strict): Likwise. (*cond_<optab><mode>_2_const_strict): Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand. (*cond_<optab><mode>_any_const_strict): Likewise. (*cond_sub<mode>_3_const_strict): Likwise. (*cond_sub<mode>_const_strict): Likewise. (*vcond_mask_<mode><vpred>): Use aarch64_predicate_operand, and update the comment here. * config/aarch64/aarch64.cc (aarch64_sve_emit_masked_fp_pred): New function. Helper to mask the predicate in conditional expanders. gcc/testsuite/ChangeLog: * g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C: New test. * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fadd_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fmul_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-sve.md | 152 +++++++++++---------- gcc/config/aarch64/aarch64.cc | 27 ++++ .../aarch64/sve/unpacked_cond_binary_bf16_2.C | 18 +++ .../aarch64/sve/unpacked_cond_builtin_fmax_2.c | 24 ++++ .../aarch64/sve/unpacked_cond_builtin_fmin_2.c | 24 ++++ .../gcc.target/aarch64/sve/unpacked_cond_fadd_2.c | 28 ++++ .../gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c | 22 +++ .../aarch64/sve/unpacked_cond_fmaxnm_2.c | 24 ++++ .../aarch64/sve/unpacked_cond_fminnm_2.c | 24 ++++ .../gcc.target/aarch64/sve/unpacked_cond_fmul_2.c | 22 +++ .../gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c | 26 ++++ 12 files changed, 319 insertions(+), 73 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e946e8da11da..38c307cdc3a6 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); rtx aarch64_sve_packed_pred (machine_mode); rtx aarch64_sve_fp_pred (machine_mode, rtx *); +rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx); void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode); bool aarch64_expand_maskloadstore (rtx *, machine_mode); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index b252eef411c2..fe407f7e77ff 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -5605,18 +5605,21 @@ ;; Predicated floating-point operations with merging. (define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") - (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] + (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") + (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" + { + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); + } ) ;; Predicated floating-point operations, merging with the first input. @@ -5644,14 +5647,14 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] @@ -5687,14 +5690,14 @@ ) (define_insn "*cond_<optab><mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) (match_dup 2)] UNSPEC_SEL))] @@ -5730,14 +5733,14 @@ ) (define_insn "*cond_<optab><mode>_3_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] @@ -5794,16 +5797,16 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -5868,16 +5871,16 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 4 ] @@ -5953,14 +5956,14 @@ ) (define_insn "*cond_add<mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) (match_dup 2)] UNSPEC_SEL))] @@ -6015,16 +6018,16 @@ ) (define_insn_and_rewrite "*cond_add<mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 3 , 4 ] @@ -6266,14 +6269,14 @@ ) (define_insn "*cond_sub<mode>_3_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) (match_dup 3)] UNSPEC_SEL))] @@ -6323,16 +6326,16 @@ ) (define_insn_and_rewrite "*cond_sub<mode>_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" {@ [ cons: =0 , 1 , 3 , 4 ] @@ -6913,7 +6916,7 @@ ;; Predicate AND. We can reuse one of the inputs as the GP. ;; Doubling the second operand is the preferred implementation ;; of the MOV alias, so we use that instead of %1/z, %1, %2. -(define_insn "and<mode>3" +(define_insn "@and<mode>3" [(set (match_operand:PRED_ALL 0 "register_operand") (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand") (match_operand:PRED_ALL 2 "register_operand")))] @@ -8201,20 +8204,23 @@ ;; ;; For unpacked vectors, it doesn't really matter whether SEL uses the ;; the container size or the element size. If SEL used the container size, -;; it would ignore undefined bits of the predicate but would copy the -;; upper (undefined) bits of each container along with the defined bits. -;; If SEL used the element size, it would use undefined bits of the predicate -;; to select between undefined elements in each input vector. Thus the only -;; difference is whether the undefined bits in a container always come from -;; the same input as the defined bits, or whether the choice can vary -;; independently of the defined bits. +;; it would would copy the upper (undefined) bits of each container along +;; with the corresponding defined bits. If SEL used the element size, +;; it would use separate predicate bits to select between the undefined +;; elements in each input vector; these seperate predicate bits might +;; themselves be undefined, depending on the mode of the predicate. +;; +;; Thus the only difference is whether the undefined bits in a container +;; always come from the same input as the defined bits, or whether the +;; choice can vary independently of the defined bits. ;; ;; For the other instructions, using the element size is more natural, ;; so we do that for SEL as well. +;; (define_insn "*vcond_mask_<mode><vpred>" [(set (match_operand:SVE_ALL 0 "register_operand") (unspec:SVE_ALL - [(match_operand:<VPRED> 3 "register_operand") + [(match_operand:<VPRED> 3 "aarch64_predicate_operand") (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index a6d6bed97e8a..5502d0b48072 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3933,6 +3933,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness) return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode)); } +/* PRED is a predicate that governs an operation on DATA_MODE. If DATA_MODE + is a partial vector mode, and if exceptions must be suppressed for its + undefined elements, convert PRED from a container-level predicate to + an element-level predicate and ensure that the undefined elements + are inactive. Make no changes otherwise. + + Return the resultant predicate. */ +rtx +aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred) +{ + unsigned int vec_flags = aarch64_classify_vector_mode (data_mode); + if (flag_trapping_math && (vec_flags & VEC_PARTIAL)) + { + /* Generate an element-level mask. */ + rtx mask = aarch64_sve_packed_pred (data_mode); + machine_mode pmode = GET_MODE (mask); + + /* Apply the existing predicate. */ + rtx dst = gen_reg_rtx (pmode); + emit_insn (gen_and3 (pmode, dst, mask, + gen_lowpart (pmode, pred))); + return dst; + } + + return pred; +} + /* Emit a comparison CMP between OP0 and OP1, both of which have mode DATA_MODE, and return the result in a predicate of mode PRED_MODE. Use TARGET as the target register if nonnull and convenient. */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C new file mode 100644 index 000000000000..02880efa333b --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C @@ -0,0 +1,18 @@ +/* { dg-do compile }*/ +/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -msve-vector-bits=2048 " } */ + +#include "unpacked_cond_binary_bf16_1.C" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 15 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 15 } } */ +/* { dg-final { scan-assembler-times {\tand} 30 } } */ + +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ + +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ + +// There's no BFSUBR. +/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c new file mode 100644 index 000000000000..f84ded5ea3c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_builtin_fmax_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c new file mode 100644 index 000000000000..bceddf9ef742 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_builtin_fmin_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c new file mode 100644 index 000000000000..e59864b5e8f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fadd_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 22 } } */ +/* { dg-final { scan-assembler-times {\tand} 33 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 19 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 19 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 19 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 10 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c new file mode 100644 index 000000000000..1ca3dbf22426 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fdiv_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tand} 9 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c new file mode 100644 index 000000000000..282f3ed0830e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */ + +#include "unpacked_cond_fmaxnm_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c new file mode 100644 index 000000000000..8226a6fadc4f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */ + +#include "unpacked_cond_fminnm_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c new file mode 100644 index 000000000000..21713f583793 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmul_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 5 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 10 } } */ +/* { dg-final { scan-assembler-times {\tand} 15 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 10 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c new file mode 100644 index 000000000000..cd7a0e160473 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fsubr_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */