https://gcc.gnu.org/g:b768e2786f8c85097442bd52010fee1b7ed12ed2
commit r16-2743-gb768e2786f8c85097442bd52010fee1b7ed12ed2 Author: Richard Sandiford <richard.sandif...@arm.com> Date: Mon Aug 4 11:45:35 2025 +0100 aarch64: Use VNx16BI for svdup_b* This patch continues the work of making ACLE intrinsics use VNx16BI for svbool_t results. It deals with the predicate forms of svdup. gcc/ * config/aarch64/aarch64-protos.h (aarch64_emit_sve_pred_vec_duplicate): Declare. * config/aarch64/aarch64.cc (aarch64_emit_sve_pred_vec_duplicate): New function. * config/aarch64/aarch64-sve.md (vec_duplicate<PRED_ALL:mode>): Use it. * config/aarch64/aarch64-sve-builtins-base.cc (svdup_impl::expand): Handle boolean values specially. Check for constants and fall back on aarch64_emit_sve_pred_vec_duplicate for the variable case, ensuring that the result has mode VNx16BI. gcc/testsuite/ * gcc.target/aarch64/sve/acle/general/dup_1.c: New test. Diff: --- gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-sve-builtins-base.cc | 18 ++++++++- gcc/config/aarch64/aarch64-sve.md | 5 +-- gcc/config/aarch64/aarch64.cc | 21 ++++++++++ .../gcc.target/aarch64/sve/acle/general/dup_1.c | 47 ++++++++++++++++++++++ 5 files changed, 87 insertions(+), 5 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 38c307cdc3a6..87b3f4551b4f 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1039,6 +1039,7 @@ void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); rtx aarch64_replace_reg_mode (rtx, machine_mode); void aarch64_split_sve_subreg_move (rtx, rtx, rtx); +void aarch64_emit_sve_pred_vec_duplicate (machine_mode, rtx, rtx); void aarch64_expand_prologue (void); void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool); void aarch64_expand_vector_init (rtx, rtx); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index d58d5972baf1..314d53ec9adc 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -1050,6 +1050,23 @@ public: rtx expand (function_expander &e) const override { + machine_mode mode = e.vector_mode (0); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + gcc_assert (e.pred == PRED_none); + + rtx src = e.args[0]; + if (GET_CODE (src) == CONST_INT) + return (src == const0_rtx + ? CONST0_RTX (VNx16BImode) + : aarch64_ptrue_all (e.type_suffix (0).element_bytes)); + + rtx dest = e.get_reg_target (); + src = force_reg (GET_MODE (src), src); + aarch64_emit_sve_pred_vec_duplicate (mode, dest, src); + return dest; + } + if (e.pred == PRED_none || e.pred == PRED_x) /* There's no benefit to using predicated instructions for _x here. */ return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab)); @@ -1058,7 +1075,6 @@ public: the duplicate of the function argument and the "false" value is the value of inactive lanes. */ insn_code icode; - machine_mode mode = e.vector_mode (0); if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ())) /* Duplicate the constant to fill a vector. The pattern optimizes various cases involving constant operands, falling back to SEL diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index f01e05e4142d..88d323af32dc 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -2990,10 +2990,7 @@ (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))] "TARGET_SVE" { - rtx tmp = gen_reg_rtx (DImode); - rtx op1 = gen_lowpart (DImode, operands[1]); - emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); - emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); + aarch64_emit_sve_pred_vec_duplicate (<MODE>mode, operands[0], operands[1]); DONE; } ) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 565c532c7b41..f72db5f10a87 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -6752,6 +6752,27 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src) dest, ptrue, src)); } +/* Set predicate register DEST such that every element has the scalar + boolean value in SRC, with any nonzero source counting as "true". + MODE is a MODE_VECTOR_BOOL that determines the element size; + DEST can have this mode or VNx16BImode. In the latter case, + the upper bits of each element are defined to be zero, as for + the .H, .S, and .D forms of PTRUE. */ + +void +aarch64_emit_sve_pred_vec_duplicate (machine_mode mode, rtx dest, rtx src) +{ + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_ashldi3 (tmp, gen_lowpart (DImode, src), + gen_int_mode (63, DImode))); + if (GET_MODE (dest) == VNx16BImode) + emit_insn (gen_aarch64_sve_while_acle (UNSPEC_WHILELO, DImode, mode, + dest, const0_rtx, tmp)); + else + emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode, + dest, const0_rtx, tmp)); +} + static bool aarch64_function_ok_for_sibcall (tree, tree exp) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dup_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dup_1.c new file mode 100644 index 000000000000..c3c4e2d086e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dup_1.c @@ -0,0 +1,47 @@ +/* { dg-options "-O2" } */ + +#include <arm_sve.h> + +svbool_t +test1 (int x) +{ + return svand_z (svptrue_b16 (), svdup_b16 (x), svptrue_b16 ()); +} + +svbool_t +test2 (int x) +{ + return svand_z (svptrue_b8 (), svdup_b32 (x), svptrue_b16 ()); +} + +svbool_t +test3 (int x) +{ + return svand_z (svptrue_b32 (), svdup_b32 (x), svptrue_b16 ()); +} + +svbool_t +test4 (int x) +{ + return svand_z (svptrue_b32 (), svdup_b32 (x), svptrue_b32 ()); +} + +svbool_t +test5 (int x) +{ + return svand_z (svptrue_b8 (), svdup_b64 (x), svptrue_b32 ()); +} + +svbool_t +test6 (int x) +{ + return svand_z (svptrue_b16 (), svdup_b64 (x), svptrue_b8 ()); +} + +svbool_t +test7 (int x) +{ + return svand_z (svptrue_b16 (), svdup_b64 (x), svptrue_b64 ()); +} + +/* { dg-final { scan-assembler-not {\tand\t} } } */