> On 29 Jul 2025, at 18:41, Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> This patch continues the work of making ACLE intrinsics use VNx16BI
> for svbool_t results. It deals with the predicate forms of svdupq.
>
> The general predicate expansion builds an equivalent integer vector
> and then compares it with zero. This patch therefore relies on
> the earlier patches to the comparison patterns.
>
Ok.
Thanks,
Kyrill
> gcc/
> * config/aarch64/aarch64-protos.h
> (aarch64_convert_sve_data_to_pred): Remove the mode argument.
> * config/aarch64/aarch64.cc
> (aarch64_sve_emit_int_cmp): Allow PRED_MODE to be VNx16BI or
> the natural predicate mode for the data mode.
> (aarch64_convert_sve_data_to_pred): Remove the mode argument
> and instead always create a VNx16BI result.
> (aarch64_expand_sve_const_pred): Update call accordingly.
> * config/aarch64/aarch64-sve-builtins-base.cc
> (svdupq_impl::expand): Likewise, ensuring that the result
> has mode VNx16BI.
>
> gcc/testsuite/
> * gcc.target/aarch64/sve/acle/general/dupq_13.c: New test.
> ---
> gcc/config/aarch64/aarch64-protos.h | 2 +-
> .../aarch64/aarch64-sve-builtins-base.cc | 3 +-
> gcc/config/aarch64/aarch64.cc | 26 ++++++-----
> .../aarch64/sve/acle/general/dupq_13.c | 45 +++++++++++++++++++
> 4 files changed, 63 insertions(+), 13 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 8f2fc9d2f97..7d3312b9918 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1020,7 +1020,7 @@ void aarch64_err_no_fpadvsimd (machine_mode);
> void aarch64_expand_epilogue (rtx_call_insn *);
> rtx aarch64_ptrue_all (unsigned int);
> opt_machine_mode aarch64_ptrue_all_mode (rtx);
> -rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
> +rtx aarch64_convert_sve_data_to_pred (rtx, rtx);
> rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
> void aarch64_expand_mov_immediate (rtx, rtx);
> rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 314d53ec9ad..ecc06877cac 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -1215,8 +1215,7 @@ public:
> if (mode != e.vector_mode (0))
> {
> rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
> - return aarch64_convert_sve_data_to_pred (e.possible_target,
> - e.vector_mode (0), data_dupq);
> + return aarch64_convert_sve_data_to_pred (e.possible_target, data_dupq);
> }
>
> return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 3c8d08b7fdf..8b2395f243e 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -3935,16 +3935,24 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
>
> /* Emit a comparison CMP between OP0 and OP1, both of which have mode
> DATA_MODE, and return the result in a predicate of mode PRED_MODE.
> - Use TARGET as the target register if nonnull and convenient. */
> + Use TARGET as the target register if nonnull and convenient.
> +
> + PRED_MODE can be either VNx16BI or the natural predicate mode for
> + DATA_MODE. */
>
> static rtx
> aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
> machine_mode data_mode, rtx op1, rtx op2)
> {
> - insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
> + auto src_pred_mode = aarch64_sve_pred_mode (data_mode);
> + insn_code icode;
> + if (known_eq (GET_MODE_NUNITS (pred_mode), GET_MODE_NUNITS (data_mode)))
> + icode = code_for_aarch64_pred_cmp (cmp, data_mode);
> + else
> + icode = code_for_aarch64_pred_cmp_acle (cmp, data_mode);
> expand_operand ops[5];
> create_output_operand (&ops[0], target, pred_mode);
> - create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
> + create_input_operand (&ops[1], CONSTM1_RTX (src_pred_mode), src_pred_mode);
> create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
> create_input_operand (&ops[3], op1, data_mode);
> create_input_operand (&ops[4], op2, data_mode);
> @@ -3952,15 +3960,14 @@ aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
> return ops[0].value;
> }
>
> -/* Use a comparison to convert integer vector SRC into MODE, which is
> - the corresponding SVE predicate mode. Use TARGET for the result
> - if it's nonnull and convenient. */
> +/* Use a comparison to convert integer vector SRC into VNx16BI.
> + Use TARGET for the result if it's nonnull and convenient. */
>
> rtx
> -aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
> +aarch64_convert_sve_data_to_pred (rtx target, rtx src)
> {
> machine_mode src_mode = GET_MODE (src);
> - return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
> + return aarch64_sve_emit_int_cmp (target, VNx16BImode, NE, src_mode,
> src, CONST0_RTX (src_mode));
> }
>
> @@ -6272,8 +6279,7 @@ aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
> for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
> int_builder.quick_push (INTVAL (builder.elt (i))
> ? constm1_rtx : const0_rtx);
> - return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
> - int_builder.build ());
> + return aarch64_convert_sve_data_to_pred (target, int_builder.build ());
> }
>
> /* Set DEST to immediate IMM. */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
> new file mode 100644
> index 00000000000..6d702b86b7d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
> @@ -0,0 +1,45 @@
> +/* { dg-options "-O2" } */
> +
> +#include <arm_sve.h>
> +
> +svbool_t
> +test1 (int x0, int x1)
> +{
> + return svand_z (svptrue_b8 (), svdupq_b64 (x0, x1), svptrue_b16 ());
> +}
> +
> +svbool_t
> +test2 (int x0, int x1, int x2, int x3)
> +{
> + return svand_z (svptrue_b8 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b16 ());
> +}
> +
> +svbool_t
> +test3 (int x0, int x1, int x2, int x3)
> +{
> + return svand_z (svptrue_b32 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b16 ());
> +}
> +
> +svbool_t
> +test4 (int x0, int x1, int x2, int x3)
> +{
> + return svand_z (svptrue_b32 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b32 ());
> +}
> +
> +svbool_t
> +test5 (int x0, int x1, int x2, int x3)
> +{
> + return svand_z (svptrue_b8 (),
> + svdupq_b16 (x0, x1, x2, x3, x2, x0, x1, x3),
> + svptrue_b32 ());
> +}
> +
> +svbool_t
> +test6 (int x0, int x1, int x2, int x3)
> +{
> + return svand_z (svptrue_b64 (),
> + svdupq_b16 (x0, x1, x2, x3, x2, x0, x1, x3),
> + svptrue_b16 ());
> +}
> +
> +/* { dg-final { scan-assembler-not {\tand\tp} } } */
> --
> 2.43.0
>