Re: [PATCH 11/12] aarch64: Use VNx16BI for svdupq_b*

Kyrylo Tkachov Thu, 31 Jul 2025 10:04:26 -0700

> On 29 Jul 2025, at 18:41, Richard Sandiford <richard.sandif...@arm.com> wrote:
> 
> This patch continues the work of making ACLE intrinsics use VNx16BI
> for svbool_t results.  It deals with the predicate forms of svdupq.
> 
> The general predicate expansion builds an equivalent integer vector
> and then compares it with zero.  This patch therefore relies on
> the earlier patches to the comparison patterns.
> 

Ok.
Thanks,
Kyrill

> gcc/
> * config/aarch64/aarch64-protos.h
> (aarch64_convert_sve_data_to_pred): Remove the mode argument.
> * config/aarch64/aarch64.cc
> (aarch64_sve_emit_int_cmp): Allow PRED_MODE to be VNx16BI or
> the natural predicate mode for the data mode.
> (aarch64_convert_sve_data_to_pred): Remove the mode argument
> and instead always create a VNx16BI result.
> (aarch64_expand_sve_const_pred): Update call accordingly.
> * config/aarch64/aarch64-sve-builtins-base.cc
> (svdupq_impl::expand): Likewise, ensuring that the result
> has mode VNx16BI.
> 
> gcc/testsuite/
> * gcc.target/aarch64/sve/acle/general/dupq_13.c: New test.
> ---
> gcc/config/aarch64/aarch64-protos.h           |  2 +-
> .../aarch64/aarch64-sve-builtins-base.cc      |  3 +-
> gcc/config/aarch64/aarch64.cc                 | 26 ++++++-----
> .../aarch64/sve/acle/general/dupq_13.c        | 45 +++++++++++++++++++
> 4 files changed, 63 insertions(+), 13 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 8f2fc9d2f97..7d3312b9918 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1020,7 +1020,7 @@ void aarch64_err_no_fpadvsimd (machine_mode);
> void aarch64_expand_epilogue (rtx_call_insn *);
> rtx aarch64_ptrue_all (unsigned int);
> opt_machine_mode aarch64_ptrue_all_mode (rtx);
> -rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
> +rtx aarch64_convert_sve_data_to_pred (rtx, rtx);
> rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
> void aarch64_expand_mov_immediate (rtx, rtx);
> rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 314d53ec9ad..ecc06877cac 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -1215,8 +1215,7 @@ public:
>     if (mode != e.vector_mode (0))
>       {
> rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
> - return aarch64_convert_sve_data_to_pred (e.possible_target,
> - e.vector_mode (0), data_dupq);
> + return aarch64_convert_sve_data_to_pred (e.possible_target, data_dupq);
>       }
> 
>     return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 3c8d08b7fdf..8b2395f243e 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -3935,16 +3935,24 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
> 
> /* Emit a comparison CMP between OP0 and OP1, both of which have mode
>    DATA_MODE, and return the result in a predicate of mode PRED_MODE.
> -   Use TARGET as the target register if nonnull and convenient.  */
> +   Use TARGET as the target register if nonnull and convenient.
> +
> +   PRED_MODE can be either VNx16BI or the natural predicate mode for
> +   DATA_MODE.  */
> 
> static rtx
> aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
>  machine_mode data_mode, rtx op1, rtx op2)
> {
> -  insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
> +  auto src_pred_mode = aarch64_sve_pred_mode (data_mode);
> +  insn_code icode;
> +  if (known_eq (GET_MODE_NUNITS (pred_mode), GET_MODE_NUNITS (data_mode)))
> +    icode = code_for_aarch64_pred_cmp (cmp, data_mode);
> +  else
> +    icode = code_for_aarch64_pred_cmp_acle (cmp, data_mode);
>   expand_operand ops[5];
>   create_output_operand (&ops[0], target, pred_mode);
> -  create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
> +  create_input_operand (&ops[1], CONSTM1_RTX (src_pred_mode), src_pred_mode);
>   create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
>   create_input_operand (&ops[3], op1, data_mode);
>   create_input_operand (&ops[4], op2, data_mode);
> @@ -3952,15 +3960,14 @@ aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
>   return ops[0].value;
> }
> 
> -/* Use a comparison to convert integer vector SRC into MODE, which is
> -   the corresponding SVE predicate mode.  Use TARGET for the result
> -   if it's nonnull and convenient.  */
> +/* Use a comparison to convert integer vector SRC into VNx16BI.
> +   Use TARGET for the result if it's nonnull and convenient.  */
> 
> rtx
> -aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
> +aarch64_convert_sve_data_to_pred (rtx target, rtx src)
> {
>   machine_mode src_mode = GET_MODE (src);
> -  return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
> +  return aarch64_sve_emit_int_cmp (target, VNx16BImode, NE, src_mode,
>   src, CONST0_RTX (src_mode));
> }
> 
> @@ -6272,8 +6279,7 @@ aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
>   for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
>     int_builder.quick_push (INTVAL (builder.elt (i))
>    ? constm1_rtx : const0_rtx);
> -  return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
> -   int_builder.build ());
> +  return aarch64_convert_sve_data_to_pred (target, int_builder.build ());
> }
> 
> /* Set DEST to immediate IMM.  */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
> new file mode 100644
> index 00000000000..6d702b86b7d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
> @@ -0,0 +1,45 @@
> +/* { dg-options "-O2" } */
> +
> +#include <arm_sve.h>
> +
> +svbool_t
> +test1 (int x0, int x1)
> +{
> +  return svand_z (svptrue_b8 (), svdupq_b64 (x0, x1), svptrue_b16 ());
> +}
> +
> +svbool_t
> +test2 (int x0, int x1, int x2, int x3)
> +{
> +  return svand_z (svptrue_b8 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b16 ());
> +}
> +
> +svbool_t
> +test3 (int x0, int x1, int x2, int x3)
> +{
> +  return svand_z (svptrue_b32 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b16 ());
> +}
> +
> +svbool_t
> +test4 (int x0, int x1, int x2, int x3)
> +{
> +  return svand_z (svptrue_b32 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b32 ());
> +}
> +
> +svbool_t
> +test5 (int x0, int x1, int x2, int x3)
> +{
> +  return svand_z (svptrue_b8 (),
> +  svdupq_b16 (x0, x1, x2, x3, x2, x0, x1, x3),
> +  svptrue_b32 ());
> +}
> +
> +svbool_t
> +test6 (int x0, int x1, int x2, int x3)
> +{
> +  return svand_z (svptrue_b64 (),
> +  svdupq_b16 (x0, x1, x2, x3, x2, x0, x1, x3),
> +  svptrue_b16 ());
> +}
> +
> +/* { dg-final { scan-assembler-not {\tand\tp} } } */
> -- 
> 2.43.0
>
Re: [PATCH 11/12] aarch64: Use VNx16BI for svdupq_b*

Reply via email to