Gentle ping

On Fri, Apr 17, 2026 at 11:40 AM Pengxuan Zheng <
[email protected]> wrote:

> Currently, with Advanced SIMD
>
> vector char
> f (vector char a)
> {
>   return __builtin_shuffle (a, (vector char){ 15, 14, 13, 12, 11, 10, 9, 8,
>                                               7, 6, 5, 4, 3, 2, 1, 0 });
> }
>
> generates:
>
> f:
>         adrp    x0, .LANCHOR0
>         ldr     q31, [x0, #:lo12:.LANCHOR0]
>         tbl     v0.16b, {v0.16b}, v31.16b
>         ret
>         .set    .LANCHOR0,. + 0
>         .LC0:
>         .byte   15
>         .byte   14
>         .byte   13
>         .byte   12
>         .byte   11
>         .byte   10
>         .byte   9
>         .byte   8
>         .byte   7
>         .byte   6
>         .byte   5
>         .byte   4
>         .byte   3
>         .byte   2
>         .byte   1
>         .byte   0
>
> With this patch, it generates REV64 followed by EXT:
>
> f:
>         rev64   v0.16b, v0.16b
>         ext     v0.16b, v0.16b, v0.16b, #8
>         ret
>
> Bootstrapped and tested on aarch64_linux_gnu.
>
>         PR target/102055
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64.cc (aarch64_evpc_rev64_ext): New.
>         (aarch64_expand_vec_perm_const_1): Call aarch64_evpc_rev64_ext.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/pr102055.c: New test.
>
> Signed-off-by: Pengxuan Zheng <[email protected]>
> ---
>  gcc/config/aarch64/aarch64.cc               | 32 ++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/pr102055.c | 42 +++++++++++++++++++++
>  2 files changed, 74 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr102055.c
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 62194b96450..5fdb1a42c99 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -27714,6 +27714,36 @@ aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
>    return true;
>  }
>
> +/* Recognize patterns for the Advanced SIMD REV64 + EXT insns, which reverse
> +   elements within a full vector.  */
> +
> +static bool
> +aarch64_evpc_rev64_ext (struct expand_vec_perm_d *d)
> +{
> +  poly_uint64 nelt = d->perm.length ();
> +
> +  if (!d->one_vector_p || d->vec_flags != VEC_ADVSIMD)
> +    return false;
> +
> +  if (!d->perm.series_p (0, 1, nelt - 1, -1))
> +    return false;
> +
> +  if (d->testing_p)
> +    return true;
> +
> +  rtx tmp1 = gen_reg_rtx (d->vmode);
> +  rtx tmp2 = gen_reg_rtx (V16QImode);
> +  rtx unspec_rev64
> +      = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV64);
> +  emit_set_insn (tmp1, unspec_rev64);
> +  rtvec vec = gen_rtvec (3, gen_lowpart (V16QImode, tmp1),
> +                        gen_lowpart (V16QImode, tmp1), GEN_INT (8));
> +  rtx unspec_ext = gen_rtx_UNSPEC (V16QImode, vec, UNSPEC_EXT);
> +  emit_set_insn (tmp2, unspec_ext);
> +  emit_set_insn (d->target, gen_lowpart (d->vmode, tmp2));
> +  return true;
> +}
> +
>  static bool
>  aarch64_evpc_dup (struct expand_vec_perm_d *d)
>  {
> @@ -28175,6 +28205,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
>             return true;
>           else if (aarch64_evpc_hvla (d))
>             return true;
> +         else if (aarch64_evpc_rev64_ext (d))
> +           return true;
>           else if (aarch64_evpc_reencode (d))
>             return true;
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr102055.c b/gcc/testsuite/gcc.target/aarch64/pr102055.c
> new file mode 100644
> index 00000000000..39b6355fc66
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr102055.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#define vector __attribute__ ((vector_size (16)))
> +
> +/*
> +** f:
> +**     rev64   v([0-9]+).16b, v0.16b
> +**     ext     v0.16b, v\1.16b, v\1.16b, #8
> +**     ret
> +*/
> +vector char
> +f (vector char a)
> +{
> +  return __builtin_shuffle (a, (vector char){ 15, 14, 13, 12, 11, 10, 9, 8,
> +                                             7, 6, 5, 4, 3, 2, 1, 0 });
> +}
> +
> +/*
> +** f1:
> +**     rev64   v([0-9]+).8h, v0.8h
> +**     ext     v0.16b, v\1.16b, v\1.16b, #8
> +**     ret
> +*/
> +vector short
> +f1 (vector short a)
> +{
> +  return __builtin_shuffle (a, (vector short){ 7, 6, 5, 4, 3, 2, 1, 0 });
> +}
> +
> +/*
> +** f2:
> +**     rev64   v([0-9]+).4s, v0.4s
> +**     ext     v0.16b, v\1.16b, v\1.16b, #8
> +**     ret
> +*/
> +vector int
> +f2 (vector int a)
> +{
> +  return __builtin_shuffle (a, (vector int){ 3, 2, 1, 0 });
> +}
> --
> 2.34.1
>
>

Reply via email to