Gentle ping
On Fri, Apr 17, 2026 at 11:40 AM Pengxuan Zheng <[email protected]> wrote:
> Currently, with Advanced SIMD
>
> vector char
> f (vector char a)
> {
> return __builtin_shuffle (a, (vector char){ 15, 14, 13, 12, 11, 10, 9, 8,
> 7, 6, 5, 4, 3, 2, 1, 0 });
> }
>
> generates:
>
> f:
> adrp x0, .LANCHOR0
> ldr q31, [x0, #:lo12:.LANCHOR0]
> tbl v0.16b, {v0.16b}, v31.16b
> ret
> .set .LANCHOR0,. + 0
> .LC0:
> .byte 15
> .byte 14
> .byte 13
> .byte 12
> .byte 11
> .byte 10
> .byte 9
> .byte 8
> .byte 7
> .byte 6
> .byte 5
> .byte 4
> .byte 3
> .byte 2
> .byte 1
> .byte 0
>
> With this patch, it generates REV64 followed by EXT:
>
> f:
> rev64 v0.16b, v0.16b
> ext v0.16b, v0.16b, v0.16b, #8
> ret
>
> Bootstrapped and tested on aarch64_linux_gnu.
>
> PR target/102055
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64.cc (aarch64_evpc_rev64_ext): New.
> (aarch64_expand_vec_perm_const_1): Call aarch64_evpc_rev64_ext.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/pr102055.c: New test.
>
> Signed-off-by: Pengxuan Zheng <[email protected]>
> ---
> gcc/config/aarch64/aarch64.cc | 32 ++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/pr102055.c | 42 +++++++++++++++++++++
> 2 files changed, 74 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr102055.c
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 62194b96450..5fdb1a42c99 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -27714,6 +27714,36 @@ aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
> return true;
> }
>
> +/* Recognize patterns for the Advanced SIMD REV64 + EXT insns, which reverse
> + elements within a full vector. */
> +
> +static bool
> +aarch64_evpc_rev64_ext (struct expand_vec_perm_d *d)
> +{
> + poly_uint64 nelt = d->perm.length ();
> +
> + if (!d->one_vector_p || d->vec_flags != VEC_ADVSIMD)
> + return false;
> +
> + if (!d->perm.series_p (0, 1, nelt - 1, -1))
> + return false;
> +
> + if (d->testing_p)
> + return true;
> +
> + rtx tmp1 = gen_reg_rtx (d->vmode);
> + rtx tmp2 = gen_reg_rtx (V16QImode);
> + rtx unspec_rev64
> + = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV64);
> + emit_set_insn (tmp1, unspec_rev64);
> + rtvec vec = gen_rtvec (3, gen_lowpart (V16QImode, tmp1),
> + gen_lowpart (V16QImode, tmp1), GEN_INT (8));
> + rtx unspec_ext = gen_rtx_UNSPEC (V16QImode, vec, UNSPEC_EXT);
> + emit_set_insn (tmp2, unspec_ext);
> + emit_set_insn (d->target, gen_lowpart (d->vmode, tmp2));
> + return true;
> +}
> +
> static bool
> aarch64_evpc_dup (struct expand_vec_perm_d *d)
> {
> @@ -28175,6 +28205,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
> return true;
> else if (aarch64_evpc_hvla (d))
> return true;
> + else if (aarch64_evpc_rev64_ext (d))
> + return true;
> else if (aarch64_evpc_reencode (d))
> return true;
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr102055.c b/gcc/testsuite/gcc.target/aarch64/pr102055.c
> new file mode 100644
> index 00000000000..39b6355fc66
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr102055.c
> @@ -0,0 +1,42 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#define vector __attribute__ ((vector_size (16)))
> +
> +/*
> +** f:
> +** rev64 v([0-9]+).16b, v0.16b
> +** ext v0.16b, v\1.16b, v\1.16b, #8
> +** ret
> +*/
> +vector char
> +f (vector char a)
> +{
> + return __builtin_shuffle (a, (vector char){ 15, 14, 13, 12, 11, 10, 9, 8,
> + 7, 6, 5, 4, 3, 2, 1, 0 });
> +}
> +
> +/*
> +** f1:
> +** rev64 v([0-9]+).8h, v0.8h
> +** ext v0.16b, v\1.16b, v\1.16b, #8
> +** ret
> +*/
> +vector short
> +f1 (vector short a)
> +{
> + return __builtin_shuffle (a, (vector short){ 7, 6, 5, 4, 3, 2, 1, 0 });
> +}
> +
> +/*
> +** f2:
> +** rev64 v([0-9]+).4s, v0.4s
> +** ext v0.16b, v\1.16b, v\1.16b, #8
> +** ret
> +*/
> +vector int
> +f2 (vector int a)
> +{
> + return __builtin_shuffle (a, (vector int){ 3, 2, 1, 0 });
> +}
> --
> 2.34.1
>
>