apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> From: Andrew Pinski <apin...@marvell.com>
>
> The function aarch64_evpc_ins would reuse the target even though
> it might be the same register as the two inputs.
> Instead of checking to see if we can reuse the target, creating
> a new register always is better.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
>       PR target/101529
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
>       as an input instead create a new reg.
>
> gcc/testsuite/ChangeLog:
>
>       * c-c++-common/torture/builtin-convertvector-2.c: New test.
>       * c-c++-common/torture/builtin-shufflevector-2.c: New test.
> ---
>  gcc/config/aarch64/aarch64.c                  |  8 ++++--
>  .../torture/builtin-convertvector-2.c         | 26 +++++++++++++++++++
>  .../torture/builtin-shufflevector-2.c         | 26 +++++++++++++++++++
>  3 files changed, 58 insertions(+), 2 deletions(-)
>  create mode 100644 
> gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
>  create mode 100644 
> gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 2c00583e12c..e4fc546fae7 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
>      }
>    gcc_assert (extractindex < nelt);
>  
> -  emit_move_insn (d->target, insv);
> +  /* Use a new reg instead of target as one of the
> +     operands might be target. */
> +  rtx original = gen_reg_rtx (GET_MODE (d->target));
> +
> +  emit_move_insn (original, insv);

Why can't we just remove the move and pass insv directly to
create_input_operand?  That'll encourage the RA to allocate insv and
d->target to the same register where possible (but will reload where
tying is not possible).

OK with that change if it works.

Thanks,
Richard

>    insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
>    expand_operand ops[5];
>    create_output_operand (&ops[0], d->target, mode);
> -  create_input_operand (&ops[1], d->target, mode);
> +  create_input_operand (&ops[1], original, mode);
>    create_integer_operand (&ops[2], 1 << idx);
>    create_input_operand (&ops[3], extractv, mode);
>    create_integer_operand (&ops[4], extractindex);
> diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c 
> b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> new file mode 100644
> index 00000000000..d88f6a72b5c
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run } */
> +/* PR target/101529 */
> +
> +typedef unsigned char __attribute__((__vector_size__ (1))) W;
> +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> +typedef unsigned short __attribute__((__vector_size__ (16))) U;
> +
> +unsigned short us;
> +
> +/* aarch64 used to miscompile foo to just return 0. */
> +W
> +foo (unsigned char uc)
> +{
> +  V v = __builtin_convertvector ((U){ } >= us, V);
> +  return __builtin_shufflevector ((W){ }, v, 4) & uc;
> +}
> +
> +int
> +main (void)
> +{
> +  W x = foo (5);
> +  if (x[0] != 5)
> +    __builtin_abort();
> +  return 0;
> +}
> +
> diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c 
> b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> new file mode 100644
> index 00000000000..7c4999ed4e9
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run}  */
> +/* PR target/101529 */
> +typedef unsigned char C;
> +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> +typedef unsigned char __attribute__((__vector_size__ (32))) U;
> +
> +C c;
> +
> +/* aarch64 used to miscompile foo to just return a vector of 0s */
> +V
> +foo (V v)
> +{
> +  v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
> +                             0, 1, 8, 32, 8, 20, 36, 36);
> +  return v;
> +}
> +
> +int
> +main (void)
> +{
> +  V v = foo ((V) { });
> +  for (unsigned i = 0; i < sizeof (v); i++)
> +    if (v[i] != (i >= 2 ? 0xff : 0))
> +      __builtin_abort ();
> +  return 0;
> +}

Reply via email to