Re: [PATCH v2] Leveraging the use of STP instruction for vec_duplicate

Richard Sandiford via Gcc-patches Fri, 21 Apr 2023 08:57:49 -0700

"Victor L. Do Nascimento" <victor.donascime...@arm.com> writes:
> The backend pattern for storing a pair of identical values in 32 and
> 64-bit modes with the machine instruction STP was missing, and
> multiple instructions were needed to reproduce this behavior as a
> result of failed RTL pattern match in the combine pass.
>
> For the test case:
>
> typedef long long v2di __attribute__((vector_size (16)));
> typedef int v2si __attribute__((vector_size (8)));
>
> void
> foo (v2di *x, long long a)
> {
>   v2di tmp = {a, a};
>   *x = tmp;
> }
>
> void
> foo2 (v2si *x, int a)
> {
>   v2si tmp = {a, a};
>   *x = tmp;
> }
>
> at -O2 on aarch64 gives:
>
> foo:
>   stp x1, x1, [x0]
>   ret
> foo2:
>   stp w1, w1, [x0]
>   ret
>
> instead of:
>
> foo:
>   dup     v0.2d, x1
>   str     q0, [x0]
>   ret
> foo2:
>   dup     v0.2s, w1
>   str     d0, [x0]
>   ret
>
> Bootstrapped and regtested on aarch64-none-linux-gnu.  Ok to install?
>
> gcc/
>       * config/aarch64/aarch64-simd.md(aarch64_simd_stp<mode>): New.
>       * config/aarch64/constraints.md: Make "Umn" relaxed memory
>       constraint.
>       * config/aarch64/iterators.md(ldpstp_vel_sz): New.
>
> gcc/testsuite/
>       * gcc.target/aarch64/stp_vec_dup_32_64-1.c:


Nit: missing text after ":"

OK to install with that fixed, thanks.  Please follow
https://gcc.gnu.org/gitwrite.html to get write access.

Richard

> ---
>  gcc/config/aarch64/aarch64-simd.md            | 10 ++++
>  gcc/config/aarch64/constraints.md             |  2 +-
>  gcc/config/aarch64/iterators.md               |  3 +
>  .../gcc.target/aarch64/stp_vec_dup_32_64-1.c  | 57 +++++++++++++++++++
>  4 files changed, 71 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index de2b7383749..8b5e67bd100 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -229,6 +229,16 @@
>    [(set_attr "type" "neon_stp")]
>  )
>  
> +(define_insn "aarch64_simd_stp<mode>"
> +  [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn")
> +     (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand" "w,r")))]
> +  "TARGET_SIMD"
> +  "@
> +   stp\\t%<Vetype>1, %<Vetype>1, %y0
> +   stp\\t%<vw>1, %<vw>1, %y0"
> +  [(set_attr "type" "neon_stp, store_<ldpstp_vel_sz>")]
> +)
> +
>  (define_insn "load_pair<VQ:mode><VQ2:mode>"
>    [(set (match_operand:VQ 0 "register_operand" "=w")
>       (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 5b20abc27e5..6df1dbec2a8 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -287,7 +287,7 @@
>  ;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP
>  ;; as a vector-concat.  The address mode uses the same constraints as if it
>  ;; were for a single value.
> -(define_memory_constraint "Umn"
> +(define_relaxed_memory_constraint "Umn"
>    "@internal
>    A memory address suitable for a load/store pair operation."
>    (and (match_code "mem")
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 6cbc97cc82c..980dacb8025 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1017,6 +1017,9 @@
>  ;; Likewise for load/store pair.
>  (define_mode_attr ldpstp_sz [(SI "8") (DI "16")])
>  
> +;; Size of element access for STP/LDP-generated vectors.
> +(define_mode_attr ldpstp_vel_sz [(V2SI "8") (V2SF "8") (V2DI "16") (V2DF "16")])
> +
>  ;; For inequal width int to float conversion
>  (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")])
>  (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")])
> diff --git a/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c
> new file mode 100644
> index 00000000000..fc2c1ea39e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c
> @@ -0,0 +1,57 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +typedef long long v2di __attribute__((vector_size (16)));
> +typedef int v2si __attribute__((vector_size (8)));
> +
> +#define TESTV2DI(lab, idx)                   \
> +  void                                               \
> +  stpv2di_##lab (v2di *x, long long a)               \
> +  {                                          \
> +    v2di tmp = {a, a};                               \
> +    x[idx] = tmp;                            \
> +  }
> +
> +
> +#define TESTV2SI(lab, idx)                   \
> +  void                                               \
> +  stpv2si_##lab (v2si *x, int a)             \
> +  {                                          \
> +    v2si tmp = {a, a};                               \
> +    x[idx] = tmp;                            \
> +  }                                          \
> +
> +/* Core test, no imm assembler offset:  */
> +
> +TESTV2SI(0, 0)
> +TESTV2DI(0, 0)
> +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+\]} } } */
> +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+\]} } } */
> +
> +/* Lower offset bounds:  */
> +
> +/* Valid offsets:  */
> +TESTV2SI(1, -32)
> +TESTV2DI(1, -32)
> +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -256\]} } } */
> +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, -512\]} } } */
> +/* Invalid offsets:  */
> +TESTV2SI(2, -33)
> +TESTV2DI(2, -33)
> +/* { dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -264\]} } } */
> +/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, -528\]} } } */
> +
> +/* Upper offset bounds:   */
> +
> +/* Valid offsets:  */
> +TESTV2SI(3, 31)
> +TESTV2DI(3, 31)
> +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 248\]} } } */
> +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 496\]} } } */
> +/* Invalid offsets:  */
> +TESTV2SI(4, 32)
> +TESTV2DI(4, 32)
> +/* { dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 256\]} } } */
> +/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 512\]} } } */
> +
> +

Re: [PATCH v2] Leveraging the use of STP instruction for vec_duplicate

Reply via email to