"Victor L. Do Nascimento" <victor.donascime...@arm.com> writes: > The backend pattern for storing a pair of identical values in 32 and > 64-bit modes with the machine instruction STP was missing, and > multiple instructions were needed to reproduce this behavior as a > result of failed RTL pattern match in the combine pass. > > For the test case: > > typedef long long v2di __attribute__((vector_size (16))); > typedef int v2si __attribute__((vector_size (8))); > > void > foo (v2di *x, long long a) > { > v2di tmp = {a, a}; > *x = tmp; > } > > void > foo2 (v2si *x, int a) > { > v2si tmp = {a, a}; > *x = tmp; > } > > at -O2 on aarch64 gives: > > foo > stp x1, x1, [x0] > ret > foo2: > stp w1, w1, [x0] > ret > > instead of: > > foo: > dup v0.2d, x1 > str q0, [x0] > ret > foo2: > dup v0.2s, w1 > str d0, [x0] > ret > > Bootstrapped and regtested on aarch64-none-linux-gnu. Ok to install? > > gcc/ > * config/aarch64/aarch64-simd.md(aarch64_simd_stp<mode>): New. > * config/aarch64/constraints.md: Make "Umn" relaxed memory > constraint. > * config/aarch64/iterators.md(ldpstp_vel_sz): New. > > gcc/testsuite/ > * gcc.target/aarch64/stp_vec_dup_32_64-1.c:
Nit: missing text after ":" OK to install with that fixed, thanks. Please follow https://gcc.gnu.org/gitwrite.html to get write access. Richard > --- > gcc/config/aarch64/aarch64-simd.md | 10 ++++ > gcc/config/aarch64/constraints.md | 2 +- > gcc/config/aarch64/iterators.md | 3 + > .../gcc.target/aarch64/stp_vec_dup_32_64-1.c | 57 +++++++++++++++++++ > 4 files changed, 71 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index de2b7383749..8b5e67bd100 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -229,6 +229,16 @@ > [(set_attr "type" "neon_stp")] > ) > > +(define_insn "aarch64_simd_stp<mode>" > + [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn") > + (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand" "w,r")))] > + "TARGET_SIMD" > + "@ > + stp\\t%<Vetype>1, %<Vetype>1, %y0 > + stp\\t%<vw>1, %<vw>1, %y0" > + [(set_attr "type" "neon_stp, store_<ldpstp_vel_sz>")] > +) > + > (define_insn "load_pair<VQ:mode><VQ2:mode>" > [(set (match_operand:VQ 0 "register_operand" "=w") > (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump")) > diff --git a/gcc/config/aarch64/constraints.md > b/gcc/config/aarch64/constraints.md > index 5b20abc27e5..6df1dbec2a8 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -287,7 +287,7 @@ > ;; Used for storing or loading pairs in an AdvSIMD register using an STP/LDP > ;; as a vector-concat. The address mode uses the same constraints as if it > ;; were for a single value. > -(define_memory_constraint "Umn" > +(define_relaxed_memory_constraint "Umn" > "@internal > A memory address suitable for a load/store pair operation." 
> (and (match_code "mem") > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index 6cbc97cc82c..980dacb8025 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -1017,6 +1017,9 @@ > ;; Likewise for load/store pair. > (define_mode_attr ldpstp_sz [(SI "8") (DI "16")]) > > +;; Size of element access for STP/LDP-generated vectors. > +(define_mode_attr ldpstp_vel_sz [(V2SI "8") (V2SF "8") (V2DI "16") (V2DF > "16")]) > + > ;; For inequal width int to float conversion > (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")]) > (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")]) > diff --git a/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c > b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c > new file mode 100644 > index 00000000000..fc2c1ea39e0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/stp_vec_dup_32_64-1.c > @@ -0,0 +1,57 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +typedef long long v2di __attribute__((vector_size (16))); > +typedef int v2si __attribute__((vector_size (8))); > + > +#define TESTV2DI(lab, idx) \ > + void \ > + stpv2di_##lab (v2di *x, long long a) \ > + { \ > + v2di tmp = {a, a}; \ > + x[idx] = tmp; \ > + } > + > + > +#define TESTV2SI(lab, idx) \ > + void \ > + stpv2si_##lab (v2si *x, int a) \ > + { \ > + v2si tmp = {a, a}; \ > + x[idx] = tmp; \ > + } \ > + > +/* Core test, no imm assembler offset: */ > + > +TESTV2SI(0, 0) > +TESTV2DI(0, 0) > +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+\]} } } */ > +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+\]} } } */ > + > +/* Lower offset bounds: */ > + > +/* Valid offsets: */ > +TESTV2SI(1, -32) > +TESTV2DI(1, -32) > +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, -256\]} } > } */ > +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, -512\]} } > } */ > +/* Invalid offsets: */ > +TESTV2SI(2, -33) > +TESTV2DI(2, -33) > +/* { 
dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, > -264\]} } } */ > +/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, > -528\]} } } */ > + > +/* Upper offset bounds: */ > + > +/* Valid offsets: */ > +TESTV2SI(3, 31) > +TESTV2DI(3, 31) > +/* { dg-final { scan-assembler {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 248\]} } } > */ > +/* { dg-final { scan-assembler {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 496\]} } } > */ > +/* Invalid offsets: */ > +TESTV2SI(4, 32) > +TESTV2DI(4, 32) > +/* { dg-final { scan-assembler-not {\s+stp\t(w[0-9]+), \1, \[x[0-9]+, 256\]} > } } */ > +/* { dg-final { scan-assembler-not {\s+stp\t(x[0-9]+), \1, \[x[0-9]+, 512\]} > } } */ > + > +