Ping. Thanks, Jennifer > On 25 Apr 2025, at 17:08, Jennifer Schmitz <jschm...@nvidia.com> wrote: > > SVE loads and stores where the predicate is all-true can be optimized to > unpredicated instructions. For example, > svuint8_t foo (uint8_t *x) > { > return svld1 (svptrue_b8 (), x); > } > was compiled to: > foo: > ptrue p3.b, all > ld1b z0.b, p3/z, [x0] > ret > but can be compiled to: > foo: > ldr z0, [x0] > ret > > Late_combine2 had already been trying to do this, but was missing the > instruction: > (set (reg/i:VNx16QI 32 v0) > (unspec:VNx16QI [ > (const_vector:VNx16BI repeat [ > (const_int 1 [0x1]) > ]) > (mem:VNx16QI (reg/f:DI 0 x0 [orig:106 x ] [106]) > [0 MEM <svuint8_t> [(unsigned char *)x_2(D)]+0 S[16, 16] A8]) > ] UNSPEC_PRED_X)) > > This patch adds a new define_insn_and_split that matches the missing > instruction and splits it to an unpredicated load/store. Because LDR > offers fewer addressing modes than LD1[BHWD], the pattern is > guarded under reload_completed to only apply the transform once the > address modes have been chosen during RA. > > The patch was bootstrapped and tested on aarch64-linux-gnu, no regression. > OK for mainline? > > Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com> > > gcc/ > * config/aarch64/aarch64-sve.md (*aarch64_sve_ptrue<mode>_ldr_str): > Add define_insn_and_split to fold predicated SVE loads/stores with > ptrue predicates to unpredicated instructions. > > gcc/testsuite/ > * gcc.target/aarch64/sve/ptrue_ldr_str.c: New test. > * gcc.target/aarch64/sve/acle/general/attributes_6.c: Adjust > expected outcome. > * gcc.target/aarch64/sve/cost_model_14.c: Adjust expected outcome. > * gcc.target/aarch64/sve/cost_model_4.c: Adjust expected outcome. > * gcc.target/aarch64/sve/cost_model_5.c: Adjust expected outcome. > * gcc.target/aarch64/sve/cost_model_6.c: Adjust expected outcome. > * gcc.target/aarch64/sve/cost_model_7.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_f16.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_f32.c: Adjust expected outcome. 
> * gcc.target/aarch64/sve/pcs/varargs_2_f64.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_mf8.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_s16.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_s32.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_s64.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_s8.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_u16.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_u32.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_u64.c: Adjust expected outcome. > * gcc.target/aarch64/sve/pcs/varargs_2_u8.c: Adjust expected outcome. > * gcc.target/aarch64/sve/peel_ind_2.c: Adjust expected outcome. > * gcc.target/aarch64/sve/single_1.c: Adjust expected outcome. > * gcc.target/aarch64/sve/single_2.c: Adjust expected outcome. > * gcc.target/aarch64/sve/single_3.c: Adjust expected outcome. > * gcc.target/aarch64/sve/single_4.c: Adjust expected outcome. 
> --- > gcc/config/aarch64/aarch64-sve.md | 17 ++++ > .../aarch64/sve/acle/general/attributes_6.c | 8 +- > .../gcc.target/aarch64/sve/cost_model_14.c | 4 +- > .../gcc.target/aarch64/sve/cost_model_4.c | 3 +- > .../gcc.target/aarch64/sve/cost_model_5.c | 3 +- > .../gcc.target/aarch64/sve/cost_model_6.c | 3 +- > .../gcc.target/aarch64/sve/cost_model_7.c | 3 +- > .../aarch64/sve/pcs/varargs_2_f16.c | 93 +++++++++++++++++-- > .../aarch64/sve/pcs/varargs_2_f32.c | 93 +++++++++++++++++-- > .../aarch64/sve/pcs/varargs_2_f64.c | 93 +++++++++++++++++-- > .../aarch64/sve/pcs/varargs_2_mf8.c | 32 +++---- > .../aarch64/sve/pcs/varargs_2_s16.c | 93 +++++++++++++++++-- > .../aarch64/sve/pcs/varargs_2_s32.c | 93 +++++++++++++++++-- > .../aarch64/sve/pcs/varargs_2_s64.c | 93 +++++++++++++++++-- > .../gcc.target/aarch64/sve/pcs/varargs_2_s8.c | 34 +++---- > .../aarch64/sve/pcs/varargs_2_u16.c | 93 +++++++++++++++++-- > .../aarch64/sve/pcs/varargs_2_u32.c | 93 +++++++++++++++++-- > .../aarch64/sve/pcs/varargs_2_u64.c | 93 +++++++++++++++++-- > .../gcc.target/aarch64/sve/pcs/varargs_2_u8.c | 32 +++---- > .../gcc.target/aarch64/sve/peel_ind_2.c | 4 +- > .../gcc.target/aarch64/sve/ptrue_ldr_str.c | 31 +++++++ > .../gcc.target/aarch64/sve/single_1.c | 11 ++- > .../gcc.target/aarch64/sve/single_2.c | 11 ++- > .../gcc.target/aarch64/sve/single_3.c | 11 ++- > .../gcc.target/aarch64/sve/single_4.c | 11 ++- > 25 files changed, 907 insertions(+), 148 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index d4af3706294..03b7194d200 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -702,6 +702,23 @@ > } > ) > > +;; Fold predicated loads/stores with a PTRUE predicate to unpredicated > +;; loads/stores after RA. 
> +(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str" > + [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand") > + (unspec:SVE_FULL > + [(match_operand:<VPRED> 1 "aarch64_simd_imm_one") > + (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand")] > + UNSPEC_PRED_X))] > + "TARGET_SVE && reload_completed > + && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN) > + && ((REG_P (operands[0]) && MEM_P (operands[2])) > + || (REG_P (operands[2]) && MEM_P (operands[0])))" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (match_dup 2))]) > + > ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors > ;; or vectors for which little-endian ordering isn't acceptable. Memory > ;; accesses require secondary reloads. > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c > index 907637f06f9..eeba533ae74 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c > @@ -29,7 +29,7 @@ test_add (fixed_int8_t x, fixed_int8_t y) > } > > /* > -** test_add_gnu: > +** test_add_gnu: {target aarch64_big_endian } > ** ( > ** add (z[0-9]+\.b), (?:z0\.b, z1\.b|z1\.b, z0\.b) > ** ptrue (p[0-7])\.b, vl32 > @@ -41,6 +41,12 @@ test_add (fixed_int8_t x, fixed_int8_t y) > ** ) > ** ret > */ > +/* > +** test_add_gnu: {target aarch64_little_endian } > +** add (z[0-9]+)\.b, (?:z0\.b, z1\.b|z1\.b, z0\.b) > +** str \1, \[x8\] > +** ret > +*/ > gnu_int8_t > test_add_gnu (fixed_int8_t x, fixed_int8_t y) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c > b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c > index b65826b0889..d423dcfd03a 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c > @@ -9,5 +9,7 @@ uint64_t f2(uint64_t *ptr, int n) { > return res; > } > > -/* { dg-final { 
scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */ > +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 {target > aarch64_big_endian} } } */ > +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 {target > aarch64_little_endian} } } */ > +/* { dg-final { scan-assembler-times {\tldr\tz[0-9]+,} 4 {target > aarch64_little_endian} } } */ > /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c > b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c > index a7ecfe3a0de..93af4c1ce38 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c > @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) > *dst++ = 1; > } > > -/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target > aarch64_big_endian} } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target > aarch64_little_endian} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c > b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c > index f3a29fc38a1..fab49edf4bb 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c > @@ -9,5 +9,6 @@ vset (int *restrict dst, int *restrict src, int count) > *dst++ = 1; > } > > -/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target > aarch64_big_endian} } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target > aarch64_little_endian} } } */ > /* { dg-final { scan-assembler-not {\tstp\tq} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c > b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c > index 565e1e3ed39..160667bc680 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c > +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c > @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) > *dst++ = 1; > } > > -/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target > aarch64_big_endian} } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target > aarch64_little_endian} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c > b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c > index 31057c0cdc7..b71c673cbfa 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c > @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) > *dst++ = 1; > } > > -/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target > aarch64_big_endian} } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target > aarch64_little_endian} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c > index 50e77f9ed57..8d480a8fa1b 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int16_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) > } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.h), #9\.0[^\n]* > ** ... > @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) > ** ... 
> ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.h, #9\.0[^\n]* > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int16_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int16_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.h), #9\.0[^\n]* > ** ... > @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.h, #9\.0[^\n]* > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int16_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int16_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.h), #9\.0[^\n]* > ** ... > @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.h, #9\.0[^\n]* > +** ... 
> +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int16_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int16_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.h), #9\.0[^\n]* > ** ... > @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.h, #9\.0[^\n]* > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int16_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c > index e7b092af5d2..b3c699d314e 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int32_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) 
> } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.s), #9\.0[^\n]* > ** ... > @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.s, #9\.0[^\n]* > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int32_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int32_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.s), #9\.0[^\n]* > ** ... > @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.s, #9\.0[^\n]* > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int32_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int32_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.s), #9\.0[^\n]* > ** ... 
> @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.s, #9\.0[^\n]* > +** ... > +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int32_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int32_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.s), #9\.0[^\n]* > ** ... > @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.s, #9\.0[^\n]* > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int32_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c > index c3389a8a4c3..7078afc6283 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... 
> +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int64_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) > } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.d), #9\.0[^\n]* > ** ... > @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.d, #9\.0[^\n]* > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int64_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int64_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.d), #9\.0[^\n]* > ** ... > @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.d, #9\.0[^\n]* > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int64_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int64_t *ptr, ...) 
> { > @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.d), #9\.0[^\n]* > ** ... > @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.d, #9\.0[^\n]* > +** ... > +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int64_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int64_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** fmov (z[0-9]+\.d), #9\.0[^\n]* > ** ... > @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** fmov (z[0-9]+)\.d, #9\.0[^\n]* > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int64_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c > index 28777878d56..fcbac37156c 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c > @@ -8,9 +8,9 @@ > /* > ** callee_0: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] > +** ldr (z[0-9]+), \[x1\] > ** ... 
> -** st1b \1, \2, \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -32,9 +32,9 @@ callee_0 (mfloat8_t *ptr, ...) > ** ... > ** umov (w[0-9]+), v0.b\[0\] > ** ... > -** mov (z[0-9]+\.b), \1 > +** mov (z[0-9]+)\.b, \1 > ** ... > -** st1b \2, p[0-7], \[x1\] > +** str \2, \[x1\] > ** ... > ** ret > */ > @@ -47,9 +47,9 @@ caller_0 (mfloat8_t *ptr, mfloat8_t in) > /* > ** callee_1: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] > +** ldr (z[0-9]+), \[x2\] > ** ... > -** st1b \1, p[0-7], \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -72,9 +72,9 @@ callee_1 (mfloat8_t *ptr, ...) > ** ... > ** umov (w[0-9]+), v0.b\[0\] > ** ... > -** mov (z[0-9]+\.b), \1 > +** mov (z[0-9]+)\.b, \1 > ** ... > -** st1b \2, p[0-7], \[x2\] > +** str \2, \[x2\] > ** ... > ** ret > */ > @@ -87,9 +87,9 @@ caller_1 (mfloat8_t *ptr, mfloat8_t in) > /* > ** callee_7: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] > +** ldr (z[0-9]+), \[x7\] > ** ... > -** st1b \1, p[0-7], \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -117,9 +117,9 @@ callee_7 (mfloat8_t *ptr, ...) > ** ... > ** umov (w[0-9]+), v0.b\[0\] > ** ... > -** mov (z[0-9]+\.b), \1 > +** mov (z[0-9]+)\.b, \1 > ** ... > -** st1b \2, p[0-7], \[x7\] > +** str \2, \[x7\] > ** ... > ** ret > */ > @@ -136,9 +136,9 @@ caller_7 (mfloat8_t *ptr, mfloat8_t in) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] > +** ldr (z[0-9]+), \[\2\] > ** ... > -** st1b \3, \4, \[x0\] > +** str \3, \[x0\] > ** ... > ** ret > */ > @@ -167,9 +167,9 @@ callee_8 (mfloat8_t *ptr, ...) > ** ... > ** umov (w[0-9]+), v0.b\[0\] > ** ... > -** mov (z[0-9]+\.b), \1 > +** mov (z[0-9]+)\.b, \1 > ** ... > -** st1b \2, p[0-7], \[(x[0-9]+)\] > +** str \2, \[(x[0-9]+)\] > ** ... > ** str \3, \[sp\] > ** ... 
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c > index 3c644e13428..e65e64f6b72 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int16_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) > } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int16_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int16_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... 
> +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int16_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int16_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... > +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int16_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int16_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... 
> +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int16_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c > index 652d609d3e4..6488a5fa242 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int32_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) > } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int32_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int32_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) > ** ... 
> ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int32_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int32_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int32_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int32_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) > ** ... 
> ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int32_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c > index 72ea6a345cf..4b77b4ff7b0 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int64_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) > } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int64_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int64_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) 
> } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int64_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int64_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int64_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int64_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) 
> } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int64_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c > index 02f4bec9a9c..9528ea3f48b 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c > @@ -8,9 +8,9 @@ > /* > ** callee_0: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] > +** ldr (z[0-9]+), \[x1\] > ** ... > -** st1b \1, \2, \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -23,15 +23,15 @@ callee_0 (int8_t *ptr, ...) > va_start (va, ptr); > vec = va_arg (va, svint8_t); > va_end (va); > svst1 (svptrue_b8 (), ptr, vec); > } > > /* > ** caller_0: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... > -** st1b \1, p[0-7], \[x1\] > +** str \1, \[x1\] > ** ... > ** ret > */ > @@ -44,9 +44,9 @@ caller_0 (int8_t *ptr) > /* > ** callee_1: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] > +** ldr (z[0-9]+), \[x2\] > ** ... > -** st1b \1, p[0-7], \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...) > /* > ** caller_1: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... > -** st1b \1, p[0-7], \[x2\] > +** str \1, \[x2\] > ** ... > ** ret > */ > @@ -81,9 +81,9 @@ caller_1 (int8_t *ptr) > /* > ** callee_7: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] > +** ldr (z[0-9]+), \[x7\] > ** ... > -** st1b \1, p[0-7], \[x0\] > +** str \1, \[x0\] > ** ...
> ** ret > */ > @@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...) > /* > ** caller_7: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... > -** st1b \1, p[0-7], \[x7\] > +** str \1, \[x7\] > ** ... > ** ret > */ > @@ -127,9 +127,9 @@ caller_7 (int8_t *ptr) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] > +** ldr (z[0-9]+), \[\2\] > ** ... > -** st1b \3, \4, \[x0\] > +** str \3, \[x0\] > ** ... > ** ret > */ > @@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...) > /* > ** caller_8: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... > -** st1b \1, p[0-7], \[(x[0-9]+)\] > +** str \1, \[(x[0-9]+)\] > ** ... > ** str \2, \[sp\] > ** ... > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c > index b60d448c0dc..74ef4daed4f 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int16_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) > } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... > +** str \1, \[x1\] > +** ... 
> +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int16_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int16_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int16_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int16_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... > +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int16_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. 
*/ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int16_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.h), #42 > ** ... > @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.h, #42 > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int16_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c > index 5f01464934d..4f9ff785dd9 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int32_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) > } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) > ** ... 
> ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int32_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int32_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int32_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int32_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[x7\] > +** ... 
> +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int32_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int32_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.s), #42 > ** ... > @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.s, #42 > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int32_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c > index 986739fdc36..27e437bbe6c 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c > @@ -6,7 +6,7 @@ > #include <stdarg.h> > > /* > -** callee_0: > +** callee_0: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] > ** ... > @@ -14,6 +14,15 @@ > ** ... > ** ret > */ > +/* > +** callee_0: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x1\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_0 (int64_t *ptr, ...) > { > @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) 
> } > > /* > -** caller_0: > +** caller_0: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_0: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[x1\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_0 (int64_t *ptr) > { > @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) > } > > /* > -** callee_1: > +** callee_1: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] > ** ... > @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_1: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x2\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_1 (int64_t *ptr, ...) > { > @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) > } > > /* > -** caller_1: > +** caller_1: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_1: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[x2\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_1 (int64_t *ptr) > { > @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) > } > > /* > -** callee_7: > +** callee_7: {target aarch64_big_endian} > ** ... > ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] > ** ... > @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_7: {target aarch64_little_endian} > +** ... > +** ldr (z[0-9]+), \[x7\] > +** ... > +** str \1, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_7 (int64_t *ptr, ...) > { > @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) > } > > /* > -** caller_7: > +** caller_7: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) > ** ... 
> ** ret > */ > +/* > +** caller_7: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[x7\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_7 (int64_t *ptr) > { > @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) > > /* FIXME: We should be able to get rid of the va_list object. */ > /* > -** callee_8: > +** callee_8: {target aarch64_big_endian} > ** sub sp, sp, #([0-9]+) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) > ** ... > ** ret > */ > +/* > +** callee_8: {target aarch64_little_endian} > +** sub sp, sp, #([0-9]+) > +** ... > +** ldr (x[0-9]+), \[sp, \1\] > +** ... > +** ldr (z[0-9]+), \[\2\] > +** ... > +** str \3, \[x0\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > callee_8 (int64_t *ptr, ...) > { > @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) > } > > /* > -** caller_8: > +** caller_8: {target aarch64_big_endian} > ** ... > ** mov (z[0-9]+\.d), #42 > ** ... > @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) > ** ... > ** ret > */ > +/* > +** caller_8: {target aarch64_little_endian} > +** ... > +** mov (z[0-9]+)\.d, #42 > +** ... > +** str \1, \[(x[0-9]+)\] > +** ... > +** str \2, \[sp\] > +** ... > +** ret > +*/ > void __attribute__((noipa)) > caller_8 (int64_t *ptr) > { > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c > b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c > index 533cba67713..d43a6daa347 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c > @@ -8,9 +8,9 @@ > /* > ** callee_0: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] > +** ldr (z[0-9]+), \[x1\] > ** ... > -** st1b \1, \2, \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -29,9 +29,9 @@ callee_0 (int8_t *ptr, ...) > /* > ** caller_0: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... 
> -** st1b \1, p[0-7], \[x1\] > +** str \1, \[x1\] > ** ... > ** ret > */ > @@ -44,9 +44,9 @@ caller_0 (int8_t *ptr) > /* > ** callee_1: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] > +** ldr (z[0-9]+), \[x2\] > ** ... > -** st1b \1, p[0-7], \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...) > /* > ** caller_1: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... > -** st1b \1, p[0-7], \[x2\] > +** str \1, \[x2\] > ** ... > ** ret > */ > @@ -81,9 +81,9 @@ caller_1 (int8_t *ptr) > /* > ** callee_7: > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] > +** ldr (z[0-9]+), \[x7\] > ** ... > -** st1b \1, p[0-7], \[x0\] > +** str \1, \[x0\] > ** ... > ** ret > */ > @@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...) > /* > ** caller_7: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... > -** st1b \1, p[0-7], \[x7\] > +** str \1, \[x7\] > ** ... > ** ret > */ > @@ -127,9 +127,9 @@ caller_7 (int8_t *ptr) > ** ... > ** ldr (x[0-9]+), \[sp, \1\] > ** ... > -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] > +** ldr (z[0-9]+), \[\2\] > ** ... > -** st1b \3, \4, \[x0\] > +** str \3, \[x0\] > ** ... > ** ret > */ > @@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...) > /* > ** caller_8: > ** ... > -** mov (z[0-9]+\.b), #42 > +** mov (z[0-9]+)\.b, #42 > ** ... > -** st1b \1, p[0-7], \[(x[0-9]+)\] > +** str \1, \[(x[0-9]+)\] > ** ... > ** str \2, \[sp\] > ** ... > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c > index 985cd0c6d77..f07900b2f0a 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c > @@ -19,5 +19,7 @@ foo (void) > /* We should operate on aligned vectors. */ > /* { dg-final { scan-assembler {\t(adrp|adr)\tx[0-9]+, (x|\.LANCHOR0)\n} } } > */ > /* We should unroll the loop three times. 
*/ > -/* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */ > +/* { dg-final { scan-assembler-times "\tst1w\t" 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times "\tst1w\t" 2 { target > aarch64_little_endian } } } */ > +/* { dg-final { scan-assembler-times "\tstr\t" 1 { target > aarch64_little_endian } } } */ > /* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.s, > vl7\n.*\teor\tp[0-9]+\.b, (p[0-9]+)/z, (\1\.b, \2\.b|\2\.b, \1\.b)\n} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c > b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c > new file mode 100644 > index 00000000000..c3bfa98e5e8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c > @@ -0,0 +1,31 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-require-effective-target aarch64_little_endian } */ > + > +#include <arm_sve.h> > + > +#define TEST(TYPE, TY, B) \ > + sv##TYPE ld_##TY (TYPE *x) \ > + { \ > + return svld1_##TY(svptrue_b##B (), x); \ > + } \ > + void st_##TY (TYPE *x, sv##TYPE data) \ > + { \ > + svst1_##TY(svptrue_b##B (), x, data); \ > + } > + > +TEST(bfloat16_t, bf16, 16) > +TEST(float16_t, f16, 16) > +TEST(float32_t, f32, 32) > +TEST(float64_t, f64, 64) > +TEST(uint8_t, u8, 8) > +TEST(uint16_t, u16, 16) > +TEST(uint32_t, u32, 32) > +TEST(uint64_t, u64, 64) > +TEST(int8_t, s8, 8) > +TEST(int16_t, s16, 16) > +TEST(int32_t, s32, 32) > +TEST(int64_t, s64, 64) > + > +/* { dg-final { scan-assembler-times {\tldr\tz0, \[x0\]} 12 } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz0, \[x0\]} 12 } } */ > \ No newline at end of file > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c > index d9bb97e12cd..b9c3d3e5241 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c > @@ -40,12 +40,13 @@ TEST_LOOP (double, 3.0) > /* { dg-final { scan-assembler-times 
{\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } > */ > /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } > */ > > -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 { > target aarch64_big_endian } } } */ > > -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ > -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ > +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target > aarch64_little_endian } } } */ > > /* { dg-final { scan-assembler-not {\twhile} } } */ > /* { dg-final { scan-assembler-not {\tb} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c > index d27eead17e3..44810364380 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c > @@ -16,12 +16,13 @@ > /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } > */ > /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } > */ > > -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 { > target aarch64_big_endian } } } */ > > -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ > -/* { dg-final { scan-assembler-times 
{\tst1h\tz[0-9]+\.h,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ > +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target > aarch64_little_endian } } } */ > > /* { dg-final { scan-assembler-not {\twhile} } } */ > /* { dg-final { scan-assembler-not {\tb} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c > b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c > index 313a72da067..26614b25217 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c > @@ -16,12 +16,13 @@ > /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } > */ > /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } > */ > > -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 { > target aarch64_big_endian } } } */ > > -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ > -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ > +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times 
{\tst1d\tz[0-9]+\.d,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target > aarch64_little_endian } } } */ > > /* { dg-final { scan-assembler-not {\twhile} } } */ > /* { dg-final { scan-assembler-not {\tb} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c > b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c > index 4f46654a5ee..475482584cf 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c > @@ -16,12 +16,13 @@ > /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } > */ > /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } > */ > > -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */ > +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 { > target aarch64_big_endian } } } */ > > -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ > -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ > -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ > +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target > aarch64_big_endian } } } */ > +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target > aarch64_little_endian } } } */ > > /* { dg-final { scan-assembler-not {\twhile} } } */ > /* { dg-final { scan-assembler-not {\tb} } } */ > -- > 2.34.1
smime.p7s
Description: S/MIME cryptographic signature