> -----Original Message----- > From: Pengfei Li <pengfei....@arm.com> > Sent: Friday, August 8, 2025 11:00 AM > To: Kyrylo Tkachov <ktkac...@nvidia.com> > Cc: gcc-patches@gcc.gnu.org; Richard Sandiford <richard.sandif...@arm.com>; > Tamar Christina <tamar.christ...@arm.com> > Subject: [PATCH v2] AArch64: Fix invalid immediate offsets in SVE > gather/scatter > [PR121449] > > Below v2 patch just updates the commit message in v1. > > Please let me know if it's good enough now. > > Thanks, > Pengfei > > -- >8 -- > This patch fixes incorrect constraints in RTL patterns for AArch64 SVE > gather/scatter with type widening/narrowing and vector-plus-immediate > addressing. The bug leads to below "immediate offset out of range" > errors during assembly, eventually causing compilation failures. > > /tmp/ccsVqBp1.s: Assembler messages: > /tmp/ccsVqBp1.s:54: Error: immediate offset out of range 0 to 31 at operand 3 > -- > `ld1b z1.d,p0/z,[z1.d,#64]' > > Current RTL patterns for such instructions incorrectly use vgw or vgd > constraints for the immediate operand, base on the vector element type > in Z registers (zN.s or zN.d). However, for gather/scatter with type > conversions, the immediate range for vector-plus-immediate addressing is > determined by the element type in memory, which differs from that in > vector registers. Using the wrong constraint can produce out-of-range > offset values that cannot be encoded in the instruction. > > This patch corrects the constraints used in these patterns. A test case > that reproduces the issue is also included. > > Bootstrapped and regression-tested on aarch64-linux-gnu. > > gcc/ChangeLog: > PR target/121449 > * config/aarch64/aarch64-sve.md > (mask_gather_load<mode><v_int_container>): Fix constraints. > (mask_gather_load<mode><v_int_container>): Likewise. > (mask_scatter_store<mode><v_int_container>): Likewise. > (mask_scatter_store<mode><v_int_container>): Likewise.
You just need to mention each pattern names once. So gcc/ChangeLog: PR target/121449 * config/aarch64/aarch64-sve.md (mask_gather_load<mode><v_int_container>): Fix constraints. (mask_scatter_store<mode><v_int_container>): Likewise. Ok with those changes (you don't need to send a new patch for review). Thanks, Tamar > > gcc/testsuite/ChangeLog: > PR target/121449 > * g++.target/aarch64/sve/pr121449.C: New test. > --- > gcc/config/aarch64/aarch64-sve.md | 64 +++++++++---------- > .../g++.target/aarch64/sve/pr121449.C | 44 +++++++++++++ > 2 files changed, 76 insertions(+), 32 deletions(-) > create mode 100644 gcc/testsuite/g++.target/aarch64/sve/pr121449.C > > diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64- > sve.md > index 88d323af32d..51e2d7d7e87 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -1542,18 +1542,18 @@ > UNSPEC_LD1_GATHER))] > "TARGET_SVE && TARGET_NON_STREAMING" > {@ [cons: =0, 1, 2, 3, 4, 5 ] > - [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s] > - [?w, Z, 0, Ui1, Ui1, Upl] ^ > - [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1] > - [?w, vgw, 0, Ui1, Ui1, Upl] ^ > - [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] > - [?w, rk, 0, Z, Ui1, Upl] ^ > - [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] > - [?w, rk, 0, Ui1, Ui1, Upl] ^ > - [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw > %p4] > - [?w, rk, 0, Z, i, Upl] ^ > - [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw > %p4] > - [?w, rk, 0, Ui1, i, Upl] ^ > + [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s] > + [?w, Z, 0, Ui1, Ui1, Upl] ^ > + [&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1] > + [?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^ > + [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, > sxtw] > + [?w, rk, 0, Z, Ui1, Upl] ^ > + [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, > uxtw] > + [?w, rk, 0, Ui1, Ui1, Upl] ^ > + [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, > sxtw %p4] > + [?w, rk, 0, Z, i, Upl] ^ > + [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, > uxtw %p4] > + [?w, rk, 0, Ui1, i, Upl] ^ > } > ) > > @@ -1572,14 +1572,14 @@ > UNSPEC_LD1_GATHER))] > "TARGET_SVE && TARGET_NON_STREAMING" > {@ [cons: =0, 1, 2, 3, 4, 5] > - [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d] > - [?w, Z, 0, i, Ui1, Upl] ^ > - [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1] > - [?w, vgd, 0, i, Ui1, Upl] ^ > - [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d] > - [?w, rk, 0, i, Ui1, Upl] ^ > - [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4] > - [?w, rk, 0, i, i, Upl] ^ > + [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d] > + [?w, Z, 0, i, Ui1, Upl] ^ > + [&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1] > + [?w, vg<Vesize>, 0, i, Ui1, Upl] ^ > + [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d] > + [?w, rk, 0, i, Ui1, Upl] ^ > + [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, > lsl %p4] > + [?w, rk, 0, i, i, Upl] ^ > } > ) > > @@ -2488,13 +2488,13 @@ > (match_operand:SVE_4 4 "register_operand")] > UNSPEC_ST1_SCATTER))] > "TARGET_SVE && TARGET_NON_STREAMING" > - {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ] > - [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s] > - [ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, > #%0] > - [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, sxtw] > - [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, uxtw] > - [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, sxtw %p3] > - [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, uxtw %p3] > + {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ] > + [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s] > + [ vg<Vesize> , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, > #%0] > + [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, sxtw] > + [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, uxtw] > + [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, sxtw %p3] > + [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, > %1.s, uxtw %p3] > } > ) > > @@ -2511,11 +2511,11 @@ > (match_operand:SVE_2 4 "register_operand")] > UNSPEC_ST1_SCATTER))] > "TARGET_SVE && TARGET_NON_STREAMING" > - {@ [ cons: 0 , 1 , 3 , 4 , 5 ] > - [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d] > - [ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0] > - [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d] > - [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl > %p3] > + {@ [ cons: 0 , 1 , 3 , 4 , 5 ] > + [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d] > + [ vg<Vesize> , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0] > + [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d] > + [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, > lsl %p3] > } > ) > > diff --git a/gcc/testsuite/g++.target/aarch64/sve/pr121449.C > b/gcc/testsuite/g++.target/aarch64/sve/pr121449.C > new file mode 100644 > index 00000000000..b2e13765dfa > --- /dev/null > +++ b/gcc/testsuite/g++.target/aarch64/sve/pr121449.C > @@ -0,0 +1,44 @@ > +/* PR target/121449 */ > +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ > +/* { dg-options "-O3 -save-temps" } */ > + > +struct example; > + > +struct array { > + unsigned length(); > + example *operator[](unsigned i) { > + example **data = reinterpret_cast<example **>(this); > + return data[i]; > + } > +}; > + > +struct example { > + int a[16]; > + bool is_even; > + int version; > + int count() { return is_even ? 2 : 1; } > + void fun1(int, long); > + void fun2(unsigned, unsigned); > + void process(array &, array &); > +}; > + > +bool found; > + > +void example::process(array &a, array &b) { > + for (unsigned i = 1; a.length(); i++) { > + long total = 0; > + for (unsigned k = 0; k <= i; k++) { > + total += a[k]->count(); > + } > + for (unsigned j = 0; j < i; j++) { > + int major = b[j]->version; > + if (found) > + major += i; > + fun1(i + 1, total); > + fun2(j, major); > + } > + } > +} > + > +/* { dg-final { scan-assembler-not {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, > \[(z[0-9]+)\.d, > #64\]} } } */ > + > -- > 2.43.0