Hi Tamar,
> On 3 Oct 2025, at 11:45, Tamar Christina <[email protected]> wrote:
>
> This patch is a mechanical rewrite of the widen_[us]sum optabs from a direct
> optab to a conversion optab. As a result, the output mode has to be added to
> the names of the existing patterns.
>
> No change in functionality is expected.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
Ok if the midend naming scheme is approved.
Thanks,
Kyrill
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR middle-end/122069
> * config/aarch64/aarch64-simd.md (widen_ssum<mode>3): Change into ...
> (widen_ssum<Vdblw><mode>3, widen_ssum<Vwide><mode>3): ... these.
> (widen_usum<mode>3): Change into ...
> (widen_usum<Vdblw><mode>3, widen_usum<Vwide><mode>3): ... these.
> * config/aarch64/iterators.md (Vdblw): New.
> (Vwide): Extend to match VWIDE.
>
> gcc/testsuite/ChangeLog:
>
> PR middle-end/122069
> * gcc.target/aarch64/pr122069_1.c: New test.
> * gcc.target/aarch64/pr122069_2.c: New test.
>
> ---
> diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-simd.md
> index
> 0d5b02a739fa74724d6dc8b658638d55b8db6890..6488119a14020c801f9994ef84250ceb5ba15481
> 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4647,7 +4647,7 @@ (define_insn
> "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
>
> ;; <su><addsub>w<q>.
>
> -(define_expand "widen_ssum<mode>3"
> +(define_expand "widen_ssum<Vdblw><mode>3"
> [(set (match_operand:<VDBLW> 0 "register_operand")
> (plus:<VDBLW> (sign_extend:<VDBLW>
> (match_operand:VQW 1 "register_operand"))
> @@ -4664,7 +4664,7 @@ (define_expand "widen_ssum<mode>3"
> }
> )
>
> -(define_expand "widen_ssum<mode>3"
> +(define_expand "widen_ssum<Vwide><mode>3"
> [(set (match_operand:<VWIDE> 0 "register_operand")
> (plus:<VWIDE> (sign_extend:<VWIDE>
> (match_operand:VD_BHSI 1 "register_operand"))
> @@ -4675,7 +4675,7 @@ (define_expand "widen_ssum<mode>3"
> DONE;
> })
>
> -(define_expand "widen_usum<mode>3"
> +(define_expand "widen_usum<Vdblw><mode>3"
> [(set (match_operand:<VDBLW> 0 "register_operand")
> (plus:<VDBLW> (zero_extend:<VDBLW>
> (match_operand:VQW 1 "register_operand"))
> @@ -4692,7 +4692,7 @@ (define_expand "widen_usum<mode>3"
> }
> )
>
> -(define_expand "widen_usum<mode>3"
> +(define_expand "widen_usum<Vwide><mode>3"
> [(set (match_operand:<VWIDE> 0 "register_operand")
> (plus:<VWIDE> (zero_extend:<VWIDE>
> (match_operand:VD_BHSI 1 "register_operand"))
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index
> 332e7ffd2eaf6597f7bce9c22df70a72ebfe8164..61ca4990b94170f016a9f50e3505c8cfb24df9be
> 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1901,6 +1901,11 @@ (define_mode_attr VDBLW [(V8QI "V4HI") (V16QI "V8HI")
> (V4HI "V2SI") (V8HI "V4SI")
> (V2SI "DI") (V4SI "V2DI")])
>
> +;; Modes with double-width elements.
> +(define_mode_attr Vdblw [(V8QI "v4hi") (V16QI "v8hi")
> + (V4HI "v2si") (V8HI "v4si")
> + (V2SI "di") (V4SI "v2di")])
> +
> (define_mode_attr VQUADW [(V8QI "V4SI") (V16QI "V8SI")
> (V4HI "V2DI") (V8HI "V4DI")])
>
> @@ -2003,7 +2008,9 @@ (define_mode_attr v2xwide [(V8QI "v8hi") (V4HI "v4si")
> (define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")])
>
> ;; Widened modes of vector modes, lowercase
> -(define_mode_attr Vwide [(V2SF "v2df") (V4HF "v4sf")
> +(define_mode_attr Vwide [(V2SI "v2di") (V4HI "v4si")
> + (V2SF "v2df") (V4HF "v4sf")
> + (V8QI "v8hi")
> (VNx16QI "vnx8hi") (VNx8HI "vnx4si")
> (VNx4SI "vnx2di")
> (VNx8HF "vnx4sf") (VNx4SF "vnx2df")
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_1.c
> b/gcc/testsuite/gcc.target/aarch64/pr122069_1.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..b2f973261ea0df8d3d5c7da29834b35ed21a4d52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_1.c
> @@ -0,0 +1,46 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +inline char char_abs(char i) {
> + return (i < 0 ? -i : i);
> +}
> +
> +/*
> +** foo_int:
> +** ...
> +** sub v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +** zip1 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +** zip2 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h
> +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h
> +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h
> +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h
> +** ...
> +*/
> +int foo_int(unsigned char *x, unsigned char * restrict y) {
> + int sum = 0;
> + for (int i = 0; i < 8000; i++)
> + sum += char_abs(x[i] - y[i]);
> + return sum;
> +}
> +
> +/*
> +** foo2_int:
> +** ...
> +** add v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h
> +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h
> +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h
> +** ...
> +*/
> +int foo2_int(unsigned short *x, unsigned short * restrict y) {
> + int sum = 0;
> + for (int i = 0; i < 8000; i++)
> + {
> + x[i] = x[i] + y[i];
> + sum += x[i];
> + }
> + return sum;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_2.c
> b/gcc/testsuite/gcc.target/aarch64/pr122069_2.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..c6a276f88a67a50038268c177bc60f4dee5258f1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_2.c
> @@ -0,0 +1,80 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/
> +
> +inline char char_abs(char i) {
> + return (i < 0 ? -i : i);
> +}
> +
> +__attribute__((noipa))
> +int foo_int(unsigned char *x, unsigned char * restrict y) {
> + int sum = 0;
> + for (int i = 0; i < 100; i++)
> + sum += char_abs(x[i] - y[i]);
> + return sum;
> +}
> +
> +__attribute__((noipa))
> +int foo2_int(unsigned short *x, unsigned short * restrict y,
> + unsigned short * restrict z) {
> + int sum = 0;
> + for (int i = 0; i < 100; i++)
> + {
> + z[i] = x[i] + y[i];
> + sum += z[i];
> + }
> + return sum;
> +}
> +
> +__attribute__((noipa))
> +int foo_int2(unsigned char *x, unsigned char * restrict y) {
> + int sum = 0;
> +#pragma GCC novector
> + for (int i = 0; i < 100; i++)
> + sum += char_abs(x[i] - y[i]);
> + return sum;
> +}
> +
> +__attribute__((noipa))
> +int foo2_int2(unsigned short *x, unsigned short * restrict y,
> + unsigned short * restrict z) {
> + int sum = 0;
> +#pragma GCC novector
> + for (int i = 0; i < 100; i++)
> + {
> + z[i] = x[i] + y[i];
> + sum += z[i];
> + }
> + return sum;
> +}
> +
> +int main ()
> +{
> + unsigned short a[100];
> + unsigned short b[100];
> + unsigned short r1[100];
> + unsigned short r2[100];
> + unsigned char c[100];
> + unsigned char d[100];
> +#pragma GCC novector
> + for (int i = 0; i < 100; i++)
> + {
> + a[i] = c[i] = i;
> + b[i] = d[i] = 100 - i;
> + }
> +
> + if (foo_int (c, d) != foo_int2 (c, d))
> + __builtin_abort();
> +
> +
> + if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2))
> + __builtin_abort();
> +
> +#pragma GCC novector
> + for (int i = 0; i < 100; i++)
> + if (r1[i] != r2[i])
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
> \ No newline at end of file
>
>
> --
> <rb19870.patch>