On Wed, Jun 25, 2025 at 10:04:49AM +0200, Juergen Christ wrote:
> Some patterns that are detected by the autovectorizer can be supported by
> s390. Add expanders such that autovectorization of these patterns works.
>
> RTL for the builtins used unspec to represent highpart multiplication.
> Replace this by the correct RTL to allow further simplification.
>
> Bootstrapped and regtested on s390. Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/s390/s390.md: Removed unused unspecs.
> * config/s390/vector.md (avg<mode>3_ceil): New expander.
> (uavg<mode>3_ceil): New expander.
> (smul<mode>3_highpart): New expander.
> (umul<mode>3_highpart): New expander.
> * config/s390/vx-builtins.md (vec_umulh<mode>): Remove unspec.
> (vec_smulh<mode>): Remove unspec.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/s390/vector/pattern-avg-1.c: New test.
> * gcc.target/s390/vector/pattern-mulh-1.c: New test.
>
> Signed-off-by: Juergen Christ <[email protected]>
> ---
> gcc/config/s390/s390.md | 3 --
> gcc/config/s390/vector.md | 26 +++++++++++++++++
> gcc/config/s390/vx-builtins.md | 10 +++----
> .../gcc.target/s390/vector/pattern-avg-1.c | 26 +++++++++++++++++
> .../gcc.target/s390/vector/pattern-mulh-1.c | 29 +++++++++++++++++++
> 5 files changed, 85 insertions(+), 9 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
>
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index 97a4bdf96b2d..440ce93574f4 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -139,9 +139,6 @@
> UNSPEC_LCBB
>
> ; Vector
> - UNSPEC_VEC_SMULT_HI
> - UNSPEC_VEC_UMULT_HI
> - UNSPEC_VEC_SMULT_LO
> UNSPEC_VEC_SMULT_EVEN
> UNSPEC_VEC_UMULT_EVEN
> UNSPEC_VEC_SMULT_ODD
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 6f4e1929eb80..8d7ca1a520f3 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -3576,3 +3576,29 @@
> ; vec_unpacks_float_lo
> ; vec_unpacku_float_hi
> ; vec_unpacku_float_lo
> +
> +(define_expand "avg<mode>3_ceil"
> + [(set (match_operand:VIT_HW_VXE3_T 0
> "register_operand" "=v")
> + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1
> "register_operand" "v")
> + (match_operand:VIT_HW_VXE3_T 2
> "register_operand" "v")]
> + UNSPEC_VEC_AVG))]
> + "TARGET_VX")
Expanders don't have constraints; please drop the "=v" and "v" constraint strings from these operands.
> +
> +(define_expand "uavg<mode>3_ceil"
> + [(set (match_operand:VIT_HW_VXE3_T 0
> "register_operand" "=v")
> + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1
> "register_operand" "v")
> + (match_operand:VIT_HW_VXE3_T 2
> "register_operand" "v")]
> + UNSPEC_VEC_AVGU))]
> + "TARGET_VX")
Ditto.
> +
> +(define_expand "smul<mode>3_highpart"
> + [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"
> "=v")
> + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1
> "register_operand" "v")
> + (match_operand:VIT_HW_VXE3_DT 2
> "register_operand" "v")))]
> + "TARGET_VX")
Ditto.
> +
> +(define_expand "umul<mode>3_highpart"
> + [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"
> "=v")
> + (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1
> "register_operand" "v")
> + (match_operand:VIT_HW_VXE3_DT 2
> "register_operand" "v")))]
> + "TARGET_VX")
Ditto.
> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index a7bb7ff92f5e..2478f74e161a 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -983,9 +983,8 @@
> ; vmhb, vmhh, vmhf, vmhg, vmhq
> (define_insn "vec_smulh<mode>"
> [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"
> "=v")
^
~~~~
Wrong indentation.
> - (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1
> "register_operand" "v")
> - (match_operand:VIT_HW_VXE3_DT 2
> "register_operand" "v")]
> - UNSPEC_VEC_SMULT_HI))]
> + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1
> "register_operand" "v")
> + (match_operand:VIT_HW_VXE3_DT 2
> "register_operand" "v")))]
> "TARGET_VX"
> "vmh<bhfgq>\t%v0,%v1,%v2"
> [(set_attr "op_type" "VRR")])
> @@ -993,9 +992,8 @@
> ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
> (define_insn "vec_umulh<mode>"
> [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"
> "=v")
^
~~~~
Wrong indentation.
With those changes, OK for trunk.
Thanks,
Stefan
> - (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1
> "register_operand" "v")
> - (match_operand:VIT_HW_VXE3_DT 2
> "register_operand" "v")]
> - UNSPEC_VEC_UMULT_HI))]
> + (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1
> "register_operand" "v")
> + (match_operand:VIT_HW_VXE3_DT 2
> "register_operand" "v")))]
> "TARGET_VX"
> "vmlh<bhfgq>\t%v0,%v1,%v2"
> [(set_attr "op_type" "VRR")])
> diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> new file mode 100644
> index 000000000000..a15301aabe54
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize
> -fdump-tree-optimized" } */
> +
> +#define TEST(T1,T2,N) \
> + void \
> + avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a, \
> + signed T1 *__restrict b) \
> + { \
> + for (int i = 0; i < N; ++i) \
> + res[i] = ((signed T2)a[i] + b[i] + 1) >> 1; \
> + } \
> + \
> + void \
> + uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \
> + unsigned T1 *__restrict b) \
> + { \
> + for (int i = 0; i < N; ++i) \
> + res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \
> + }
> +
> +TEST(char,short,16)
> +TEST(short,int,8)
> +TEST(int,long,4)
> +TEST(long,__int128,2)
> +
> +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> new file mode 100644
> index 000000000000..cd8e4e7d7a09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize
> -fdump-tree-optimized" } */
> +
> +#define TEST(T1,T2,N,S) \
> + void \
> + mulh##T1 (signed T1 *__restrict res, \
> + signed T1 *__restrict l, \
> + signed T1 *__restrict r) \
> + { \
> + for (int i = 0; i < N; ++i) \
> + res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S); \
> + } \
> + \
> + void \
> + umulh##T1 (unsigned T1 *__restrict res, \
> + unsigned T1 *__restrict l, \
> + unsigned T1 *__restrict r) \
> + { \
> + for (int i = 0; i < N; ++i) \
> + res[i] = (unsigned T1) \
> + (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S); \
> + }
> +
> +TEST(char,short,16,8)
> +TEST(short,int,8,16)
> +TEST(int,long,4,32)
> +TEST(long,__int128,2,64)
> +
> +/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
> --
> 2.43.5
>