The autovectorizer detects some patterns (rounding-up average and high-part multiplication) that s390 can support. Add expanders for them so that autovectorization of these patterns works.
RTL for the builtins used unspec to represent highpart multiplication. Replace this by the correct RTL to allow further simplification. Bootstrapped and regtested on s390. Ok for trunk? gcc/ChangeLog: * config/s390/s390.md: Removed unused unspecs. * config/s390/vector.md (avg<mode>3_ceil): New expander. (uavg<mode>3_ceil): New expander. (smul<mode>3_highpart): New expander. (umul<mode>3_highpart): New expander. * config/s390/vx-builtins.md (vec_umulh<mode>): Remove unspec. (vec_smulh<mode>): Remove unspec. gcc/testsuite/ChangeLog: * gcc.target/s390/vector/pattern-avg-1.c: New test. * gcc.target/s390/vector/pattern-mulh-1.c: New test. Signed-off-by: Juergen Christ <jchr...@linux.ibm.com> --- gcc/config/s390/s390.md | 3 -- gcc/config/s390/vector.md | 26 +++++++++++++++++ gcc/config/s390/vx-builtins.md | 10 +++---- .../gcc.target/s390/vector/pattern-avg-1.c | 26 +++++++++++++++++ .../gcc.target/s390/vector/pattern-mulh-1.c | 29 +++++++++++++++++++ 5 files changed, 85 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 97a4bdf96b2d..440ce93574f4 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -139,9 +139,6 @@ UNSPEC_LCBB ; Vector - UNSPEC_VEC_SMULT_HI - UNSPEC_VEC_UMULT_HI - UNSPEC_VEC_SMULT_LO UNSPEC_VEC_SMULT_EVEN UNSPEC_VEC_UMULT_EVEN UNSPEC_VEC_SMULT_ODD diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 6f4e1929eb80..8d7ca1a520f3 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -3576,3 +3576,29 @@ ; vec_unpacks_float_lo ; vec_unpacku_float_hi ; vec_unpacku_float_lo + +(define_expand "avg<mode>3_ceil" + [(set (match_operand:VIT_HW_VXE3_T 0 "register_operand" "=v") + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand" "v") + (match_operand:VIT_HW_VXE3_T 2 "register_operand" "v")] + 
UNSPEC_VEC_AVG))] + "TARGET_VX") + +(define_expand "uavg<mode>3_ceil" + [(set (match_operand:VIT_HW_VXE3_T 0 "register_operand" "=v") + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand" "v") + (match_operand:VIT_HW_VXE3_T 2 "register_operand" "v")] + UNSPEC_VEC_AVGU))] + "TARGET_VX") + +(define_expand "smul<mode>3_highpart" + [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v") + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v") + (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))] + "TARGET_VX") + +(define_expand "umul<mode>3_highpart" + [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v") + (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v") + (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))] + "TARGET_VX") diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md index a7bb7ff92f5e..2478f74e161a 100644 --- a/gcc/config/s390/vx-builtins.md +++ b/gcc/config/s390/vx-builtins.md @@ -983,9 +983,8 @@ ; vmhb, vmhh, vmhf, vmhg, vmhq (define_insn "vec_smulh<mode>" [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v") - (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v") - (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")] - UNSPEC_VEC_SMULT_HI))] + (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v") + (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))] "TARGET_VX" "vmh<bhfgq>\t%v0,%v1,%v2" [(set_attr "op_type" "VRR")]) @@ -993,9 +992,8 @@ ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq (define_insn "vec_umulh<mode>" [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v") - (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v") - (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")] - UNSPEC_VEC_UMULT_HI))] + (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v") + 
(match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))] "TARGET_VX" "vmlh<bhfgq>\t%v0,%v1,%v2" [(set_attr "op_type" "VRR")]) diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c new file mode 100644 index 000000000000..a15301aabe54 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize -fdump-tree-optimized" } */ + +#define TEST(T1,T2,N) \ + void \ + avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a, \ + signed T1 *__restrict b) \ + { \ + for (int i = 0; i < N; ++i) \ + res[i] = ((signed T2)a[i] + b[i] + 1) >> 1; \ + } \ + \ + void \ + uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \ + unsigned T1 *__restrict b) \ + { \ + for (int i = 0; i < N; ++i) \ + res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \ + } + +TEST(char,short,16) +TEST(short,int,8) +TEST(int,long,4) +TEST(long,__int128,2) + +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c new file mode 100644 index 000000000000..cd8e4e7d7a09 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize -fdump-tree-optimized" } */ + +#define TEST(T1,T2,N,S) \ + void \ + mulh##T1 (signed T1 *__restrict res, \ + signed T1 *__restrict l, \ + signed T1 *__restrict r) \ + { \ + for (int i = 0; i < N; ++i) \ + res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S); \ + } \ + \ + void \ + umulh##T1 (unsigned T1 *__restrict res, \ + unsigned T1 *__restrict l, \ + unsigned T1 *__restrict r) \ + { \ + for (int i = 0; i < N; ++i) \ + res[i] = (unsigned T1) \ + (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S); \ + } + +TEST(char,short,16,8) 
+TEST(short,int,8,16) +TEST(int,long,4,32) +TEST(long,__int128,2,64) + +/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */ -- 2.43.5