The autovectorizer detects some patterns (rounding average and highpart
multiplication) that s390 can implement directly.  Add expanders so that
these patterns are vectorized.

The RTL for the highpart multiplication builtins used unspecs.  Replace
them with the smul_highpart and umul_highpart RTL codes to allow further
simplification.

Bootstrapped and regtested on s390.  Ok for trunk?

gcc/ChangeLog:

        * config/s390/s390.md: Remove unused unspecs.
        * config/s390/vector.md (avg<mode>3_ceil): New expander.
        (uavg<mode>3_ceil): New expander.
        (smul<mode>3_highpart): New expander.
        (umul<mode>3_highpart): New expander.
        * config/s390/vx-builtins.md (vec_umulh<mode>): Use umul_highpart
        instead of unspec.
        (vec_smulh<mode>): Use smul_highpart instead of unspec.

gcc/testsuite/ChangeLog:

        * gcc.target/s390/vector/pattern-avg-1.c: New test.
        * gcc.target/s390/vector/pattern-mulh-1.c: New test.

Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
---
 gcc/config/s390/s390.md                       |  3 --
 gcc/config/s390/vector.md                     | 26 +++++++++++++++++
 gcc/config/s390/vx-builtins.md                | 10 +++----
 .../gcc.target/s390/vector/pattern-avg-1.c    | 26 +++++++++++++++++
 .../gcc.target/s390/vector/pattern-mulh-1.c   | 29 +++++++++++++++++++
 5 files changed, 85 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 97a4bdf96b2d..440ce93574f4 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -139,9 +139,6 @@
    UNSPEC_LCBB
 
    ; Vector
-   UNSPEC_VEC_SMULT_HI
-   UNSPEC_VEC_UMULT_HI
-   UNSPEC_VEC_SMULT_LO
    UNSPEC_VEC_SMULT_EVEN
    UNSPEC_VEC_UMULT_EVEN
    UNSPEC_VEC_SMULT_ODD
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 6f4e1929eb80..8d7ca1a520f3 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -3576,3 +3576,29 @@
 ; vec_unpacks_float_lo
 ; vec_unpacku_float_hi
 ; vec_unpacku_float_lo
+
+(define_expand "avg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 
"register_operand" "=v")
+       (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
"register_operand"  "v")
+                              (match_operand:VIT_HW_VXE3_T 2 
"register_operand"  "v")]
+                             UNSPEC_VEC_AVG))]
+  "TARGET_VX")
+
+(define_expand "uavg<mode>3_ceil"
+  [(set (match_operand:VIT_HW_VXE3_T                        0 
"register_operand" "=v")
+       (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
"register_operand"  "v")
+                              (match_operand:VIT_HW_VXE3_T 2 
"register_operand"  "v")]
+                             UNSPEC_VEC_AVGU))]
+  "TARGET_VX")
+
+(define_expand "smul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                     
  "=v")
+       (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
"register_operand" "v")
+                                     (match_operand:VIT_HW_VXE3_DT 2 
"register_operand" "v")))]
+  "TARGET_VX")
+
+(define_expand "umul<mode>3_highpart"
+  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                     
  "=v")
+       (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
"register_operand" "v")
+                                     (match_operand:VIT_HW_VXE3_DT 2 
"register_operand" "v")))]
+  "TARGET_VX")
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index a7bb7ff92f5e..2478f74e161a 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -983,9 +983,8 @@
 ; vmhb, vmhh, vmhf, vmhg, vmhq
 (define_insn "vec_smulh<mode>"
   [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                     
  "=v")
-       (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 
"register_operand" "v")
-                               (match_operand:VIT_HW_VXE3_DT 2 
"register_operand" "v")]
-                              UNSPEC_VEC_SMULT_HI))]
+       (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
"register_operand" "v")
+                                     (match_operand:VIT_HW_VXE3_DT 2 
"register_operand" "v")))]
   "TARGET_VX"
   "vmh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
@@ -993,9 +992,8 @@
 ; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
 (define_insn "vec_umulh<mode>"
   [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                     
  "=v")
-       (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 
"register_operand" "v")
-                               (match_operand:VIT_HW_VXE3_DT 2 
"register_operand" "v")]
-                              UNSPEC_VEC_UMULT_HI))]
+       (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 
"register_operand" "v")
+                                     (match_operand:VIT_HW_VXE3_DT 2 
"register_operand" "v")))]
   "TARGET_VX"
   "vmlh<bhfgq>\t%v0,%v1,%v2"
   [(set_attr "op_type" "VRR")])
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
new file mode 100644
index 000000000000..a15301aabe54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize 
-fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N)                                                   \
+  void                                                                  \
+  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a,          \
+           signed T1 *__restrict b)                                     \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = ((signed T2)a[i] + b[i] + 1) >> 1;                       \
+  }                                                                     \
+                                                                        \
+  void                                                                  \
+  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a,     \
+            unsigned T1 *__restrict b)                                  \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1;                     \
+  }
+
+TEST(char,short,16)
+TEST(short,int,8)
+TEST(int,long,4)
+TEST(long,__int128,2)
+
+/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c 
b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
new file mode 100644
index 000000000000..cd8e4e7d7a09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize 
-fdump-tree-optimized" } */
+
+#define TEST(T1,T2,N,S)                                                 \
+  void                                                                  \
+  mulh##T1 (signed T1 *__restrict res,                                  \
+            signed T1 *__restrict l,                                    \
+            signed T1 *__restrict r)                                    \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S);  \
+  }                                                                     \
+                                                                        \
+  void                                                                  \
+  umulh##T1 (unsigned T1 *__restrict res,                               \
+             unsigned T1 *__restrict l,                                 \
+             unsigned T1 *__restrict r)                                 \
+  {                                                                     \
+    for (int i = 0; i < N; ++i)                                         \
+      res[i] = (unsigned T1)                                            \
+        (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S);                 \
+  }
+
+TEST(char,short,16,8)
+TEST(short,int,8,16)
+TEST(int,long,4,32)
+TEST(long,__int128,2,64)
+
+/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
-- 
2.43.5

Reply via email to