[PATCH 7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.

liuhongt via Gcc-patches Wed, 22 Sep 2021 22:53:34 -0700

From: Hongyu Wang <hongyu.w...@intel.com>

gcc/ChangeLog:


        * config/i386/i386-expand.c (ix86_use_mask_cmp_p): Enable
        HFmode mask_cmp.
        * config/i386/sse.md (sseintvecmodelower): Add HF vector modes.
        (<avx512>_store<mode>_mask): Extend to support HF vector modes.
        (vec_cmp<mode><avx512fmaskmodelower>): Likewise.
        (vcond_mask_<mode><avx512fmaskmodelower>): Likewise.
        (vcond<mode><mode>): New expander.
        (vcond<mode><sseintvecmodelower>): Likewise.
        (vcond<sseintvecmodelower><mode>): Likewise.
        (vcondu<mode><sseintvecmodelower>): Likewise.

gcc/testsuite/ChangeLog:

        * g++.target/i386/avx512fp16-vcondmn-vec.C: New test.
        * g++.target/i386/avx512fp16-vcondmn-minmax.C: Ditto.
        * gcc.target/i386/avx512fp16-vcondmn-loop-1.c: Ditto.
        * gcc.target/i386/avx512fp16-vcondmn-loop-2.c: Ditto.
        * gcc.target/i386/avx512fp16-vec_cmpmn.c: Ditto.
---
 gcc/config/i386/i386-expand.c                 |   2 +
 gcc/config/i386/sse.md                        |  84 ++++++++--
 .../i386/avx512fp16-vcondmn-minmax.C          |  25 +++
 .../g++.target/i386/avx512fp16-vcondmn-vec.C  |  70 +++++++++
 .../i386/avx512fp16-vcondmn-loop-1.c          |  70 +++++++++
 .../i386/avx512fp16-vcondmn-loop-2.c          | 143 ++++++++++++++++++
 .../gcc.target/i386/avx512fp16-vec_cmpmn.c    |  32 ++++
 7 files changed, 414 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
 create mode 100644 gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index dbbf5e34656..94ac303585e 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3638,6 +3638,8 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
     return false;
   else if (vector_size == 64)
     return true;
+  else if (GET_MODE_INNER (cmp_mode) == HFmode)
+    return true;
 
   /* When op_true is NULL, op_false must be NULL, or vice versa.  */
   gcc_assert (!op_true == !op_false);
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a48c8e8bede..084fc7f4693 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -989,9 +989,9 @@ (define_mode_attr sseintvecmode2
    (V16HF "OI") (V8HF "TI")])
 
 (define_mode_attr sseintvecmodelower
-  [(V16SF "v16si") (V8DF "v8di")
-   (V8SF "v8si") (V4DF "v4di")
-   (V4SF "v4si") (V2DF "v2di")
+  [(V32HF "v32hi") (V16SF "v16si") (V8DF "v8di")
+   (V16HF "v16hi") (V8SF "v8si") (V4DF "v4di")
+   (V8HF "v8hi") (V4SF "v4si") (V2DF "v2di")
    (V8SI "v8si") (V4DI "v4di")
    (V4SI "v4si") (V2DI "v2di")
    (V16HI "v16hi") (V8HI "v8hi")
@@ -1568,9 +1568,9 @@ (define_insn "<avx512>_store<mode>_mask"
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<avx512>_store<mode>_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
-       (vec_merge:VI12_AVX512VL
-         (match_operand:VI12_AVX512VL 1 "register_operand" "v")
+  [(set (match_operand:VI12HF_AVX512VL 0 "memory_operand" "=m")
+       (vec_merge:VI12HF_AVX512VL
+         (match_operand:VI12HF_AVX512VL 1 "register_operand" "v")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
   "TARGET_AVX512BW"
@@ -3810,8 +3810,8 @@ (define_insn "<sse>_<unord>comi<round_saeonly_name>"
 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (match_operator:<avx512fmaskmode> 1 ""
-         [(match_operand:V48_AVX512VL 2 "register_operand")
-          (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
+         [(match_operand:V48H_AVX512VL 2 "register_operand")
+          (match_operand:V48H_AVX512VL 3 "nonimmediate_operand")]))]
   "TARGET_AVX512F"
 {
   bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
@@ -4018,6 +4018,51 @@ (define_expand "vcond<V_128:mode><VF_128:mode>"
   DONE;
 })
 
+(define_expand "vcond<mode><mode>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (if_then_else:VF_AVX512FP16VL
+         (match_operator 3 ""
+           [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
+            (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
+         (match_operand:VF_AVX512FP16VL 1 "general_operand")
+         (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond<mode><sseintvecmodelower>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (if_then_else:VF_AVX512FP16VL
+         (match_operator 3 ""
+           [(match_operand:<sseintvecmode> 4 "vector_operand")
+            (match_operand:<sseintvecmode> 5 "vector_operand")])
+         (match_operand:VF_AVX512FP16VL 1 "general_operand")
+         (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond<sseintvecmodelower><mode>"
+  [(set (match_operand:<sseintvecmode> 0 "register_operand")
+       (if_then_else:<sseintvecmode>
+         (match_operator 3 ""
+           [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
+            (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
+         (match_operand:<sseintvecmode> 1 "general_operand")
+         (match_operand:<sseintvecmode> 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
   [(set (match_operand:V48_AVX512VL 0 "register_operand")
        (vec_merge:V48_AVX512VL
@@ -4027,10 +4072,10 @@ (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
   "TARGET_AVX512F")
 
 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
-       (vec_merge:VI12_AVX512VL
-         (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
-         (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
+  [(set (match_operand:VI12HF_AVX512VL 0 "register_operand")
+       (vec_merge:VI12HF_AVX512VL
+         (match_operand:VI12HF_AVX512VL 1 "nonimmediate_operand")
+         (match_operand:VI12HF_AVX512VL 2 "nonimm_or_0_operand")
          (match_operand:<avx512fmaskmode> 3 "register_operand")))]
   "TARGET_AVX512BW")
 
@@ -15538,6 +15583,21 @@ (define_expand "vcondu<VI8F_128:mode>v2di"
   DONE;
 })
 
+(define_expand "vcondu<mode><sseintvecmodelower>"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (if_then_else:VF_AVX512FP16VL
+         (match_operator 3 ""
+           [(match_operand:<sseintvecmode> 4 "vector_operand")
+            (match_operand:<sseintvecmode> 5 "vector_operand")])
+         (match_operand:VF_AVX512FP16VL 1 "general_operand")
+         (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
+  "TARGET_AVX512FP16"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcondeq<VI8F_128:mode>v2di"
   [(set (match_operand:VI8F_128 0 "register_operand")
        (if_then_else:VI8F_128
diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
new file mode 100644
index 00000000000..6d50f4974c5
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-minmax.C
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vminph" 3 } } */
+/* { dg-final { scan-assembler-times "vmaxph" 3 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCONDMINMAX(size, op, name)  \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vminmax_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b)  \
+{ \
+  return (a op b) ? a : b;  \
+}
+
+VCONDMINMAX (8, <, min)
+VCONDMINMAX (8, >, max)
+VCONDMINMAX (16, <, min)
+VCONDMINMAX (16, >, max)
+VCONDMINMAX (32, <, min)
+VCONDMINMAX (32, >, max)
+
diff --git a/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
new file mode 100644
index 00000000000..de93e2c5c86
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512fp16-vcondmn-vec.C
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 45 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpw" 18 } } */
+/* { dg-final { scan-assembler-times "(?:vpblendmw|vmovdqu16\[^\{\n\]+\{%k\[1-7\]\})" 75 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef unsigned short v8uhi __attribute__ ((vector_size (16)));
+typedef unsigned short v16uhi __attribute__ ((vector_size (32)));
+typedef unsigned short v32uhi __attribute__ ((vector_size (64)));
+
+#define VCONDMOV(size, op, name)  \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hf##name (v##size##hf a, v##size##hf b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hi##v##size##hf##name (v##size##hi a, v##size##hi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##hi \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##hi##name (v##size##hi a, v##size##hi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (c op d) ? a : b;  \
+} \
+v##size##hf \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##uhi##v##size##hf##name (v##size##uhi a, v##size##uhi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (a op b) ? c : d;  \
+}\
+v##size##uhi \
+__attribute__ ((noinline, noclone)) \
+vcond_##v##size##hf##v##size##uhi##name (v##size##uhi a, v##size##uhi b,  \
+                          v##size##hf c, v##size##hf d)  \
+{ \
+  return (c op d) ? a : b;  \
+} \
+
+VCONDMOV (8, <, lt)
+VCONDMOV (8, >, gt)
+VCONDMOV (8, ==, eq)
+VCONDMOV (8, <=, le)
+VCONDMOV (8, >=, ge)
+VCONDMOV (16, <, lt)
+VCONDMOV (16, >, gt)
+VCONDMOV (16, <=, le)
+VCONDMOV (16, >=, ge)
+VCONDMOV (16, ==, eq)
+VCONDMOV (32, <, lt)
+VCONDMOV (32, >, gt)
+VCONDMOV (32, <=, le)
+VCONDMOV (32, >=, ge)
+VCONDMOV (32, ==, eq)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
new file mode 100644
index 00000000000..e8745aba64e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-1.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 27 } } */
+/* { dg-final { scan-assembler-times "(?:vpcmpw|vpcmpeqw)" 12 } } */
+/* { dg-final { scan-assembler-times "vpcmpuw" 6 } } */
+
+typedef unsigned short u16;
+typedef short s16;
+
+#define CONDMOV_LOOP(size, type, ptype, op, name) \
+void \
+__attribute__ ((noinline, noclone, optimize("tree-vectorize"))) \
+loop_cond_##size##ptype##type##name ( \
+  ptype * restrict a, ptype * restrict b,      \
+  type * restrict c, type * restrict d) \
+{ \
+  int i;  \
+  for (i = 0; i < size; i++)  \
+    { \
+      if (a[i] op b[i])        \
+       d[i] = c[i];  \
+    } \
+}
+
+CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, s16, <, lt)
+CONDMOV_LOOP (32, _Float16, s16, >, gt)
+CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+CONDMOV_LOOP (16, _Float16, s16, <, lt)
+CONDMOV_LOOP (16, _Float16, s16, >, gt)
+CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+CONDMOV_LOOP (8, _Float16, s16, <, lt)
+CONDMOV_LOOP (8, _Float16, s16, >, gt)
+CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+CONDMOV_LOOP (32, s16, _Float16, <, lt)
+CONDMOV_LOOP (32, s16, _Float16, >, gt)
+CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+CONDMOV_LOOP (16, s16, _Float16, <, lt)
+CONDMOV_LOOP (16, s16, _Float16, >, gt)
+CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+CONDMOV_LOOP (8, s16, _Float16, <, lt)
+CONDMOV_LOOP (8, s16, _Float16, >, gt)
+CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+CONDMOV_LOOP (32, _Float16, u16, <, lt)
+CONDMOV_LOOP (32, _Float16, u16, >, gt)
+CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+CONDMOV_LOOP (16, _Float16, u16, <, lt)
+CONDMOV_LOOP (16, _Float16, u16, >, gt)
+CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+CONDMOV_LOOP (8, _Float16, u16, <, lt)
+CONDMOV_LOOP (8, _Float16, u16, >, gt)
+CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+CONDMOV_LOOP (32, u16, _Float16, <, lt)
+CONDMOV_LOOP (32, u16, _Float16, >, gt)
+CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+CONDMOV_LOOP (16, u16, _Float16, <, lt)
+CONDMOV_LOOP (16, u16, _Float16, >, gt)
+CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+CONDMOV_LOOP (8, u16, _Float16, <, lt)
+CONDMOV_LOOP (8, u16, _Float16, >, gt)
+CONDMOV_LOOP (8, u16, _Float16, ==, eq)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
new file mode 100644
index 00000000000..a0d5f988088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcondmn-loop-2.c
@@ -0,0 +1,143 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mprefer-vector-width=512" } */
+
+static void condmov_test (void);
+#define DO_TEST condmov_test
+#define AVX512FP16
+#define AVX512VL
+#include "avx512f-check.h"
+#include "avx512fp16-vcondmn-loop-1.c"
+
+_Float16 a[32], b[32], c[32], fexp[32], fref[32];
+s16 sa[32], sb[32], sc[32], sexp[32], sref[32];
+u16 ua[32], ub[32], uc[32], uexp[32], uref[32];
+
+#define EMULATE_CONDMOV_LOOP(size, type, ptype, op, name) \
+void \
+__attribute__ ((noinline, noclone)) \
+scalar_cond_##size##ptype##type##name ( \
+  ptype * restrict a, ptype * restrict b,      \
+  type * restrict c, type * restrict d)  \
+{ \
+  int i;  \
+  for (i = 0; i < size; i++)  \
+    { \
+      if (a[i] op b[i])        \
+       d[i] = c[i];  \
+    } \
+}
+
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, s16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, s16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (32, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (16, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, <, lt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, >, gt)
+EMULATE_CONDMOV_LOOP (8, _Float16, u16, ==, eq)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (32, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (16, u16, _Float16, ==, eq)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, <, lt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, >, gt)
+EMULATE_CONDMOV_LOOP (8, u16, _Float16, ==, eq)
+
+void init()
+{
+  int i;
+  for (i = 0; i < 32; i++)
+    {
+      ua[i] = sa[i] = a[i] = i; 
+      ub[i] = sb[i] = b[i] = i;
+      uc[i] = sc[i] = c[i] = (32 - i) * 2;
+      uexp[i] = sexp[i] = fexp[i] = -1;
+      uref[i] = sref[i] = fref[i] = -1;
+    }
+}
+
+int check_cond(void *a, void *b, int size)
+{
+  int i;
+  u16 *pa = (u16 *)a, *pb = (u16 *)b;
+  for (i = 0; i < size; i++)
+    if (pa[i] != pb[i])
+      return 0;
+  return 1;
+}
+
+#define TEST_CONDMOV_LOOP(size, name)  \
+{ \
+  init ();  \
+  scalar_cond_##size##_Float16_Float16##name (a, b, c, fexp);  \
+  loop_cond_##size##_Float16_Float16##name (a, b, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##_Float16s16##name (a, b, sc, sexp);  \
+  loop_cond_##size##_Float16s16##name (a, b, sc, sref);  \
+  if (!check_cond ((void *)sexp, (void *)sref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##s16_Float16##name (sa, sb, c, fexp);  \
+  loop_cond_##size##s16_Float16##name (sa, sb, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##_Float16u16##name (a, b, uc, uexp);  \
+  loop_cond_##size##_Float16u16##name (a, b, uc, uref);  \
+  if (!check_cond ((void *)uexp, (void *)uref, size)) \
+    abort();  \
+  \
+  init ();  \
+  scalar_cond_##size##u16_Float16##name (ua, ub, c, fexp);  \
+  loop_cond_##size##u16_Float16##name (ua, ub, c, fref);  \
+  if (!check_cond ((void *)fexp, (void *)fref, size)) \
+    abort();  \
+}
+
+static void condmov_test()
+{
+  TEST_CONDMOV_LOOP (32, lt)
+  TEST_CONDMOV_LOOP (32, gt)
+  TEST_CONDMOV_LOOP (32, eq)
+  TEST_CONDMOV_LOOP (16, lt)
+  TEST_CONDMOV_LOOP (16, gt)
+  TEST_CONDMOV_LOOP (16, eq)
+  TEST_CONDMOV_LOOP (8, lt)
+  TEST_CONDMOV_LOOP (8, gt)
+  TEST_CONDMOV_LOOP (8, eq)
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c
new file mode 100644
index 00000000000..ef9f85373f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vec_cmpmn.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+/* { dg-final { scan-assembler-times "vcmpph" 15 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define VCMPMN(type, op, name) \
+type  \
+__attribute__ ((noinline, noclone)) \
+vec_cmp_##type##type##name (type a, type b) \
+{ \
+  return a op b;  \
+}
+
+VCMPMN (v8hf, <, lt)
+VCMPMN (v16hf, <, lt)
+VCMPMN (v32hf, <, lt)
+VCMPMN (v8hf, <=, le)
+VCMPMN (v16hf, <=, le)
+VCMPMN (v32hf, <=, le)
+VCMPMN (v8hf, >, gt)
+VCMPMN (v16hf, >, gt)
+VCMPMN (v32hf, >, gt)
+VCMPMN (v8hf, >=, ge)
+VCMPMN (v16hf, >=, ge)
+VCMPMN (v32hf, >=, ge)
+VCMPMN (v8hf, ==, eq)
+VCMPMN (v16hf, ==, eq)
+VCMPMN (v32hf, ==, eq)
-- 
2.27.0

[PATCH 7/7] AVX512FP16: Enable vec_cmpmn/vcondmn expanders for HF modes.

Reply via email to