From: Hongyu Wang <hongyu.w...@intel.com> NB: 64-bit/32-bit vectorization for HFmode is not supported for now; this patch will be adjusted once V2HF/V4HF operations are supported.
gcc/ChangeLog: * config/i386/i386.md (fix<fixunssuffix>_trunchf<mode>2): New define_insn. (*fixuns_trunchfsi2zext): Likewise. (fixuns_trunchfhi2): New expander. * config/i386/sse.md (ssePHmodelower): New mode_attr. (fix<fixunssuffix>_trunc<ssePHmodelower><mode>2): New expander for same element vector fix_truncate. (fix<fixunssuffix>_truncv4hf<mode>2): Likewise for V4HF to V4SI/V4DI fix_truncate. (fix<fixunssuffix>_truncv2hfv2di2): Likewise for V2HF to V2DI fix_truncate. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-trunchf.c: New test. * gcc.target/i386/avx512fp16-truncvnhf.c: Ditto. --- gcc/config/i386/i386.md | 29 +++++++++ gcc/config/i386/sse.md | 43 +++++++++++++ .../gcc.target/i386/avx512fp16-trunchf.c | 59 ++++++++++++++++++ .../gcc.target/i386/avx512fp16-truncvnhf.c | 61 +++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a087e557d7f..c6279e620c9 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4810,6 +4810,16 @@ (define_expand "fix_trunc<mode>di2" } }) +(define_insn "fix<fixunssuffix>_trunchf<mode>2" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (any_fix:SWI48 + (match_operand:HF 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512FP16" + "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + ;; Signed conversion to SImode. 
(define_expand "fix_truncxfsi2" @@ -4917,6 +4927,17 @@ (define_insn "fixuns_trunc<mode>si2_avx512f" (set_attr "prefix" "evex") (set_attr "mode" "SI")]) +(define_insn "*fixuns_trunchfsi2zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unsigned_fix:SI + (match_operand:HF 1 "nonimmediate_operand" "vm"))))] + "TARGET_64BIT && TARGET_AVX512FP16" + "vcvttsh2usi\t{%1, %k0|%k0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "evex") + (set_attr "mode" "SI")]) + (define_insn "*fixuns_trunc<mode>si2_avx512f_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -4949,6 +4970,14 @@ (define_insn_and_split "*fixuns_trunc<mode>_1" ;; Without these patterns, we'll try the unsigned SI conversion which ;; is complex for SSE, rather than the signed SI conversion, which isn't. +(define_expand "fixuns_trunchfhi2" + [(set (match_dup 2) + (fix:SI (match_operand:HF 1 "nonimmediate_operand"))) + (set (match_operand:HI 0 "nonimmediate_operand") + (subreg:HI (match_dup 2) 0))] + "TARGET_AVX512FP16" + "operands[2] = gen_reg_rtx (SImode);") + (define_expand "fixuns_trunc<mode>hi2" [(set (match_dup 2) (fix:SI (match_operand:MODEF 1 "nonimmediate_operand"))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1ca95984afc..f8a5f197f3c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1034,6 +1034,13 @@ (define_mode_attr ssePHmode (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF") (V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")]) +;; Mapping of vector modes to vector hf modes of same element. 
+(define_mode_attr ssePHmodelower + [(V32HI "v32hf") (V16HI "v16hf") (V8HI "v8hf") + (V16SI "v16hf") (V8SI "v8hf") (V4SI "v4hf") + (V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf") + (V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")]) + ;; Mapping of vector modes to packed single mode of the same size (define_mode_attr ssePSmode [(V16SI "V16SF") (V8DF "V16SF") @@ -6175,6 +6182,12 @@ (define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>" (set_attr "prefix" "evex") (set_attr "mode" "HF")]) +(define_expand "fix<fixunssuffix>_trunc<ssePHmodelower><mode>2" + [(set (match_operand:VI2H_AVX512VL 0 "register_operand") + (any_fix:VI2H_AVX512VL + (match_operand:<ssePHmode> 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16") + (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly_name>" [(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v") (any_fix:VI2H_AVX512VL @@ -6185,6 +6198,21 @@ (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_expand "fix<fixunssuffix>_truncv4hf<mode>2" + [(set (match_operand:VI4_128_8_256 0 "register_operand") + (any_fix:VI4_128_8_256 + (match_operand:V4HF 1 "nonimmediate_operand")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" +{ + if (!MEM_P (operands[1])) + { + operands[1] = lowpart_subreg (V8HFmode, operands[1], V4HFmode); + emit_insn (gen_avx512fp16_fix<fixunssuffix>_trunc<mode>2 (operands[0], + operands[1])); + DONE; + } +}) + (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name>" [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v") (any_fix:VI4_128_8_256 @@ -6207,6 +6235,21 @@ (define_insn "*avx512fp16_fix<fixunssuffix>_trunc<mode>2_load<mask_name>" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_expand "fix<fixunssuffix>_truncv2hfv2di2" + [(set (match_operand:V2DI 0 "register_operand") + (any_fix:V2DI + (match_operand:V2HF 1 "nonimmediate_operand")))] + 
"TARGET_AVX512FP16 && TARGET_AVX512VL" +{ + if (!MEM_P (operands[1])) + { + operands[1] = lowpart_subreg (V8HFmode, operands[1], V2HFmode); + emit_insn (gen_avx512fp16_fix<fixunssuffix>_truncv2di2 (operands[0], + operands[1])); + DONE; + } +}) + (define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>" [(set (match_operand:V2DI 0 "register_operand" "=v") (any_fix:V2DI diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c new file mode 100644 index 00000000000..2c025b7803c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c @@ -0,0 +1,59 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 3 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler "xorl\[ \\t\]+%edx, %edx" { target ia32 } } } */ + +#include <immintrin.h> + +short +__attribute__ ((noinline, noclone)) +trunc_f16_to_si16 (_Float16 f) +{ + return f; +} + +unsigned short +__attribute__ ((noinline, noclone)) +trunc_f16_to_su16 (_Float16 f) +{ + return f; +} + +int +__attribute__ ((noinline, noclone)) +trunc_f16_to_si32 (_Float16 f) +{ + return f; +} + +unsigned int +__attribute__ ((noinline, noclone)) +trunc_f16_to_su32 (_Float16 f) +{ + return f; +} + +long long +__attribute__ ((noinline, noclone)) +trunc_f16_to_si64 (_Float16 f) +{ + return f; +} + +unsigned long long +__attribute__ ((noinline, noclone)) +trunc_f16_to_su64 (_Float16 f) +{ + return f; +} + +unsigned long long +__attribute__ ((noinline, noclone)) +trunc_f16_to_su64_zext (_Float16 f) +{ + return (unsigned int) f; +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c new file mode 100644 index 00000000000..ee55cd12300 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */ + +extern long long di[8]; +extern unsigned long long udi[8]; +extern int si[16]; +extern unsigned int usi[16]; +extern short hi[32]; +extern unsigned short uhi[32]; +extern _Float16 hf[32]; + +#define DO_PRAGMA(X) _Pragma(#X) + +#define FIX_TRUNCHFVV(size, mode) \ + void __attribute__ ((noinline, noclone)) \ +fix_trunc##size##hf##v##size##mode () \ +{\ + int i; \ + DO_PRAGMA (GCC unroll size) \ + for (i = 0; i < size; i++) \ + mode[i] = hf[i]; \ +} + +FIX_TRUNCHFVV(32, hi) +FIX_TRUNCHFVV(16, hi) +FIX_TRUNCHFVV(8, hi) +FIX_TRUNCHFVV(16, si) +FIX_TRUNCHFVV(8, si) +FIX_TRUNCHFVV(4, si) +FIX_TRUNCHFVV(8, di) +FIX_TRUNCHFVV(4, di) +FIX_TRUNCHFVV(2, di) + +FIX_TRUNCHFVV(32, uhi) +FIX_TRUNCHFVV(16, uhi) +FIX_TRUNCHFVV(8, uhi) +FIX_TRUNCHFVV(16, 
usi) +FIX_TRUNCHFVV(8, usi) +FIX_TRUNCHFVV(4, usi) +FIX_TRUNCHFVV(8, udi) +FIX_TRUNCHFVV(4, udi) +FIX_TRUNCHFVV(2, udi) + +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { 
scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -- 2.27.0