On Fri, Oct 22, 2021 at 1:57 PM Kong, Lingling via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > > This patch adds support, under fast-math, for transforming expressions such as > _mm512_add_ph(x1, _mm512_fmadd_pch(a, b, _mm512_setzero_ph())) to > _mm512_fmadd_pch(a, b, x1). > > It also supports transforming _mm512_add_ph(x1, _mm512_fmul_pch(a, b)) to > _mm512_fmadd_pch(a, b, x1). > Ok for master? LGTM. Also, please add cfma_optab/conj_cfma_optab so that the vectorizer can catch some complex FMA pattern-matching optimizations. > > gcc/ChangeLog: > > * config/i386/sse.md (fma_<mode>_fadd_fmul): Add new > define_insn_and_split. > (fma_<mode>_fadd_fcmul): Likewise. > (fma_<complexopname>_<mode>_fma_zero): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512fp16-complex-fma.c: New test. > --- > gcc/config/i386/sse.md | 52 +++++++++++++++++++ > .../gcc.target/i386/avx512fp16-complex-fma.c | 18 +++++++ > 2 files changed, 70 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > fbf056bf9e6..36407ca4a59 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -5958,6 +5958,58 @@ > [(set_attr "type" "ssemuladd") > (set_attr "mode" "<MODE>")]) > > +(define_insn_and_split "fma_<mode>_fadd_fmul" > + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") > + (plus:VF_AVX512FP16VL > + (unspec:VF_AVX512FP16VL > + [(match_operand:VF_AVX512FP16VL 1 "vector_operand") > + (match_operand:VF_AVX512FP16VL 2 "vector_operand")] > + UNSPEC_COMPLEX_FMUL) > + (match_operand:VF_AVX512FP16VL 3 "vector_operand")))] > + "TARGET_AVX512FP16 && flag_unsafe_math_optimizations > + && ix86_pre_reload_split()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (unspec:VF_AVX512FP16VL > + [(match_dup 1) (match_dup 2) (match_dup 3)] > + UNSPEC_COMPLEX_FMA))]) > + > +(define_insn_and_split "fma_<mode>_fadd_fcmul" > + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") > + 
(plus:VF_AVX512FP16VL > + (unspec:VF_AVX512FP16VL > + [(match_operand:VF_AVX512FP16VL 1 "vector_operand") > + (match_operand:VF_AVX512FP16VL 2 "vector_operand")] > + UNSPEC_COMPLEX_FCMUL) > + (match_operand:VF_AVX512FP16VL 3 "vector_operand")))] > + "TARGET_AVX512FP16 && flag_unsafe_math_optimizations > + && ix86_pre_reload_split()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (unspec:VF_AVX512FP16VL > + [(match_dup 1) (match_dup 2) (match_dup 3)] > + UNSPEC_COMPLEX_FCMA))]) > + > +(define_insn_and_split "fma_<complexopname>_<mode>_fma_zero" > + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") > + (plus:VF_AVX512FP16VL > + (unspec:VF_AVX512FP16VL > + [(match_operand:VF_AVX512FP16VL 1 "vector_operand") > + (match_operand:VF_AVX512FP16VL 2 "vector_operand") > + (match_operand:VF_AVX512FP16VL 3 "const0_operand")] > + UNSPEC_COMPLEX_F_C_MA) > + (match_operand:VF_AVX512FP16VL 4 "vector_operand")))] > + "TARGET_AVX512FP16 && flag_unsafe_math_optimizations > + && ix86_pre_reload_split()" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (unspec:VF_AVX512FP16VL > + [(match_dup 1) (match_dup 2) (match_dup 4)] > + UNSPEC_COMPLEX_F_C_MA))]) > + > (define_insn "<avx512>_<complexopname>_<mode>_mask<round_name>" > [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") > (vec_merge:VF_AVX512FP16VL > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c > b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c > new file mode 100644 > index 00000000000..2dfd369e785 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-complex-fma.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512fp16 -O2 -Ofast" } */ > +/* { dg-final { scan-assembler-times "vfmaddcph\[ > +\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+( > +?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-not "vaddph\[ > +\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+( > +?:\n|\[ \\t\]+#)"} } */ > +/* { 
dg-final { scan-assembler-not "vfmulcph\[ > +\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+( > +?:\n|\[ \\t\]+#)"} } */ > +/* { dg-final { scan-assembler-times "vfcmaddcph\[ > +\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+( > +?:\n|\[ \\t\]+#)" 2 } } */ > + > +#include <immintrin.h> > +volatile __m512h x1, x2, res, a, b; > +void extern > +avx512f_test (void) > +{ > + res = _mm512_add_ph (x1, _mm512_fmadd_pch (a, b, > +_mm512_setzero_ph())); > + res = _mm512_add_ph (x1, _mm512_fcmadd_pch (a, b, > +_mm512_setzero_ph())); > + > + res = _mm512_add_ph (x1, _mm512_fmul_pch (a, b)); > + res = _mm512_add_ph (x1, _mm512_fcmul_pch (a, b)); } > -- > 2.18.1 >
-- BR, Hongtao