On Mon, Jul 15, 2024 at 10:21 AM Hongyu Wang <wwwhhhyyy...@gmail.com> wrote: > > > Could you just git revert 6d0b7b69d143025f271d0041cfa29cf26e6c343b? > > We can still deal with BFmode permutation the same way as HFmode, so > the change in ix86_vectorize_vec_perm_const can be preserved. > > Hongtao Liu <crazy...@gmail.com> 于2024年7月15日周一 09:40写道: > > > > On Sat, Jul 13, 2024 at 3:44 PM Hongyu Wang <hongyu.w...@intel.com> wrote: > > > > > > Hi, > > > > > > According to the instruction spec of AVX512BF16, the conversion from float > > > to BF16 is not a simple truncation. It has special handling for > > > denormal/nan; even for a normal float it will add an extra bias according > > > to the least significant bit of the bf16 number. This means we cannot use > > > vcvtne2ps2bf16 for any bf16 vector shuffle. > > > The optimization introduced in r15-1368 adds a specific split to convert > > > HImode permutation with this instruction, so remove it and treat the > > > BFmode permutation the same as HFmode. I see, patch LGTM. > > > > > > Bootstrapped & regtested on x86_64-pc-linux-gnu. OK for trunk? > > Could you just git revert 6d0b7b69d143025f271d0041cfa29cf26e6c343b? > > > > > > gcc/ChangeLog: > > > > > > PR target/115889 > > > * config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove. > > > * config/i386/sse.md (hi_cvt_bf): Remove. > > > (HI_CVT_BF): Likewise. > > > (vpermt2_sepcial_bf16_shuffle_<mode>): Likewise. > > > > > > gcc/testsuite/ChangeLog: > > > > > > PR target/115889 > > > * gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust option > > > and output scan. 
> > > --- > > > gcc/config/i386/predicates.md | 11 ------ > > > gcc/config/i386/sse.md | 35 ------------------- > > > .../i386/vpermt2-special-bf16-shufflue.c | 5 ++- > > > 3 files changed, 2 insertions(+), 49 deletions(-) > > > > > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > > > index a894847adaf..5d0bb1e0f54 100644 > > > --- a/gcc/config/i386/predicates.md > > > +++ b/gcc/config/i386/predicates.md > > > @@ -2327,14 +2327,3 @@ (define_predicate "apx_ndd_add_memory_operand" > > > > > > return true; > > > }) > > > - > > > -;; Check that each element is odd and incrementally increasing from 1 > > > -(define_predicate "vcvtne2ps2bf_parallel" > > > - (and (match_code "const_vector") > > > - (match_code "const_int" "a")) > > > -{ > > > - for (int i = 0; i < XVECLEN (op, 0); ++i) > > > - if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1)) > > > - return false; > > > - return true; > > > -}) > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > > index b3b4697924b..c134494cd20 100644 > > > --- a/gcc/config/i386/sse.md > > > +++ b/gcc/config/i386/sse.md > > > @@ -31460,38 +31460,3 @@ (define_insn "vpdp<vpdpwprodtype>_<mode>" > > > "TARGET_AVXVNNIINT16" > > > "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}" > > > [(set_attr "prefix" "vex")]) > > > - > > > -(define_mode_attr hi_cvt_bf > > > - [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")]) > > > - > > > -(define_mode_attr HI_CVT_BF > > > - [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")]) > > > - > > > -(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>" > > > - [(set (match_operand:VI2_AVX512F 0 "register_operand") > > > - (unspec:VI2_AVX512F > > > - [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel") > > > - (match_operand:VI2_AVX512F 2 "register_operand") > > > - (match_operand:VI2_AVX512F 3 "nonimmediate_operand")] > > > - UNSPEC_VPERMT2))] > > > - "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()" > > > - "#" > > > - "&& 1" > > > - [(const_int 0)] 
> > > -{ > > > - rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode); > > > - operands[2] = lowpart_subreg (<ssePSmode>mode, > > > - force_reg (<MODE>mode, operands[2]), > > > - <MODE>mode); > > > - operands[3] = lowpart_subreg (<ssePSmode>mode, > > > - force_reg (<MODE>mode, operands[3]), > > > - <MODE>mode); > > > - > > > - emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0, > > > - operands[3], > > > - operands[2])); > > > - emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, > > > - <HI_CVT_BF>mode)); > > > - DONE; > > > -} > > > -[(set_attr "mode" "<sseinsnmode>")]) > > > diff --git > > > a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > > b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > > index 5c65f2a9884..4cbc85735de 100755 > > > --- a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > > +++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > > @@ -1,7 +1,6 @@ > > > /* { dg-do compile } */ > > > -/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */ > > > -/* { dg-final { scan-assembler-not "vpermi2b" } } */ > > > -/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */ > > > +/* { dg-options "-O2 -mavx512vbmi -mavx512vl" } */ > > > +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ > > > > > > typedef __bf16 v8bf __attribute__((vector_size(16))); > > > typedef __bf16 v16bf __attribute__((vector_size(32))); > > > -- > > > 2.34.1 > > > > > > > > > -- > > BR, > > Hongtao
-- BR, Hongtao