> Could you just git revert 6d0b7b69d143025f271d0041cfa29cf26e6c343b?

We can still deal with BFmode permutation the same way as HFmode, so the change in ix86_vectorize_vec_perm_const can be preserved.
Hongtao Liu <crazy...@gmail.com> 于2024年7月15日周一 09:40写道: > > On Sat, Jul 13, 2024 at 3:44 PM Hongyu Wang <hongyu.w...@intel.com> wrote: > > > > Hi, > > > > According to the instruction spec of AVX512BF16, the conversion from float > > to BF16 is not a simple truncation. It has special handling for > > denormal/NaN, and even for a normal float it adds an extra bias according > > to the least significant bit of the bf16 number. This means we cannot use the > > vcvtne2ps2bf16 for any bf16 vector shuffle. > > The optimization introduced in r15-1368 adds a specific split to convert > > HImode permutation with this instruction, so remove it and treat the > > BFmode permutation the same as HFmode. > > > > Bootstrapped & regtested on x86_64-pc-linux-gnu. OK for trunk? > Could you just git revert 6d0b7b69d143025f271d0041cfa29cf26e6c343b? > > > > gcc/ChangeLog: > > > > PR target/115889 > > * config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove. > > * config/i386/sse.md (hi_cvt_bf): Remove. > > (HI_CVT_BF): Likewise. > > (vpermt2_sepcial_bf16_shuffle_<mode>): Likewise. > > > > gcc/testsuite/ChangeLog: > > > > PR target/115889 > > * gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust option > > and output scan. 
> > --- > > gcc/config/i386/predicates.md | 11 ------ > > gcc/config/i386/sse.md | 35 ------------------- > > .../i386/vpermt2-special-bf16-shufflue.c | 5 ++- > > 3 files changed, 2 insertions(+), 49 deletions(-) > > > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > > index a894847adaf..5d0bb1e0f54 100644 > > --- a/gcc/config/i386/predicates.md > > +++ b/gcc/config/i386/predicates.md > > @@ -2327,14 +2327,3 @@ (define_predicate "apx_ndd_add_memory_operand" > > > > return true; > > }) > > - > > -;; Check that each element is odd and incrementally increasing from 1 > > -(define_predicate "vcvtne2ps2bf_parallel" > > - (and (match_code "const_vector") > > - (match_code "const_int" "a")) > > -{ > > - for (int i = 0; i < XVECLEN (op, 0); ++i) > > - if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1)) > > - return false; > > - return true; > > -}) > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index b3b4697924b..c134494cd20 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -31460,38 +31460,3 @@ (define_insn "vpdp<vpdpwprodtype>_<mode>" > > "TARGET_AVXVNNIINT16" > > "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}" > > [(set_attr "prefix" "vex")]) > > - > > -(define_mode_attr hi_cvt_bf > > - [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")]) > > - > > -(define_mode_attr HI_CVT_BF > > - [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")]) > > - > > -(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>" > > - [(set (match_operand:VI2_AVX512F 0 "register_operand") > > - (unspec:VI2_AVX512F > > - [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel") > > - (match_operand:VI2_AVX512F 2 "register_operand") > > - (match_operand:VI2_AVX512F 3 "nonimmediate_operand")] > > - UNSPEC_VPERMT2))] > > - "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()" > > - "#" > > - "&& 1" > > - [(const_int 0)] > > -{ > > - rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode); > > - operands[2] = lowpart_subreg 
(<ssePSmode>mode, > > - force_reg (<MODE>mode, operands[2]), > > - <MODE>mode); > > - operands[3] = lowpart_subreg (<ssePSmode>mode, > > - force_reg (<MODE>mode, operands[3]), > > - <MODE>mode); > > - > > - emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0, > > - operands[3], > > - operands[2])); > > - emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, > > - <HI_CVT_BF>mode)); > > - DONE; > > -} > > -[(set_attr "mode" "<sseinsnmode>")]) > > diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > index 5c65f2a9884..4cbc85735de 100755 > > --- a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > +++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c > > @@ -1,7 +1,6 @@ > > /* { dg-do compile } */ > > -/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */ > > -/* { dg-final { scan-assembler-not "vpermi2b" } } */ > > -/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */ > > +/* { dg-options "-O2 -mavx512vbmi -mavx512vl" } */ > > +/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */ > > > > typedef __bf16 v8bf __attribute__((vector_size(16))); > > typedef __bf16 v16bf __attribute__((vector_size(32))); > > -- > > 2.34.1 > > > > > -- > BR, > Hongtao