https://gcc.gnu.org/g:02a3bf5e2f0c18078bf67fc0002219edba1d76ff

commit r15-2030-g02a3bf5e2f0c18078bf67fc0002219edba1d76ff
Author: Hongyu Wang <hongyu.w...@intel.com>
Date:   Sat Jul 13 11:45:31 2024 +0800

    AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [PR115889]
    
    According to the instruction spec of AVX512BF16, the conversion from
    float to BF16 is not a simple truncation. It has special handling for
    denormal/NaN inputs, and even for a normal float it adds an extra bias
    determined by the least significant bit of the bf16 number. This means
    we cannot use vcvtne2ps2bf16 for any bf16 vector shuffle.
    The optimization introduced in r15-1368 added a specific split that
    implements an HImode permutation with this instruction, so remove it
    and treat the BFmode permutation the same as HFmode.
    
    gcc/ChangeLog:
    
            PR target/115889
            * config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove.
            * config/i386/sse.md (hi_cvt_bf): Remove.
            (HI_CVT_BF): Likewise.
            (vpermt2_sepcial_bf16_shuffle_<mode>): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            PR target/115889
            * gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust output
            scan.

Diff:
---
 gcc/config/i386/predicates.md                      | 11 -------
 gcc/config/i386/sse.md                             | 35 ----------------------
 .../i386/vpermt2-special-bf16-shufflue.c           |  3 +-
 3 files changed, 1 insertion(+), 48 deletions(-)

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a894847adaf7..5d0bb1e0f54a 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2327,14 +2327,3 @@
 
   return true;
 })
-
-;; Check that each element is odd and incrementally increasing from 1
-(define_predicate "vcvtne2ps2bf_parallel"
-  (and (match_code "const_vector")
-       (match_code "const_int" "a"))
-{
-  for (int i = 0; i < XVECLEN (op, 0); ++i)
-    if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
-      return false;
-  return true;
-})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b3b4697924b5..c134494cd200 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -31460,38 +31460,3 @@
   "TARGET_AVXVNNIINT16"
   "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "prefix" "vex")])
-
-(define_mode_attr hi_cvt_bf
-  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
-
-(define_mode_attr HI_CVT_BF
-  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
-
-(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
-  [(set (match_operand:VI2_AVX512F 0 "register_operand")
-       (unspec:VI2_AVX512F
-         [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
-          (match_operand:VI2_AVX512F 2 "register_operand")
-          (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
-          UNSPEC_VPERMT2))]
-  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
-  operands[2] = lowpart_subreg (<ssePSmode>mode,
-                               force_reg (<MODE>mode, operands[2]),
-                               <MODE>mode);
-  operands[3] = lowpart_subreg (<ssePSmode>mode,
-                               force_reg (<MODE>mode, operands[3]),
-                               <MODE>mode);
-
-  emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
-                                                  operands[3],
-                                                  operands[2]));
-  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
-                                              <HI_CVT_BF>mode));
-  DONE;
-}
-[(set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
index 5c65f2a98847..e504f3f4cd70 100755
--- a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
+++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
-/* { dg-final { scan-assembler-not "vpermi2b" } } */
-/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
+/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */
 
 typedef __bf16 v8bf __attribute__((vector_size(16)));
 typedef __bf16 v16bf __attribute__((vector_size(32)));

Reply via email to