https://github.com/alanzhao1 created https://github.com/llvm/llvm-project/pull/115570
Reverts llvm/llvm-project#114070 Reason: Causes `immintrin.h` to fail to compile if `-msse` and `-mno-sse2` are passed to clang: https://github.com/llvm/llvm-project/pull/114070#issuecomment-2465926700 >From e06af1d045a57e93ebf3c86c4ac70aa752a93fa1 Mon Sep 17 00:00:00 2001 From: Alan Zhao <azhao...@gmail.com> Date: Fri, 8 Nov 2024 16:12:54 -0800 Subject: [PATCH] Revert "[X86][AMX] Support AMX-AVX512 (#114070)" This reverts commit 58a17e1bbc54357385d0b89cfc5635e402c31ef6. --- clang/docs/ReleaseNotes.rst | 1 - clang/include/clang/Basic/BuiltinsX86_64.def | 13 - clang/include/clang/Driver/Options.td | 2 - clang/lib/Basic/Targets/X86.cpp | 6 - clang/lib/Basic/Targets/X86.h | 1 - clang/lib/Headers/CMakeLists.txt | 1 - clang/lib/Headers/amxavx512intrin.h | 382 ------------------ clang/lib/Headers/immintrin.h | 4 - clang/lib/Sema/SemaX86.cpp | 6 - clang/test/CodeGen/X86/amx_avx512_api.c | 52 --- clang/test/CodeGen/X86/amxavx512-builtins.c | 41 -- clang/test/CodeGen/attr-target-x86.c | 8 +- clang/test/Driver/x86-target-features.c | 7 - clang/test/Preprocessor/x86_target_features.c | 12 - llvm/include/llvm/IR/IntrinsicsX86.td | 51 --- .../llvm/TargetParser/X86TargetParser.def | 1 - llvm/lib/Target/X86/X86.td | 4 - llvm/lib/Target/X86/X86ExpandPseudo.cpp | 67 +-- llvm/lib/Target/X86/X86ISelLowering.cpp | 76 ---- llvm/lib/Target/X86/X86InstrAMX.td | 147 ------- llvm/lib/Target/X86/X86InstrPredicates.td | 1 - llvm/lib/Target/X86/X86LowerAMXType.cpp | 11 - llvm/lib/Target/X86/X86PreTileConfig.cpp | 18 +- llvm/lib/TargetParser/Host.cpp | 1 - llvm/lib/TargetParser/X86TargetParser.cpp | 2 - .../CodeGen/X86/amx-across-func-tilemovrow.ll | 171 -------- .../test/CodeGen/X86/amx-avx512-intrinsics.ll | 116 ------ .../CodeGen/X86/amx-tile-avx512-internals.ll | 61 --- llvm/test/MC/Disassembler/X86/amx-avx512.txt | 106 ----- llvm/test/MC/X86/amx-avx512-att.s | 105 ----- llvm/test/MC/X86/amx-avx512-intel.s | 105 ----- 31 files changed, 12 insertions(+), 1567 deletions(-) delete mode 100644 clang/lib/Headers/amxavx512intrin.h delete mode 100644 clang/test/CodeGen/X86/amx_avx512_api.c delete mode 100644 clang/test/CodeGen/X86/amxavx512-builtins.c delete mode 100644 llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll delete mode 100644 llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll delete mode 100644 llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll delete mode 100644 llvm/test/MC/Disassembler/X86/amx-avx512.txt delete mode 100644 llvm/test/MC/X86/amx-avx512-att.s delete mode 100644 llvm/test/MC/X86/amx-avx512-intel.s diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c3424e0e6f34c9..f82fbb73b12162 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -739,7 +739,6 @@ X86 Support * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``. - Support ISA of ``AMX-FP8``. - Support ISA of ``AMX-TRANSPOSE``. -- Support ISA of ``AMX-AVX512``. Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def index 9f7462b1e0d962..d95e8455a304b6 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -133,12 +133,6 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z", TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose") TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose") TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose") -TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl_internal, "V32xUsUsV256iUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tilemovrow_internal, "V16iUsUsV256iUi", "n", "amx-avx512,avx10.2-512") // AMX TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile") TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile") @@ -165,13 +159,6 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1, "vIUcvC*z", "n", "amx-transpose") TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1, "vIUcvC*z", "n","amx-transpose") TARGET_BUILTIN(__builtin_ia32_ttransposed, "vIUcIUc", "n", "amx-transpose") -TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps, "V16fIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h, "V32yIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l, "V32yIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh, "V32xIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl, "V32xIUcUi", "n", "amx-avx512,avx10.2-512") -TARGET_BUILTIN(__builtin_ia32_tilemovrow, "V16iIUcUi", "n", "amx-avx512,avx10.2-512") - TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi") TARGET_BUILTIN(__builtin_ia32_cmpccxadd32, "Siv*SiSiIi", "n", "cmpccxadd") TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiSLLi*SLLiSLLiIi", "n", "cmpccxadd") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0dba5672c5a85d..8887e0c1495d2a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6285,8 +6285,6 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>; def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>; def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>; def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>; -def mamx_avx512 : Flag<["-"], "mamx-avx512">, Group<m_x86_Features_Group>; -def mno_amx_avx512 : Flag<["-"], "mno-amx-avx512">, Group<m_x86_Features_Group>; def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>; def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>; def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 3c3dbfa13e452b..d7d3adef42c79a 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -432,8 +432,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAMXFP8 = true; } else if (Feature == "+amx-transpose") { HasAMXTRANSPOSE = true; - } else if (Feature == "+amx-avx512") { - HasAMXAVX512 = true; } else if (Feature == "+cmpccxadd") { HasCMPCCXADD = true; } else if (Feature == "+raoint") { @@ -957,8 +955,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AMX_FP8__"); if (HasAMXTRANSPOSE) Builder.defineMacro("__AMX_TRANSPOSE__"); - if (HasAMXAVX512) - Builder.defineMacro("__AMX_AVX512__"); if (HasCMPCCXADD) Builder.defineMacro("__CMPCCXADD__"); if (HasRAOINT) @@ -1084,7 +1080,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { return llvm::StringSwitch<bool>(Name) .Case("adx", true) .Case("aes", true) - .Case("amx-avx512", true) .Case("amx-bf16", true) .Case("amx-complex", true) .Case("amx-fp16", true) @@ -1205,7 +1200,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch<bool>(Feature) .Case("adx", HasADX) .Case("aes", HasAES) - .Case("amx-avx512", HasAMXAVX512) .Case("amx-bf16", HasAMXBF16) .Case("amx-complex", HasAMXCOMPLEX) .Case("amx-fp16", HasAMXFP16) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 70047731b17295..e2eba63b992355 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -159,7 +159,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAMXCOMPLEX = false; bool HasAMXFP8 = false; bool HasAMXTRANSPOSE = false; - bool HasAMXAVX512 = false; bool HasSERIALIZE = false; bool HasTSXLDTRK = false; bool HasUSERMSR = false; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 76366ca1f108e9..67242cd4d981bc 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -146,7 +146,6 @@ set(x86_files adcintrin.h adxintrin.h ammintrin.h - amxavx512intrin.h amxcomplexintrin.h amxfp16intrin.h amxfp8intrin.h diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h deleted file mode 100644 index 945edea543e706..00000000000000 --- a/clang/lib/Headers/amxavx512intrin.h +++ /dev/null @@ -1,382 +0,0 @@ -/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_AVX512INTRIN_H -#define __AMX_AVX512INTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS_AVX512 \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-avx512,avx10.2-512"))) - -/// Moves a row from a tile register to a zmm destination register, converting -/// the int32 source elements to fp32. The row of the tile is selected by a -/// 32b GPR. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m512i _tile_cvtrowd2ps(__tile tsrc, unsigned int row); -/// \endcode -/// -/// \code{.operation} -/// VL := 512 -/// VL_bytes := VL >> 3 -/// row_index := row & 0xffff -/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes -/// FOR i := 0 TO (VL_bytes / 4) - 1 -/// IF i + row_chunk / 4 >= tsrc.colsb / 4 -/// dst.dword[i] := 0 -/// ELSE -/// dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE) -/// FI -/// ENDFOR -/// dst[MAX_VL-1:VL] := 0 -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCVTROWD2PS instruction. -/// -/// \param tsrc -/// The source tile. Max size is 1024 Bytes. -/// \param row -/// The row of the source tile -#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row) - -/// Moves a row from a tile register to a zmm destination register, converting -/// the fp32 source elements to bf16. It places the resulting bf16 elements -/// in the high 16 bits within each dword. The row of the tile is selected -/// by a 32b GPR. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row); -/// \endcode -/// -/// \code{.operation} -/// VL := 512 -/// VL_bytes := VL >> 3 -/// row_index := row & 0xffff -/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes -/// FOR i := 0 TO (VL_bytes / 4) - 1 -/// IF i + row_chunk / 4 >= tsrc.colsb / 4 -/// dst.dword[i] := 0 -/// ELSE -/// dst.word[2*i+0] := 0 -/// dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) -/// FI -/// ENDFOR -/// dst[MAX_VL-1:VL] := 0 -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction. -/// -/// \param tsrc -/// The source tile. Max size is 1024 Bytes. -/// \param row -/// The the row of the source tile. -#define _tile_cvtrowps2pbf16h(tsrc, row) \ - __builtin_ia32_tcvtrowps2pbf16h(tsrc, row) - -/// Moves a row from a tile register to a zmm destination register, converting -/// the fp32 source elements to bf16. It places the resulting bf16 elements -/// in the low 16 bits within each dword. The row of the tile is selected -/// by a 32b GPR. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row); -/// \endcode -/// -/// \code{.operation} -/// VL := 512 -/// VL_bytes := VL >> 3 -/// row_index := row & 0xffff -/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes -/// FOR i := 0 TO (VL_bytes / 4) - 1 -/// IF i + row_chunk / 4 >= tsrc.colsb / 4 -/// dst.dword[i] := 0 -/// ELSE -/// dst.word[2*i+1] := 0 -/// dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) -/// FI -/// ENDFOR -/// dst[MAX_VL-1:VL] := 0 -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction. -/// -/// \param tsrc -/// The source tile. Max size is 1024 Bytes. -/// \param row -/// The the row of the source tile. -#define _tile_cvtrowps2pbf16l(tsrc, row) \ - __builtin_ia32_tcvtrowps2pbf16l(tsrc, row) - -/// Moves a row from a tile register to a zmm destination register, converting -/// the fp32 source elements to fp16. It places the resulting fp16 elements -/// in the high 16 bits within each dword. The row of the tile is selected -/// by a 32b GPR. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m512i _tile_cvtrowps2phh(__tile tsrc, unsigned int row); -/// \endcode -/// -/// \code{.operation} -/// VL := 512 -/// VL_bytes := VL >> 3 -/// row_index := row & 0xffff -/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes -/// FOR i := 0 TO (VL_bytes / 4) - 1 -/// IF i + row_chunk / 4 >= tsrc.colsb / 4 -/// dst.dword[i] := 0 -/// ELSE -/// dst.word[2*i+0] := 0 -/// dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) -/// FI -/// ENDFOR -/// dst[MAX_VL-1:VL] := 0 -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction. -/// -/// \param tsrc -/// The source tile. Max size is 1024 Bytes. -/// \param row -/// The the row of the source tile. -#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row) - -/// Moves a row from a tile register to a zmm destination register, converting -/// the fp32 source elements to fp16. It places the resulting fp16 elements -/// in the low 16 bits within each dword. The row of the tile is selected -/// by a 32b GPR. -/// -/// \headerfile <x86intrin.h> -/// -/// \code -/// __m512i _tile_cvtrowps2phl(__tile tsrc, unsigned int row); -/// \endcode -/// -/// \code{.operation} -/// VL := 512 -/// VL_bytes := VL >> 3 -/// row_index := row & 0xffff -/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes -/// FOR i := 0 TO (VL_bytes / 4) - 1 -/// IF i + row_chunk / 4 >= tsrc.colsb / 4 -/// dst.dword[i] := 0 -/// ELSE -/// dst.word[2*i+1] := 0 -/// dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) -/// FI -/// ENDFOR -/// dst[MAX_VL-1:VL] := 0 -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction. -/// -/// \param tsrc -/// The source tile. Max size is 1024 Bytes. -/// \param row -/// The the row of the source tile. -#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row) - -/// Move one row of a tile data to a v16f32 data. -/// The row of the tile is selected by a 32b GPR. -/// -/// \headerfile <immintrin.h> -/// -/// \code -/// __m512 _tile_movrow(__tile a, unsigned b); -/// \endcode -/// -/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction. -/// -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source r32. Size is 4 Bytes. -/// \returns -/// The destination v16f32 data. Size is 64 Bytes. -/// -/// \code{.operation} -/// VL := 512 -/// VL_bytes := VL>>3 -/// row_index := b&0xffff -/// row_chunk := ((b>>16)&0xffff) * VL_bytes -/// FOR i := 0 TO (VL_bytes-1) -/// IF (row_chunk + i >= a.colsb) -/// dst.byte[i] := 0 -/// ELSE -/// dst.byte[i] := a.row[row_index].byte[row_chunk+i] -/// ENDFOR -/// \endcode -#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. - -static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal( - unsigned short m, unsigned short n, _tile1024i src, unsigned u) { - return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u); -} - -static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 -_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n, - _tile1024i src, unsigned u) { - return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u); -} - -static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 -_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n, - _tile1024i src, unsigned u) { - return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u); -} - -static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal( - unsigned short m, unsigned short n, _tile1024i src, unsigned u) { - return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u); -} - -static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal( - unsigned short m, unsigned short n, _tile1024i src, unsigned u) { - return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal( - unsigned short m, unsigned short n, _tile1024i src, unsigned u) { - return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u); -} - -/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source -/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if -/// MXCSR.RC=RNE. Embedded rounding is not supported. -/// The row and chunk elements of tile is fetched from 32bit src1. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction. -/// -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source r32. Size is 4 Bytes. -/// \returns -/// The destination v16f32 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 -static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) { - return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1); -} - -/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source -/// elements to bf16 at high 16-bits of each dword. -/// The row and chunk elements of tile is fetched from 32bit src1. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction. -/// -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source r32. Size is 4 Bytes. -/// \returns -/// The destination v32bf16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 -static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) { - return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1); -} - -/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source -/// elements to bf16 at low 16-bits of each dword. -/// The row and chunk elements of tile is fetched from 32bit src1. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction. -/// -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source r32. Size is 4 Bytes. -/// \returns -/// The destination v32bf16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 -static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) { - return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1); -} - -/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source -/// elements to fp16 at high 16-bits of each dword. -/// The row and chunk elements of tile is fetched from 32bit src1. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction. -/// -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source r32. Size is 4 Bytes. -/// \returns -/// The destination v32fp16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 -static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) { - return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1); -} - -/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source -/// elements to fp16 at low 16-bits of each dword. -/// The row and chunk elements of tile is fetched from 32bit src1. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction. -/// -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source r32. Size is 4 Bytes. -/// \returns -/// The destination v32fp16 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 -static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) { - return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1); -} - -/// Move one row of a tile data to a v16f32 data. -/// The row of the tile is selected by a 32b GPR. -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction. -/// -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source r32. Size is 4 Bytes. -/// \returns -/// The destination v16i32 data. Size is 64 Bytes. -__DEFAULT_FN_ATTRS_AVX512 -static __m512i __tile_movrow(__tile1024i src0, unsigned src1) { - return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1); -} - -#endif // __x86_64__ -#endif // __AMX_AVX512INTRIN_H diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index bc240e28d59142..4bf7eac4195eec 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -656,10 +656,6 @@ _storebe_i64(void * __P, long long __D) { #include <amxtransposeintrin.h> #endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__) -#include <amxavx512intrin.h> -#endif - #if !defined(__SCE__) || __has_feature(modules) || \ defined(__AVX512VP2INTERSECT__) #include <avx512vp2intersectintrin.h> diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp index 1155a5edc73c34..ef878d16d445fd 100644 --- a/clang/lib/Sema/SemaX86.cpp +++ b/clang/lib/Sema/SemaX86.cpp @@ -635,12 +635,6 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_t2rpntlvwz0t1: case X86::BI__builtin_ia32_t2rpntlvwz1: case X86::BI__builtin_ia32_t2rpntlvwz1t1: - case X86::BI__builtin_ia32_tcvtrowps2pbf16h: - case X86::BI__builtin_ia32_tcvtrowps2pbf16l: - case X86::BI__builtin_ia32_tcvtrowps2phh: - case X86::BI__builtin_ia32_tcvtrowps2phl: - case X86::BI__builtin_ia32_tcvtrowd2ps: - case X86::BI__builtin_ia32_tilemovrow: return CheckBuiltinTileArgumentsRange(TheCall, 0); case X86::BI__builtin_ia32_tdpbssd: case X86::BI__builtin_ia32_tdpbsud: diff --git a/clang/test/CodeGen/X86/amx_avx512_api.c b/clang/test/CodeGen/X86/amx_avx512_api.c deleted file mode 100644 index aea790d61268d3..00000000000000 --- a/clang/test/CodeGen/X86/amx_avx512_api.c +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown \ -// RUN: -target-feature +amx-avx512 -target-feature +avx10.2-512 \ -// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK - -#include <immintrin.h> - -char buf[1024]; -#define STRIDE 32 - -char buf2[1024]; - -__m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_cvtrowd2ps - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <16 x float> @llvm.x86.tcvtrowd2ps.internal - return __tile_cvtrowd2ps(a, b); -} - -__m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_cvtrowps2pbf16h - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal - return __tile_cvtrowps2pbf16h(a, b); -} - -__m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_cvtrowps2pbf16l - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal - return __tile_cvtrowps2pbf16l(a, b); -} - -__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_cvtrowps2phh - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phh.internal - return __tile_cvtrowps2phh(a, b); -} - -__m512h test_tile_cvtrowps2phl(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_cvtrowps2phl - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <32 x half> @llvm.x86.tcvtrowps2phl.internal - return __tile_cvtrowps2phl(a, b); -} - -__m512i test_tile_movrow(__tile1024i a, unsigned b) { - //CHECK-LABEL: @test_tile_movrow - //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) - //CHECK-DAG: call <16 x i32> @llvm.x86.tilemovrow.internal - return __tile_movrow(a, b); -} diff --git a/clang/test/CodeGen/X86/amxavx512-builtins.c b/clang/test/CodeGen/X86/amxavx512-builtins.c deleted file mode 100644 index 172b5ae8f53081..00000000000000 --- a/clang/test/CodeGen/X86/amxavx512-builtins.c +++ /dev/null @@ -1,41 +0,0 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-avx512 \ -// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s - -#include <immintrin.h> -#include <stddef.h> - -__m512 test_tile_cvtrowd2ps(unsigned int A) { - // CHECK-LABEL: @test_tile_cvtrowd2ps( - // CHECK: call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 %{{.*}}) - return _tile_cvtrowd2ps(1, A); -} - -__m512bh test_tile_cvtrowps2pbf16h(unsigned int A) { - // CHECK-LABEL: @test_tile_cvtrowps2pbf16h( - // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}}) - return _tile_cvtrowps2pbf16h(1, A); -} - -__m512bh test_tile_cvtrowps2pbf16l(unsigned int A) { - // CHECK-LABEL: @test_tile_cvtrowps2pbf16l( - // CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}}) - return _tile_cvtrowps2pbf16l(1, A); -} - -__m512h test_tile_cvtrowps2phh(unsigned int A) { - // CHECK-LABEL: @test_tile_cvtrowps2phh( - // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %{{.*}}) - return _tile_cvtrowps2phh(1, A); -} - -__m512h test_tile_cvtrowps2phl(unsigned int A) { - // CHECK-LABEL: @test_tile_cvtrowps2phl( - // CHECK: call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %{{.*}}) - return _tile_cvtrowps2phl(1, A); -} - -__m512i test_tile_movrow(unsigned int A) { - // CHECK-LABEL: @test_tile_movrow - // CHECK: %1 = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 %{{.*}}) - return _tile_movrow(1, A); -} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index 2033a8b4c335f9..593ccffbcda095 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -59,10 +59,10 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {} // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx" @@ -76,5 +76,5 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {} // CHECK: "target-cpu"="x86-64-v4" // CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" -// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512" +// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-avx10.1-512,-avx10.2-512,-evex512" // CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave" diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 822c997f71744f..e8c439ab48f21f 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -311,13 +311,6 @@ // AMX-TRANSPOSE: "-target-feature" "+amx-transpose" // NO-AMX-TRANSPOSE: "-target-feature" "-amx-transpose" -// RUN: %clang -target x86_64-unknown-linux-gnu -mamx-avx512 %s \ -// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=AMX-AVX512 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -mno-amx-avx512 %s \ -// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AMX-AVX512 %s -// AMX-AVX512: "-target-feature" "+amx-avx512" -// NO-AMX-AVX512: "-target-feature" "-amx-avx512" - // RUN: %clang --target=i386 -march=i386 -mhreset %s -### 2>&1 | FileCheck -check-prefix=HRESET %s // RUN: %clang --target=i386 -march=i386 -mno-hreset %s -### 2>&1 | FileCheck -check-prefix=NO-HRESET %s // HRESET: "-target-feature" "+hreset" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 8e4ddb1526626e..c240b27c91a479 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -558,18 +558,6 @@ // NO-AMX-TRANSPOSE-NOT: #define __AMX_TRANSPOSE__ 1 -// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-avx512 -x c \ -// RUN: -E -dM -o - %s | FileCheck -check-prefix=AMX-AVX512 %s - -// AMX-AVX512: #define __AMX_AVX512__ 1 - -// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mno-amx-avx512 -x c \ -// RUN: -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-AVX512 %s -// RUN: %clang -target x86_64-unknown-linux-gnu -march=x86-64 -mamx-avx512 -mno-amx-tile \ -// RUN: -x c -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-AVX512 %s - -// NO-AMX-AVX512-NOT: #define __AMX_AVX512__ 1 - // RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNI %s // AVXVNNI: #define __AVX2__ 1 diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 3003f9887e239c..c42397024e45a7 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5952,26 +5952,6 @@ let TargetPrefix = "x86" in { Intrinsic<[], [llvm_i8_ty, llvm_i8_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; - // AMX-AVX512 - def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">, - Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>]>; - def int_x86_tcvtrowps2pbf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h">, - Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>]>; - def int_x86_tcvtrowps2pbf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l">, - Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>]>; - def int_x86_tcvtrowps2phh : ClangBuiltin<"__builtin_ia32_tcvtrowps2phh">, - Intrinsic<[llvm_v32f16_ty], [llvm_i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>]>; - def int_x86_tcvtrowps2phl : ClangBuiltin<"__builtin_ia32_tcvtrowps2phl">, - Intrinsic<[llvm_v32f16_ty], [llvm_i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>]>; - def int_x86_tilemovrow : ClangBuiltin<"__builtin_ia32_tilemovrow">, - Intrinsic<[llvm_v16i32_ty], [llvm_i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>]>; - // AMX - internal intrinsics def int_x86_ldtilecfg_internal : ClangBuiltin<"__builtin_ia32_tile_loadconfig_internal">, @@ -6070,37 +6050,6 @@ let TargetPrefix = "x86" in { ClangBuiltin<"__builtin_ia32_ttransposed_internal">, Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty], []>; - - def int_x86_tcvtrowd2ps_internal : - ClangBuiltin<"__builtin_ia32_tcvtrowd2ps_internal">, - Intrinsic<[llvm_v16f32_ty], - [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], - []>; - def int_x86_tcvtrowps2pbf16h_internal : - ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h_internal">, - Intrinsic<[llvm_v32bf16_ty], - [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], - []>; - def int_x86_tcvtrowps2pbf16l_internal : - ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l_internal">, - Intrinsic<[llvm_v32bf16_ty], - [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], - []>; - def int_x86_tcvtrowps2phh_internal : - ClangBuiltin<"__builtin_ia32_tcvtrowps2phh_internal">, - Intrinsic<[llvm_v32f16_ty], - [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], - []>; - def int_x86_tcvtrowps2phl_internal : - ClangBuiltin<"__builtin_ia32_tcvtrowps2phl_internal">, - Intrinsic<[llvm_v32f16_ty], - [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], - []>; - def int_x86_tilemovrow_internal : - ClangBuiltin<"__builtin_ia32_tilemovrow_internal">, - Intrinsic<[llvm_v16i32_ty], - [llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty], - []>; } //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 815556e374bef5..a62b4df420ec6a 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -266,7 +266,6 @@ X86_FEATURE (MOVRS, "movrs") X86_FEATURE (ZU, "zu") X86_FEATURE (AMX_FP8, "amx-fp8") X86_FEATURE (AMX_TRANSPOSE, "amx-transpose") -X86_FEATURE (AMX_AVX512, "amx-avx512") // These features aren't really CPU features, but the frontend can set them. X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 59780ba5b99fcf..160e7c0fc0310a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -276,10 +276,6 @@ def FeatureAMXFP8 : SubtargetFeature<"amx-fp8", "HasAMXFP8", "true", def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", "true", "Support AMX amx-transpose instructions", [FeatureAMXTILE]>; -def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512", - "HasAMXAVX512", "true", - "Support AMX-AVX512 instructions", - [FeatureAMXTILE]>; def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true", "Support CMPCCXADD instructions">; def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true", diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 9511a82f0e97d2..f832955d1202fa 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -559,68 +559,12 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, return true; } case X86::PTILELOADDV: - case X86::PTILELOADDT1V: - case X86::PTCVTROWD2PSrreV: - case X86::PTCVTROWD2PSrriV: - case X86::PTCVTROWPS2PBF16HrreV: - case X86::PTCVTROWPS2PBF16HrriV: - case X86::PTCVTROWPS2PBF16LrreV: - case X86::PTCVTROWPS2PBF16LrriV: - case X86::PTCVTROWPS2PHHrreV: - case X86::PTCVTROWPS2PHHrriV: - case X86::PTCVTROWPS2PHLrreV: - case X86::PTCVTROWPS2PHLrriV: - case X86::PTILEMOVROWrreV: - case X86::PTILEMOVROWrriV: { + case X86::PTILELOADDT1V: { for (unsigned i = 2; i > 0; --i) MI.removeOperand(i); - unsigned Opc; - switch (Opcode) { - case X86::PTILELOADDV: - Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD); - break; - case X86::PTILELOADDT1V: - Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDT1); - break; - case X86::PTCVTROWD2PSrreV: - Opc = X86::TCVTROWD2PSrre; - break; - case X86::PTCVTROWD2PSrriV: - Opc = X86::TCVTROWD2PSrri; - break; - case X86::PTCVTROWPS2PBF16HrreV: - Opc = X86::TCVTROWPS2PBF16Hrre; - break; - case X86::PTCVTROWPS2PBF16HrriV: - Opc = X86::TCVTROWPS2PBF16Hrri; - break; - case X86::PTCVTROWPS2PBF16LrreV: - Opc = X86::TCVTROWPS2PBF16Lrre; - break; - case X86::PTCVTROWPS2PBF16LrriV: - Opc = X86::TCVTROWPS2PBF16Lrri; - break; - case X86::PTCVTROWPS2PHHrreV: - Opc = X86::TCVTROWPS2PHHrre; - break; - case X86::PTCVTROWPS2PHHrriV: - Opc = X86::TCVTROWPS2PHHrri; - break; - case X86::PTCVTROWPS2PHLrreV: - Opc = X86::TCVTROWPS2PHLrre; - break; - case X86::PTCVTROWPS2PHLrriV: - Opc = X86::TCVTROWPS2PHLrri; - break; - case X86::PTILEMOVROWrreV: - Opc = X86::TILEMOVROWrre; - break; - case X86::PTILEMOVROWrriV: - Opc = X86::TILEMOVROWrri; - break; - default: - llvm_unreachable("Unexpected Opcode"); - } + unsigned Opc = Opcode == X86::PTILELOADDV + ? GET_EGPR_IF_ENABLED(X86::TILELOADD) + : GET_EGPR_IF_ENABLED(X86::TILELOADDT1); MI.setDesc(TII->get(Opc)); return true; } @@ -770,8 +714,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, case X86::PTDPBUUDV: Opc = X86::TDPBUUD; break; case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break; case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break; - default: - llvm_unreachable("Unexpected Opcode"); + default: llvm_unreachable("Impossible Opcode!"); } MI.setDesc(TII->get(Opc)); MI.tieOperands(0, 1); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 19a85a6d7ec6ce..91e48f1e77db12 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37613,82 +37613,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.eraseFromParent(); // The pseudo is gone now. return BB; } - case X86::PTCVTROWPS2PBF16Hrri: - case X86::PTCVTROWPS2PBF16Lrri: - case X86::PTCVTROWPS2PHHrri: - case X86::PTCVTROWPS2PHLrri: - case X86::PTCVTROWD2PSrri: - case X86::PTILEMOVROWrri: { - const DebugLoc &DL = MI.getDebugLoc(); - unsigned Opc; - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected instruction!"); - case X86::PTCVTROWD2PSrri: - Opc = X86::TCVTROWD2PSrri; - break; - case X86::PTCVTROWPS2PBF16Hrri: - Opc = X86::TCVTROWPS2PBF16Hrri; - break; - case X86::PTCVTROWPS2PHHrri: - Opc = X86::TCVTROWPS2PHHrri; - break; - case X86::PTCVTROWPS2PBF16Lrri: - Opc = X86::TCVTROWPS2PBF16Lrri; - break; - case X86::PTCVTROWPS2PHLrri: - Opc = X86::TCVTROWPS2PHLrri; - break; - case X86::PTILEMOVROWrri: - Opc = X86::TILEMOVROWrri; - break; - } - MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc)); - MIB.add(MI.getOperand(0)); - MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef); - MIB.addImm(MI.getOperand(2).getImm()); - - MI.eraseFromParent(); // The pseudo is gone now. - return BB; - } - case X86::PTCVTROWPS2PBF16Hrre: - case X86::PTCVTROWPS2PBF16Lrre: - case X86::PTCVTROWPS2PHHrre: - case X86::PTCVTROWPS2PHLrre: - case X86::PTCVTROWD2PSrre: - case X86::PTILEMOVROWrre: { - const DebugLoc &DL = MI.getDebugLoc(); - unsigned Opc; - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected instruction!"); - case X86::PTCVTROWD2PSrre: - Opc = X86::TCVTROWD2PSrre; - break; - case X86::PTCVTROWPS2PBF16Hrre: - Opc = X86::TCVTROWPS2PBF16Hrre; - break; - case X86::PTCVTROWPS2PBF16Lrre: - Opc = X86::TCVTROWPS2PBF16Lrre; - break; - case X86::PTCVTROWPS2PHHrre: - Opc = X86::TCVTROWPS2PHHrre; - break; - case X86::PTCVTROWPS2PHLrre: - Opc = X86::TCVTROWPS2PHLrre; - break; - case X86::PTILEMOVROWrre: - Opc = X86::TILEMOVROWrre; - break; - } - MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc)); - MIB.add(MI.getOperand(0)); - MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef); - MIB.add(MI.getOperand(2)); - - MI.eraseFromParent(); // The pseudo is gone now. - return BB; - } } } diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index b954c977f8c6c9..947a8bec2890ef 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -369,150 +369,3 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in { } } } // HasAMXTILE, HasAMXTRANSPOSE - -multiclass m_tcvtrowd2ps { - let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { - let SchedRW = [WriteSystem] in { - def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst), - (ins TILE:$src1, i32u8imm:$src2), - "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, TA,XS, EVEX, EVEX_V512; - def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst), - (ins TILE:$src1, GR32:$src2), - "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, T8,XS, EVEX, VVVV, EVEX_V512; - } - } // HasAMXAVX512, HasAVX10_2_512, In64BitMode -} - -defm TCVTROWD2PS : m_tcvtrowd2ps; - -let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { - let SchedRW = [WriteSystem] in { - let usesCustomInserter = 1 in { - def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2), - [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, imm:$src2))]>; - def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2), - [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, GR32:$src2))]>; - } - - def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2, - TILE:$src3, GR32:$src4))]>; - def PTCVTROWPS2PBF16HrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTCVTROWPS2PBF16HrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2, - TILE:$src3, GR32:$src4))]>; - def PTCVTROWPS2PBF16LrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTCVTROWPS2PBF16LrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2, - TILE:$src3, GR32:$src4))]>; - def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2, - TILE:$src3, GR32:$src4))]>; - def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2, - TILE:$src3, GR32:$src4))]>; - } -} - -multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr, - Prefix P1, Prefix P2> { - let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in { - let OpPrefix = P1 in - def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst), - (ins TILE:$src1, GR32:$src2), - !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX, VVVV, EVEX_V512, T8; - let OpPrefix = P2 in - def rri : Ii8<Opcode2, MRMSrcReg, (outs VR512:$dst), - (ins TILE:$src1, i32u8imm:$src2), - !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX, EVEX_V512, TA; - let usesCustomInserter = 1 in { - def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2), - [(set VR512:$dst, - (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, GR32:$src2))]>; - def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2), - [(set VR512:$dst, - (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, imm:$src2))]>; - } - } -} - -defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>; -defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>; -defm TCVTROWPS2PBF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2pbf16h", XD, XD>; -defm TCVTROWPS2PBF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2pbf16l", XS, XS>; - -multiclass m_tilemovrow { - let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { - let SchedRW = [WriteSystem] in { - def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst), - (ins TILE:$src1, u8imm:$src2), - "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, TA,PD, EVEX, EVEX_V512; - def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst), - (ins TILE:$src1, GR32:$src2), - "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, T8,PD, EVEX, VVVV, EVEX_V512; - } - } // HasAMXAVX512, HasAVX10_2_512, In64BitMode -} - -defm TILEMOVROW : m_tilemovrow; - -let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in { - let SchedRW = [WriteSystem] in { - let usesCustomInserter = 1 in { - def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2), - [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, imm:$src2))]>; - def PTILEMOVROWrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2), - [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, GR32:$src2))]>; - } - - def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4), - [(set VR512: $dst, - (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2, - TILE:$src3, imm:$src4))]>; - def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst), - (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4), - [(set VR512: $dst, - (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2, - TILE:$src3, GR32:$src4))]>; - } -} diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 2eb4e4fb941b29..d22e7dadaaa262 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -185,7 +185,6 @@ def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">; def HasAMXCOMPLEX : Predicate<"Subtarget->hasAMXCOMPLEX()">; def HasAMXFP8 : Predicate<"Subtarget->hasAMXFP8()">; def HasAMXTRANSPOSE : Predicate<"Subtarget->hasAMXTRANSPOSE()">; -def HasAMXAVX512 : Predicate<"Subtarget->hasAMXAVX512()">; def HasUINTR : Predicate<"Subtarget->hasUINTR()">; def HasUSERMSR : Predicate<"Subtarget->hasUSERMSR()">; def HasCRC32 : Predicate<"Subtarget->hasCRC32()">; diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index af6fb04295bdec..688e886cf3b13a 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -266,17 +266,6 @@ std::pair<Value *, Value *> ShapeCalculator::getShape(IntrinsicInst *II, Col = getColFromRow(II, II->getArgOperand(0), 4); break; } - case Intrinsic::x86_tcvtrowd2ps_internal: - case Intrinsic::x86_tcvtrowps2pbf16h_internal: - case Intrinsic::x86_tcvtrowps2pbf16l_internal: - case Intrinsic::x86_tcvtrowps2phh_internal: - case Intrinsic::x86_tcvtrowps2phl_internal: - case Intrinsic::x86_tilemovrow_internal: { - assert(OpNo == 2 && "Illegal Operand Number."); - Row = II->getArgOperand(0); - Col = II->getArgOperand(1); - break; - } } return std::make_pair(Row, Col); diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp index d232a1d706549f..d20bfdcdb7f9c1 100644 --- a/llvm/lib/Target/X86/X86PreTileConfig.cpp +++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp @@ -118,22 +118,10 @@ class X86PreTileConfig : public MachineFunctionPass { bool isAMXInstruction(MachineInstr &MI) { if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3) return false; - switch (MI.getOpcode()) { - case X86::PTILESTOREDV: - case X86::PTCVTROWD2PSrreV: - case X86::PTCVTROWD2PSrriV: - case X86::PTCVTROWPS2PBF16HrreV: - case X86::PTCVTROWPS2PBF16HrriV: - case X86::PTCVTROWPS2PBF16LrreV: - case X86::PTCVTROWPS2PBF16LrriV: - case X86::PTCVTROWPS2PHHrreV: - case X86::PTCVTROWPS2PHHrriV: - case X86::PTCVTROWPS2PHLrreV: - case X86::PTCVTROWPS2PHLrriV: - case X86::PTILEMOVROWrreV: - case X86::PTILEMOVROWrriV: + + // PTILESTOREDV is the only exception that doesn't def a AMX register. + if (MI.getOpcode() == X86::PTILESTOREDV) return true; - } // We can simply check if it is AMX instruction by its def. // But we should exclude old API which uses physical registers. diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index a973aaaa4806e6..93911bc51a207d 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1880,7 +1880,6 @@ const StringMap<bool> sys::getHostCPUFeatures() { !getX86CpuIDAndInfoEx(0x1e, 0x1, &EAX, &EBX, &ECX, &EDX); Features["amx-fp8"] = HasLeaf1E && ((EAX >> 4) & 1) && HasAMXSave; Features["amx-transpose"] = HasLeaf1E && ((EAX >> 5) & 1) && HasAMXSave; - Features["amx-avx512"] = HasLeaf1E && ((EAX >> 7) & 1) && HasAMXSave; bool HasLeaf24 = MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index eb55e6fc9134c8..691809b6d4b5ad 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -600,8 +600,6 @@ constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_COMPLEX = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_FP8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_TRANSPOSE = FeatureAMX_TILE; -constexpr FeatureBitset ImpliedFeaturesAMX_AVX512 = - FeatureAMX_TILE | FeatureAVX10_2_512; constexpr FeatureBitset ImpliedFeaturesHRESET = {}; constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {}; diff --git a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll deleted file mode 100644 index 71f8f231747fe7..00000000000000 --- a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll +++ /dev/null @@ -1,171 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs -enable-ipra | FileCheck -check-prefix=IPRA %s -; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx10.2-512 -mattr=+amx-avx512 -verify-machineinstrs | FileCheck -check-prefix=O0 %s - -@buf = dso_local global [3072 x i8] zeroinitializer, align 64 - -define internal void @foo() { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: retq -; -; IPRA-LABEL: foo: -; IPRA: # %bb.0: # %entry -; IPRA-NEXT: retq -; -; O0-LABEL: foo: -; O0: # %bb.0: # %entry -; O0-NEXT: retq -entry: - ret void -} - -define dso_local <16 x i32> @test_api(i16 signext %0, i16 signext %1) nounwind { -; CHECK-LABEL: test_api: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $2112, %rsp # imm = 0x840 -; CHECK-NEXT: movl %esi, %ebx -; CHECK-NEXT: movl %edi, %ebp -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovups %zmm0, (%rsp) -; CHECK-NEXT: movb $1, (%rsp) -; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg (%rsp) -; CHECK-NEXT: movl $buf, %eax -; CHECK-NEXT: movl $32, %ecx -; CHECK-NEXT: movw $8, %r14w -; CHECK-NEXT: tileloadd (%rax,%rcx), %tmm0 -; CHECK-NEXT: movabsq $64, %rax -; CHECK-NEXT: tilestored %tmm0, 1088(%rsp,%rax) # 1024-byte Folded Spill -; CHECK-NEXT: movl $buf+1024, %eax -; CHECK-NEXT: tileloadd (%rax,%rcx), %tmm1 -; CHECK-NEXT: movabsq $64, %rax -; CHECK-NEXT: tilestored %tmm1, 64(%rsp,%rax) # 1024-byte Folded Spill -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: callq foo -; CHECK-NEXT: ldtilecfg (%rsp) -; CHECK-NEXT: movabsq $64, %rax -; CHECK-NEXT: tileloadd 64(%rsp,%rax), %tmm1 # 1024-byte Folded Reload -; CHECK-NEXT: tilemovrow $2, %tmm1, %zmm0 -; CHECK-NEXT: tileloadd 1088(%rsp,%rax), %tmm0 # 1024-byte Folded Reload -; CHECK-NEXT: tilemovrow $2, %tmm0, %zmm1 -; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: addq $2112, %rsp # imm = 0x840 -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: popq %r14 -; CHECK-NEXT: popq %rbp -; CHECK-NEXT: tilerelease -; CHECK-NEXT: retq -; -; IPRA-LABEL: test_api: -; IPRA: # %bb.0: -; IPRA-NEXT: subq $72, %rsp -; IPRA-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; IPRA-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) -; IPRA-NEXT: movb $1, {{[0-9]+}}(%rsp) -; IPRA-NEXT: movw $8, {{[0-9]+}}(%rsp) -; IPRA-NEXT: movb $8, {{[0-9]+}}(%rsp) -; IPRA-NEXT: movw %si, {{[0-9]+}}(%rsp) -; IPRA-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; IPRA-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; IPRA-NEXT: movl $buf, %eax -; IPRA-NEXT: movl $32, %ecx -; IPRA-NEXT: movw $8, %dx -; IPRA-NEXT: tileloadd (%rax,%rcx), %tmm0 -; IPRA-NEXT: movl $buf+1024, %eax -; IPRA-NEXT: tileloadd (%rax,%rcx), %tmm1 -; IPRA-NEXT: callq foo -; IPRA-NEXT: tilemovrow $2, %tmm1, %zmm0 -; IPRA-NEXT: tilemovrow $2, %tmm0, %zmm1 -; IPRA-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; IPRA-NEXT: addq $72, %rsp -; IPRA-NEXT: tilerelease -; IPRA-NEXT: retq -; -; O0-LABEL: test_api: -; O0: # %bb.0: -; O0-NEXT: pushq %rbp -; O0-NEXT: movq %rsp, %rbp -; O0-NEXT: andq $-1024, %rsp # imm = 0xFC00 -; O0-NEXT: subq $4096, %rsp # imm = 0x1000 -; O0-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; O0-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) -; O0-NEXT: movb $1, {{[0-9]+}}(%rsp) -; O0-NEXT: movw %si, %cx -; O0-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; O0-NEXT: movw %di, %ax -; O0-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; O0-NEXT: movl $buf, %esi -; O0-NEXT: movl $32, %edi -; O0-NEXT: movw $8, %dx -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) -; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0 -; O0-NEXT: movl $64, %edi -; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; O0-NEXT: movw $8, %dx -; O0-NEXT: tilestored %tmm0, (%rsi,%rdi) -; O0-NEXT: movl $32, %esi -; O0-NEXT: movl $buf+1024, %edx -; O0-NEXT: movw $8, %ax -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) -; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 -; O0-NEXT: movl $64, %esi -; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx -; O0-NEXT: movw $8, %ax -; O0-NEXT: tilestored %tmm0, (%rdx,%rsi) -; O0-NEXT: vzeroupper -; O0-NEXT: callq foo -; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %dx # 2-byte Reload -; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload -; O0-NEXT: movl $64, %edi -; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; O0-NEXT: movw $8, %cx -; O0-NEXT: # implicit-def: $cl -; O0-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0 -; O0-NEXT: movw $8, %cx -; O0-NEXT: tilemovrow $2, %tmm0, %zmm0 -; O0-NEXT: movl $64, %esi -; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx -; O0-NEXT: movw $8, %cx -; O0-NEXT: # implicit-def: $al -; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) -; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 -; O0-NEXT: movw $8, %cx -; O0-NEXT: tilemovrow $2, %tmm0, %zmm1 -; O0-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; O0-NEXT: movq %rbp, %rsp -; O0-NEXT: popq %rbp -; O0-NEXT: tilerelease -; O0-NEXT: retq - %3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 8, ptr @buf, i64 32) - %4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %1, ptr getelementptr inbounds ([3072 x i8], ptr @buf, i64 0, i64 1024), i64 32) - call void @foo() - %5 = call <16 x i32> @llvm.x86.tilemovrow.internal(i16 8, i16 %1, x86_amx %4, i32 2) - %6 = call <16 x i32> @llvm.x86.tilemovrow.internal(i16 %0, i16 8, x86_amx %3, i32 2) - %7 = add <16 x i32> %5, %6 - ret <16 x i32> %7 -} - - -declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64) -declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) -declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) -declare <16 x i32> @llvm.x86.tilemovrow.internal(i16, i16, x86_amx, i32) diff --git a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll b/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll deleted file mode 100644 index da7fedee88821b..00000000000000 --- a/llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll +++ /dev/null @@ -1,116 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+amx-tile,+amx-avx512,+avx10.2-512 | FileCheck %s - -define <16 x float> @test_tcvtrowd2ps(i32 %A) { -; CHECK-LABEL: test_tcvtrowd2ps: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowd2ps %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x4a,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 %A) - ret <16 x float> %ret -} - -define <16 x float> @test_tcvtrowd2psi() { -; CHECK-LABEL: test_tcvtrowd2psi: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowd2ps $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x07,0xc1,0x7f] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <16 x float> @llvm.x86.tcvtrowd2ps(i8 1, i32 127) - ret <16 x float> %ret -} -declare <16 x float> @llvm.x86.tcvtrowd2ps(i8 %A, i32 %B) - -define <32 x bfloat> @test_tcvtrowps2pbf16h(i32 %A) { -; CHECK-LABEL: test_tcvtrowps2pbf16h: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16h %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x47,0x48,0x6d,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %A) - ret <32 x bfloat> %ret -} - -define <32 x bfloat> @test_tcvtrowps2pbf16hi() { -; CHECK-LABEL: test_tcvtrowps2pbf16hi: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16h $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x07,0xc1,0x7f] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 127) - ret <32 x bfloat> %ret -} -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 %A, i32 %B) - -define <32 x bfloat> @test_tcvtrowps2pbf16l(i32 %A) { -; CHECK-LABEL: test_tcvtrowps2pbf16l: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16l %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x46,0x48,0x6d,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %A) - ret <32 x bfloat> %ret -} - -define <32 x bfloat> @test_tcvtrowps2pbf16li() { -; CHECK-LABEL: test_tcvtrowps2pbf16li: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2pbf16l $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7e,0x48,0x77,0xc1,0x7f] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 127) - ret <32 x bfloat> %ret -} -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 %A, i32 %B) - -define <32 x half> @test_tcvtrowps2phh(i32 %A) { -; CHECK-LABEL: test_tcvtrowps2phh: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2phh %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x44,0x48,0x6d,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 %A) - ret <32 x half> %ret -} - -define <32 x half> @test_tcvtrowps2phhi() { -; CHECK-LABEL: test_tcvtrowps2phhi: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2phh $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7c,0x48,0x07,0xc1,0x7f] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x half> @llvm.x86.tcvtrowps2phh(i8 1, i32 127) - ret <32 x half> %ret -} -declare <32 x half> @llvm.x86.tcvtrowps2phh(i8 %A, i32 %B) - -define <32 x half> @test_tcvtrowps2phl(i32 %A) { -; CHECK-LABEL: test_tcvtrowps2phl: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2phl %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x45,0x48,0x6d,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 %A) - ret <32 x half> %ret -} - -define <32 x half> @test_tcvtrowps2phli() { -; CHECK-LABEL: test_tcvtrowps2phli: -; CHECK: # %bb.0: -; CHECK-NEXT: tcvtrowps2phl $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7f,0x48,0x77,0xc1,0x7f] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <32 x half> @llvm.x86.tcvtrowps2phl(i8 1, i32 127) - ret <32 x half> %ret -} -declare <32 x half> @llvm.x86.tcvtrowps2phl(i8 %A, i32 %B) - -define <16 x i32> @test_tilemovrow(i32 %A) { -; CHECK-LABEL: test_tilemovrow: -; CHECK: # %bb.0: -; CHECK-NEXT: tilemovrow %edi, %tmm1, %zmm0 # encoding: [0x62,0xf2,0x45,0x48,0x4a,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 %A) - ret <16 x i32> %ret -} - -define <16 x i32> @test_tilemovrowi() { -; CHECK-LABEL: test_tilemovrowi: -; CHECK: # %bb.0: -; CHECK-NEXT: tilemovrow $127, %tmm1, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x07,0xc1,0x7f] -; CHECK-NEXT: retq # encoding: [0xc3] - %ret = call <16 x i32> @llvm.x86.tilemovrow(i8 1, i32 127) - ret <16 x i32> %ret -} -declare <16 x i32> @llvm.x86.tilemovrow(i8 %A, i32 %B) diff --git a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll b/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll deleted file mode 100644 index b4a5c90bbea330..00000000000000 --- a/llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll +++ /dev/null @@ -1,61 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx10.2-512, \ -; RUN: -mattr=+amx-avx512 -verify-machineinstrs | FileCheck %s - -define void @test_amx(i8* %pointer, i8* %base, i32 %index, i64 %stride) { -; CHECK-LABEL: test_amx: -; CHECK: # %bb.0: -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw $8, %ax -; CHECK-NEXT: tileloadd (%rsi,%rcx), %tmm0 -; CHECK-NEXT: tcvtrowd2ps %edx, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowd2ps $16, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16h %edx, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16h $16, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16l %edx, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2pbf16l $16, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2phh %edx, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2phh $16, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2phl %edx, %tmm0, %zmm0 -; CHECK-NEXT: tcvtrowps2phl $16, %tmm0, %zmm0 -; CHECK-NEXT: tilemovrow %edx, %tmm0, %zmm0 -; CHECK-NEXT: tilemovrow $16, %tmm0, %zmm0 -; CHECK-NEXT: tilestored %tmm0, (%rdi,%rcx) -; CHECK-NEXT: tilerelease -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - - %a = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, i8* %base, i64 %stride) - call <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16 8, i16 8, x86_amx %a, i32 16) - call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16 8, i16 8, x86_amx %a, i32 16) - call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16 8, i16 8, x86_amx %a, i32 16) - call <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16 8, i16 8, x86_amx %a, i32 16) - call <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16 8, i16 8, x86_amx %a, i32 16) - call <16 x i32> @llvm.x86.tilemovrow.internal(i16 8, i16 8, x86_amx %a, i32 %index) - call <16 x i32> @llvm.x86.tilemovrow.internal(i16 8, i16 8, x86_amx %a, i32 16) - - call void @llvm.x86.tilestored64.internal(i16 8, i16 8, i8* %pointer, i64 %stride, x86_amx %a) - ret void -} - -declare x86_amx @llvm.x86.tilezero.internal(i16, i16) -declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64) -declare x86_amx @llvm.x86.tileloaddt164.internal(i16, i16, i8*, i64) -declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) - -declare <16 x float> @llvm.x86.tcvtrowd2ps.internal(i16, i16, x86_amx, i32) -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal(i16, i16, x86_amx, i32) -declare <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal(i16, i16, x86_amx, i32) -declare <32 x half> @llvm.x86.tcvtrowps2phh.internal(i16, i16, x86_amx, i32) -declare <32 x half> @llvm.x86.tcvtrowps2phl.internal(i16, i16, x86_amx, i32) -declare <16 x i32> @llvm.x86.tilemovrow.internal(i16, i16, x86_amx, i32) diff --git a/llvm/test/MC/Disassembler/X86/amx-avx512.txt b/llvm/test/MC/Disassembler/X86/amx-avx512.txt deleted file mode 100644 index 0a162af1b4bc02..00000000000000 --- a/llvm/test/MC/Disassembler/X86/amx-avx512.txt +++ /dev/null @@ -1,106 +0,0 @@ -# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT -# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL - -# ATT: tcvtrowd2ps %ecx, %tmm5, %zmm22 -# INTEL: tcvtrowd2ps zmm22, tmm5, ecx -0x62,0xe2,0x76,0x48,0x4a,0xf5 - -# ATT: tcvtrowd2ps %ecx, %tmm2, %zmm22 -# INTEL: tcvtrowd2ps zmm22, tmm2, ecx -0x62,0xe2,0x76,0x48,0x4a,0xf2 - -# ATT: tcvtrowd2ps $123, %tmm5, %zmm22 -# INTEL: tcvtrowd2ps zmm22, tmm5, 123 -0x62,0xe3,0x7e,0x48,0x07,0xf5,0x7b - -# ATT: tcvtrowd2ps $123, %tmm2, %zmm22 -# INTEL: tcvtrowd2ps zmm22, tmm2, 123 -0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b - -# ATT: tcvtrowps2pbf16h %ecx, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm5, ecx -0x62,0xe2,0x77,0x48,0x6d,0xf5 - -# ATT: tcvtrowps2pbf16h %ecx, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm2, ecx -0x62,0xe2,0x77,0x48,0x6d,0xf2 - -# ATT: tcvtrowps2pbf16h $123, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm5, 123 -0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b - -# ATT: tcvtrowps2pbf16h $123, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16h zmm22, tmm2, 123 -0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b - -# ATT: tcvtrowps2pbf16l %ecx, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm5, ecx -0x62,0xe2,0x76,0x48,0x6d,0xf5 - -# ATT: tcvtrowps2pbf16l %ecx, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm2, ecx -0x62,0xe2,0x76,0x48,0x6d,0xf2 - -# ATT: tcvtrowps2pbf16l $123, %tmm5, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm5, 123 -0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b - -# ATT: tcvtrowps2pbf16l $123, %tmm2, %zmm22 -# INTEL: tcvtrowps2pbf16l zmm22, tmm2, 123 -0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b - -# ATT: tcvtrowps2phh %ecx, %tmm5, %zmm22 -# INTEL: tcvtrowps2phh zmm22, tmm5, ecx -0x62,0xe2,0x74,0x48,0x6d,0xf5 - -# ATT: tcvtrowps2phh %ecx, %tmm2, %zmm22 -# INTEL: tcvtrowps2phh zmm22, tmm2, ecx -0x62,0xe2,0x74,0x48,0x6d,0xf2 - -# ATT: tcvtrowps2phh $123, %tmm5, %zmm22 -# INTEL: tcvtrowps2phh zmm22, tmm5, 123 -0x62,0xe3,0x7c,0x48,0x07,0xf5,0x7b - -# ATT: tcvtrowps2phh $123, %tmm2, %zmm22 -# INTEL: tcvtrowps2phh zmm22, tmm2, 123 -0x62,0xe3,0x7c,0x48,0x07,0xf2,0x7b - -# ATT: tcvtrowps2phl %ecx, %tmm5, %zmm22 -# INTEL: tcvtrowps2phl zmm22, tmm5, ecx -0x62,0xe2,0x75,0x48,0x6d,0xf5 - -# ATT: tcvtrowps2phl %ecx, %tmm2, %zmm22 -# INTEL: tcvtrowps2phl zmm22, tmm2, ecx -0x62,0xe2,0x75,0x48,0x6d,0xf2 - -# ATT: tcvtrowps2phl $123, %tmm5, %zmm22 -# INTEL: tcvtrowps2phl zmm22, tmm5, 123 -0x62,0xe3,0x7f,0x48,0x77,0xf5,0x7b - -# ATT: tcvtrowps2phl $123, %tmm2, %zmm22 -# INTEL: tcvtrowps2phl zmm22, tmm2, 123 -0x62,0xe3,0x7f,0x48,0x77,0xf2,0x7b - -# ATT: tilemovrow %ecx, %tmm3, %zmm22 -# INTEL: tilemovrow zmm22, tmm3, ecx -0x62,0xe2,0x75,0x48,0x4a,0xf3 - -# ATT: tilemovrow %ecx, %tmm2, %zmm22 -# INTEL: tilemovrow zmm22, tmm2, ecx -0x62,0xe2,0x75,0x48,0x4a,0xf2 - -# ATT: tilemovrow $123, %tmm3, %zmm22 -# INTEL: tilemovrow zmm22, tmm3, 123 -0x62,0xe3,0x7d,0x48,0x07,0xf3,0x7b - -# ATT: tilemovrow $123, %tmm2, %zmm22 -# INTEL: tilemovrow zmm22, tmm2, 123 -0x62,0xe3,0x7d,0x48,0x07,0xf2,0x7b - -# ATT: tilemovrow %edx, %tmm0, %zmm22 -# INTEL: tilemovrow zmm22, tmm0, edx -0x62,0xe2,0x6d,0x48,0x4a,0xf0 - -# ATT: tilemovrow $123, %tmm0, %zmm22 -# INTEL: tilemovrow zmm22, tmm0, 123 -0x62,0xe3,0x7d,0x48,0x07,0xf0,0x7b diff --git a/llvm/test/MC/X86/amx-avx512-att.s b/llvm/test/MC/X86/amx-avx512-att.s deleted file mode 100644 index 6da4ede82c6217..00000000000000 --- a/llvm/test/MC/X86/amx-avx512-att.s +++ /dev/null @@ -1,105 +0,0 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s - -// CHECK: tcvtrowd2ps %ecx, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf5] - tcvtrowd2ps %ecx, %tmm5, %zmm22 - -// CHECK: tcvtrowd2ps %ecx, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf2] - tcvtrowd2ps %ecx, %tmm2, %zmm22 - -// CHECK: tcvtrowd2ps $123, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf5,0x7b] - tcvtrowd2ps $123, %tmm5, %zmm22 - -// CHECK: tcvtrowd2ps $123, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b] - tcvtrowd2ps $123, %tmm2, %zmm22 - -// CHECK: tcvtrowps2pbf16h %ecx, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf5] - tcvtrowps2pbf16h %ecx, %tmm5, %zmm22 - -// CHECK: tcvtrowps2pbf16h %ecx, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf2] - tcvtrowps2pbf16h %ecx, %tmm2, %zmm22 - -// CHECK: tcvtrowps2pbf16h $123, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b] - tcvtrowps2pbf16h $123, %tmm5, %zmm22 - -// CHECK: tcvtrowps2pbf16h $123, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b] - tcvtrowps2pbf16h $123, %tmm2, %zmm22 - -// CHECK: tcvtrowps2pbf16l %ecx, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf5] - tcvtrowps2pbf16l %ecx, %tmm5, %zmm22 - -// CHECK: tcvtrowps2pbf16l %ecx, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf2] - tcvtrowps2pbf16l %ecx, %tmm2, %zmm22 - -// CHECK: tcvtrowps2pbf16l $123, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b] - tcvtrowps2pbf16l $123, %tmm5, %zmm22 - -// CHECK: tcvtrowps2pbf16l $123, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b] - tcvtrowps2pbf16l $123, %tmm2, %zmm22 - -// CHECK: tcvtrowps2phh %ecx, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf5] - tcvtrowps2phh %ecx, %tmm5, %zmm22 - -// CHECK: tcvtrowps2phh %ecx, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf2] - tcvtrowps2phh %ecx, %tmm2, %zmm22 - -// CHECK: tcvtrowps2phh $123, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf5,0x7b] - tcvtrowps2phh $123, %tmm5, %zmm22 - -// CHECK: tcvtrowps2phh $123, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf2,0x7b] - tcvtrowps2phh $123, %tmm2, %zmm22 - -// CHECK: tcvtrowps2phl %ecx, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf5] - tcvtrowps2phl %ecx, %tmm5, %zmm22 - -// CHECK: tcvtrowps2phl %ecx, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf2] - tcvtrowps2phl %ecx, %tmm2, %zmm22 - -// CHECK: tcvtrowps2phl $123, %tmm5, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf5,0x7b] - tcvtrowps2phl $123, %tmm5, %zmm22 - -// CHECK: tcvtrowps2phl $123, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf2,0x7b] - tcvtrowps2phl $123, %tmm2, %zmm22 - -// CHECK: tilemovrow %ecx, %tmm3, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf3] - tilemovrow %ecx, %tmm3, %zmm22 - -// CHECK: tilemovrow %ecx, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf2] - tilemovrow %ecx, %tmm2, %zmm22 - -// CHECK: tilemovrow $123, %tmm3, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf3,0x7b] - tilemovrow $123, %tmm3, %zmm22 - -// CHECK: tilemovrow $123, %tmm2, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf2,0x7b] - tilemovrow $123, %tmm2, %zmm22 - -// CHECK: tilemovrow %edx, %tmm0, %zmm22 -// CHECK: encoding: [0x62,0xe2,0x6d,0x48,0x4a,0xf0] - tilemovrow %edx, %tmm0, %zmm22 - -// CHECK: tilemovrow $123, %tmm0, %zmm22 -// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf0,0x7b] - tilemovrow $123, %tmm0, %zmm22 diff --git a/llvm/test/MC/X86/amx-avx512-intel.s b/llvm/test/MC/X86/amx-avx512-intel.s deleted file mode 100644 index 3a517a6cd1aabb..00000000000000 --- a/llvm/test/MC/X86/amx-avx512-intel.s +++ /dev/null @@ -1,105 +0,0 @@ -// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s - -// CHECK: tcvtrowd2ps zmm22, tmm5, ecx -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf5] - tcvtrowd2ps zmm22, tmm5, ecx - -// CHECK: tcvtrowd2ps zmm22, tmm2, ecx -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x4a,0xf2] - tcvtrowd2ps zmm22, tmm2, ecx - -// CHECK: tcvtrowd2ps zmm22, tmm5, 123 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf5,0x7b] - tcvtrowd2ps zmm22, tmm5, 123 - -// CHECK: tcvtrowd2ps zmm22, tmm2, 123 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x07,0xf2,0x7b] - tcvtrowd2ps zmm22, tmm2, 123 - -// CHECK: tcvtrowps2pbf16h zmm22, tmm5, ecx -// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf5] - tcvtrowps2pbf16h zmm22, tmm5, ecx - -// CHECK: tcvtrowps2pbf16h zmm22, tmm2, ecx -// CHECK: encoding: [0x62,0xe2,0x77,0x48,0x6d,0xf2] - tcvtrowps2pbf16h zmm22, tmm2, ecx - -// CHECK: tcvtrowps2pbf16h zmm22, tmm5, 123 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf5,0x7b] - tcvtrowps2pbf16h zmm22, tmm5, 123 - -// CHECK: tcvtrowps2pbf16h zmm22, tmm2, 123 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x07,0xf2,0x7b] - tcvtrowps2pbf16h zmm22, tmm2, 123 - -// CHECK: tcvtrowps2pbf16l zmm22, tmm5, ecx -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf5] - tcvtrowps2pbf16l zmm22, tmm5, ecx - -// CHECK: tcvtrowps2pbf16l zmm22, tmm2, ecx -// CHECK: encoding: [0x62,0xe2,0x76,0x48,0x6d,0xf2] - tcvtrowps2pbf16l zmm22, tmm2, ecx - -// CHECK: tcvtrowps2pbf16l zmm22, tmm5, 123 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf5,0x7b] - tcvtrowps2pbf16l zmm22, tmm5, 123 - -// CHECK: tcvtrowps2pbf16l zmm22, tmm2, 123 -// CHECK: encoding: [0x62,0xe3,0x7e,0x48,0x77,0xf2,0x7b] - tcvtrowps2pbf16l zmm22, tmm2, 123 - -// CHECK: tcvtrowps2phh zmm22, tmm5, ecx -// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf5] - tcvtrowps2phh zmm22, tmm5, ecx - -// CHECK: tcvtrowps2phh zmm22, tmm2, ecx -// CHECK: encoding: [0x62,0xe2,0x74,0x48,0x6d,0xf2] - tcvtrowps2phh zmm22, tmm2, ecx - -// CHECK: tcvtrowps2phh zmm22, tmm5, 123 -// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf5,0x7b] - tcvtrowps2phh zmm22, tmm5, 123 - -// CHECK: tcvtrowps2phh zmm22, tmm2, 123 -// CHECK: encoding: [0x62,0xe3,0x7c,0x48,0x07,0xf2,0x7b] - tcvtrowps2phh zmm22, tmm2, 123 - -// CHECK: tcvtrowps2phl zmm22, tmm5, ecx -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf5] - tcvtrowps2phl zmm22, tmm5, ecx - -// CHECK: tcvtrowps2phl zmm22, tmm2, ecx -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x6d,0xf2] - tcvtrowps2phl zmm22, tmm2, ecx - -// CHECK: tcvtrowps2phl zmm22, tmm5, 123 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf5,0x7b] - tcvtrowps2phl zmm22, tmm5, 123 - -// CHECK: tcvtrowps2phl zmm22, tmm2, 123 -// CHECK: encoding: [0x62,0xe3,0x7f,0x48,0x77,0xf2,0x7b] - tcvtrowps2phl zmm22, tmm2, 123 - -// CHECK: tilemovrow zmm22, tmm3, ecx -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf3] - tilemovrow zmm22, tmm3, ecx - -// CHECK: tilemovrow zmm22, tmm2, ecx -// CHECK: encoding: [0x62,0xe2,0x75,0x48,0x4a,0xf2] - tilemovrow zmm22, tmm2, ecx - -// CHECK: tilemovrow zmm22, tmm3, 123 -// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf3,0x7b] - tilemovrow zmm22, tmm3, 123 - -// CHECK: tilemovrow zmm22, tmm2, 123 -// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf2,0x7b] - tilemovrow zmm22, tmm2, 123 - -// CHECK: tilemovrow zmm22, tmm0, edx -// CHECK: encoding: [0x62,0xe2,0x6d,0x48,0x4a,0xf0] - tilemovrow zmm22, tmm0, edx - -// CHECK: tilemovrow zmm22, tmm0, 123 -// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x07,0xf0,0x7b] - tilemovrow zmm22, tmm0, 123 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits