llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang Author: Mikołaj Piróg (mikolaj-pirog) <details> <summary>Changes</summary> Per Intel Architecture Instruction Set Extensions Programming Reference rev. 59 (https://cdrdv2.intel.com/v1/dl/getContent/671368), Revision History entry for revision -59, AMX-TRANSPOSE was removed --- Patch is 526.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165556.diff 81 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86_64.td (-89) - (modified) clang/include/clang/Driver/Options.td (-2) - (modified) clang/lib/Basic/Targets/X86.cpp (-6) - (modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (-68) - (modified) clang/lib/Headers/CMakeLists.txt (-6) - (removed) clang/lib/Headers/amxbf16transposeintrin.h (-94) - (removed) clang/lib/Headers/amxcomplextransposeintrin.h (-303) - (removed) clang/lib/Headers/amxfp16transposeintrin.h (-94) - (modified) clang/lib/Headers/amxintrin.h (-2) - (removed) clang/lib/Headers/amxmovrstransposeintrin.h (-200) - (removed) clang/lib/Headers/amxtf32transposeintrin.h (-105) - (removed) clang/lib/Headers/amxtransposeintrin.h (-248) - (modified) clang/lib/Headers/immintrin.h (-12) - (modified) clang/lib/Sema/SemaX86.cpp (-17) - (removed) clang/test/CodeGen/X86/amx_movrs_tranpose.c (-53) - (removed) clang/test/CodeGen/X86/amx_movrs_tranpose_api.c (-81) - (removed) clang/test/CodeGen/X86/amx_movrs_transpose_errors.c (-22) - (modified) clang/test/CodeGen/X86/amx_tf32.c (-5) - (modified) clang/test/CodeGen/X86/amx_tf32_api.c (-7) - (modified) clang/test/CodeGen/X86/amx_tf32_errors.c (-8) - (removed) clang/test/CodeGen/X86/amx_transpose.c (-75) - (removed) clang/test/CodeGen/X86/amx_transpose_api.c (-114) - (removed) clang/test/CodeGen/X86/amx_transpose_errors.c (-75) - (modified) clang/test/Driver/x86-target-features.c (-7) - (modified) clang/test/Preprocessor/predefined-arch-macros.c (-2) - (modified) 
clang/test/Preprocessor/x86_target_features.c (-12) - (modified) llvm/include/llvm/CodeGen/TileShapeInfo.h (+7-81) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (-104) - (modified) llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h (-1) - (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (-1) - (modified) llvm/lib/Target/X86/AsmParser/X86Operand.h (-31) - (modified) llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp (-5) - (modified) llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h (-7) - (modified) llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp (-19) - (modified) llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h (-1) - (modified) llvm/lib/Target/X86/X86.td (+1-5) - (modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (-167) - (modified) llvm/lib/Target/X86/X86FastPreTileConfig.cpp (+8-24) - (modified) llvm/lib/Target/X86/X86FastTileConfig.cpp (-6) - (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+2-76) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1-154) - (modified) llvm/lib/Target/X86/X86InstrAMX.td (-208) - (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+3-12) - (modified) llvm/lib/Target/X86/X86InstrOperands.td (-7) - (modified) llvm/lib/Target/X86/X86InstrPredicates.td (-1) - (modified) llvm/lib/Target/X86/X86LowerAMXType.cpp (+26-177) - (modified) llvm/lib/Target/X86/X86PreTileConfig.cpp (-9) - (modified) llvm/lib/Target/X86/X86RegisterInfo.cpp (+4-66) - (modified) llvm/lib/Target/X86/X86RegisterInfo.td (+2-11) - (modified) llvm/lib/Target/X86/X86TileConfig.cpp (+12-71) - (modified) llvm/lib/TargetParser/Host.cpp (-1) - (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+1-1) - (modified) llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt (-22) - (modified) llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt (-22) - (modified) llvm/test/CodeGen/X86/amx-tf32-internal.ll (+2-5) - (modified) llvm/test/CodeGen/X86/amx-tf32-intrinsics.ll (+1-11) - (removed) 
llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll (-122) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_O2_to_O0.ll (-136) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_configure_O0.mir (-165) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_configure_O2.mir (-153) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_copy.mir (-97) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll (-87) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll (-61) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O0.mir (-134) - (removed) llvm/test/CodeGen/X86/amx_tile_pair_preconfigure_O2.mir (-113) - (removed) llvm/test/CodeGen/X86/amx_transpose_intrinsics.ll (-371) - (modified) llvm/test/CodeGen/X86/ipra-reg-usage.ll (+2-2) - (modified) llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-movrs.txt (-128) - (modified) llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tf32.txt (-8) - (removed) llvm/test/MC/Disassembler/X86/amx-transpose-att.txt (-154) - (modified) llvm/test/MC/X86/AMX/x86-64-amx-movrs-att.s (-128) - (modified) llvm/test/MC/X86/AMX/x86-64-amx-movrs-intel.s (-128) - (modified) llvm/test/MC/X86/AMX/x86-64-amx-tf32-att.s (-7) - (modified) llvm/test/MC/X86/AMX/x86-64-amx-tf32-intel.s (-7) - (removed) llvm/test/MC/X86/amx-transpose-att.s (-153) - (removed) llvm/test/MC/X86/amx-transpose-intel.s (-153) - (modified) llvm/test/TableGen/x86-instr-mapping.inc (-8) - (modified) llvm/test/tools/llvm-ir2vec/output/reference_triplets.txt (+26-26) - (modified) llvm/test/tools/llvm-ir2vec/output/reference_x86_entities.txt (+5711-5733) - (modified) llvm/unittests/CodeGen/InstrRefLDVTest.cpp (+1-1) - (modified) llvm/utils/TableGen/X86RecognizableInstr.cpp (-1) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td b/clang/include/clang/Basic/BuiltinsX86_64.td index 275278c5ac089..062060e6afbbe 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.td +++ b/clang/include/clang/Basic/BuiltinsX86_64.td @@ -239,57 +239,6 @@ let Features = 
"amx-complex", Attributes = [NoThrow] in { def tcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; } -let Features = "amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz0_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz0rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz0t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz0rst1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz1rs_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz1t1_internal : X86Builtin<"void(unsigned short, unsigned short, unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz1rst1_internal : X86Builtin<"void(unsigned short, unsigned short, 
unsigned short, _Vector<256, int *>, _Vector<256, int *>, void const *, size_t)">; -} - -let Features = "amx-transpose", Attributes = [NoThrow] in { - def ttransposed_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">; -} - -let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in { - def ttdpbf16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; -} - -let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in { - def ttdpfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; -} - -let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in { - def ttcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; - def ttcmmrlfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; - def tconjtcmmimfp16ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; - def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, _Vector<256, int>)">; -} - let Features = "amx-avx512,avx10.2", Attributes = [NoThrow] in { def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">; @@ -303,10 +252,6 @@ let Features = "amx-tf32", Attributes = [NoThrow] in { def tmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, 
_Vector<256, int>)">; } -let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in { - def ttmmultf32ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; -} - let Features = "amx-fp8", Attributes = [NoThrow] in { def tdpbf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; def tdpbhf8ps_internal : X86Builtin<"_Vector<256, int>(unsigned short, unsigned short, unsigned short, _Vector<256, int>, _Vector<256, int>, _Vector<256, int>)">; @@ -321,13 +266,6 @@ let Features = "amx-tile", Attributes = [NoThrow] in { def tilezero : X86Builtin<"void(unsigned char)">; } -let Features = "amx-movrs,amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz0rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; - def t2rpntlvwz0rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; - def t2rpntlvwz1rs : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; - def t2rpntlvwz1rst1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; -} - let Features = "amx-movrs", Attributes = [NoThrow] in { def tileloaddrs64 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; def tileloaddrst164 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; @@ -359,29 +297,6 @@ let Features = "amx-complex", Attributes = [NoThrow] in { def tcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; } -let Features = "amx-transpose", Attributes = [NoThrow] in { - def t2rpntlvwz0 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; - def t2rpntlvwz0t1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; - def t2rpntlvwz1 : X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; - def t2rpntlvwz1t1 : 
X86Builtin<"void(_Constant unsigned char, void const *, size_t)">; - def ttransposed : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">; -} - -let Features = "amx-bf16,amx-transpose", Attributes = [NoThrow] in { - def ttdpbf16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; -} - -let Features = "amx-fp16,amx-transpose", Attributes = [NoThrow] in { - def ttdpfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; -} - -let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in { - def ttcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; - def ttcmmrlfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; - def tconjtcmmimfp16ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; - def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char)">; -} - let Features = "amx-avx512,avx10.2", Attributes = [NoThrow] in { def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">; def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">; @@ -406,10 +321,6 @@ let Features = "amx-tf32", Attributes = [NoThrow] in { def tmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; } -let Features = "amx-tf32,amx-transpose", Attributes = [NoThrow] in { - def ttmmultf32ps : X86Builtin<"void(_Constant unsigned char, _Constant unsigned char, _Constant unsigned char)">; -} - let Features = "prefetchi", Attributes = [NoThrow, Const] in { def prefetchi : X86Builtin<"void(void const *, unsigned int)">; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 8784c9d7d206d..1d11db1209e47 100644 --- 
a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6695,8 +6695,6 @@ def mamx_tf32 : Flag<["-"], "mamx-tf32">, Group<m_x86_Features_Group>; def mno_amx_tf32 : Flag<["-"], "mno-amx-tf32">, Group<m_x86_Features_Group>; def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>; def mno_amx_tile : Flag<["-"], "mno-amx-tile">, Group<m_x86_Features_Group>; -def mamx_transpose : Flag<["-"], "mamx-transpose">, Group<m_x86_Features_Group>; -def mno_amx_transpose : Flag<["-"], "mno-amx-transpose">, Group<m_x86_Features_Group>; def mamx_movrs: Flag<["-"], "mamx-movrs">, Group<m_x86_Features_Group>; def mno_amx_movrs: Flag<["-"], "mno-amx-movrs">, Group<m_x86_Features_Group>; def mcmpccxadd : Flag<["-"], "mcmpccxadd">, Group<m_x86_Features_Group>; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index e71f10c4c16fc..7a90c89dd7dc0 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -396,8 +396,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAMXFP8 = true; } else if (Feature == "+amx-movrs") { HasAMXMOVRS = true; - } else if (Feature == "+amx-transpose") { - HasAMXTRANSPOSE = true; } else if (Feature == "+amx-avx512") { HasAMXAVX512 = true; } else if (Feature == "+amx-tf32") { @@ -925,8 +923,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AMX_FP8__"); if (HasAMXMOVRS) Builder.defineMacro("__AMX_MOVRS__"); - if (HasAMXTRANSPOSE) - Builder.defineMacro("__AMX_TRANSPOSE__"); if (HasAMXAVX512) Builder.defineMacro("__AMX_AVX512__"); if (HasAMXTF32) @@ -1068,7 +1064,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("amx-movrs", true) .Case("amx-tf32", true) .Case("amx-tile", true) - .Case("amx-transpose", true) .Case("avx", true) .Case("avx10.1", true) .Case("avx10.2", true) @@ -1189,7 +1184,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("amx-movrs", 
HasAMXMOVRS) .Case("amx-tf32", HasAMXTF32) .Case("amx-tile", HasAMXTILE) - .Case("amx-transpose", HasAMXTRANSPOSE) .Case("avx", SSELevel >= AVX) .Case("avx10.1", HasAVX10_1) .Case("avx10.2", HasAVX10_2) diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index b924407b6ddd7..2381b2e7cf2cf 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -2931,74 +2931,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // instruction, but it will create a memset that won't be optimized away. return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true); } - // Corresponding to intrisics which will return 2 tiles (tile0_tile1). - case X86::BI__builtin_ia32_t2rpntlvwz0_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: - case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: { - Intrinsic::ID IID; - switch (BuiltinID) { - default: - llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_t2rpntlvwz0_internal: - IID = Intrinsic::x86_t2rpntlvwz0_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal: - IID = Intrinsic::x86_t2rpntlvwz0rs_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal: - IID = Intrinsic::x86_t2rpntlvwz0t1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal: - IID = Intrinsic::x86_t2rpntlvwz0rst1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1_internal: - IID = Intrinsic::x86_t2rpntlvwz1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal: - IID = Intrinsic::x86_t2rpntlvwz1rs_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal: - IID = 
Intrinsic::x86_t2rpntlvwz1t1_internal; - break; - case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: - IID = Intrinsic::x86_t2rpntlvwz1rst1_internal; - break; - } - - // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride) - Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), - {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]}); - - auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>(); - assert(PtrTy && "arg3 must be of pointer type"); - QualType PtreeTy = PtrTy->getPointeeType(); - llvm::Type *TyPtee = ConvertType(PtreeTy); - - // Bitcast amx type (x86_amx) to vector type (256 x i32) - // Then store tile0 into DstPtr0 - Value *T0 = Builder.CreateExtractValue(Call, 0); - Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, - {TyPtee}, {T0}); - Builder.CreateDefaultAlignedStore(VecT0, Ops[3]); - - // Then store tile1 into DstPtr1 - Value *T1 = Builder.CreateExtractValue(Call, 1); - Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, - {TyPtee}, {T1}); - Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]); - - // Note: Here we escape directly use x86_tilestored64_internal to store - // the results due to it can't make sure the Mem written scope. This may - // cause shapes reloads after first amx intrinsic, which current amx reg- - // ister allocation has no ability to handle it. - - return Store; - } case X86::BI__ud2: // llvm.trap makes a ud2a instruction on x86. 
return EmitTrapCall(Intrinsic::trap); diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 18589125697b0..33fff7645df65 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -162,18 +162,12 @@ set(x86_files adxintrin.h ammintrin.h amxavx512intrin.h - amxbf16transposeintrin.h amxcomplexintrin.h - amxcomplextransposeintrin.h amxfp16intrin.h - amxfp16transposeintrin.h amxfp8intrin.h amxintrin.h amxmovrsintrin.h - amxmovrstransposeintrin.h amxtf32intrin.h - amxtf32transposeintrin.h - amxtransposeintrin.h avx10_2_512bf16intrin.h avx10_2_512convertintrin.h avx10_2_512minmaxintrin.h diff --git a/clang/lib/Headers/amxbf16transposeintrin.h b/clang/lib/Headers/amxbf16transposeintrin.h deleted file mode 100644 index 86f09f2ad8db2..0000000000000 --- a/clang/lib/Headers/amxbf16transposeintrin.h +++ /dev/null @@ -1,94 +0,0 @@ -/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use <amxbf16transposeintrin.h> directly; use <immintrin.h> instead." -#endif /* __IMMINTRIN_H */ - -#ifndef __AMX_BF16TRANSPOSEINTRIN_H -#define __AMX_BF16TRANSPOSEINTRIN_H -#ifdef __x86_64__ - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-bf16,amx-transpose"))) - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -/// tiles \a a and \a b, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in \a dst, and store the -/// 32-bit result back to tile \a dst. 
-/// -/// \headerfile <immintrin.h> -/// -/// \code -/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b) -/// \endcode -/// -/// \code{.operation} -/// FOR m := 0 TO dst.rows - 1 -/// tmp := dst.row[m] -/// FOR k := 0 TO (a.colsb / 4) - 1 -/// FOR n := 0 TO (dst.colsb / 4) - 1 -/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) * -/// FP32(b.row[k].bf16[2*n+0]) -/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) * -/// FP32(b.row[k].bf16[2*n+1]) -/// ENDFOR -/// ENDFOR -/// write_row_and_zero(dst, m, tmp, dst.colsb) -/// ENDFOR -/// zero_upper_rows(dst, dst.rows) -/// zero_tileconfig_start() -/// \endcode -/// -/// This intrinsic corresponds to the \c TTDPBF16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param a -/// The 1st source tile. Max size is 1024 Bytes. -/// \param b -/// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b)) - -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS -_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2); -} - -/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in -/// tiles src0 and src1, accumulating the intermediate single-precision -/// (32-bit) floating-point elements with elements in "dst", and store the -/// 32-bit result back to tile "dst". -/// -/// \headerfile <immintrin.h> -/// -/// This intrinsic corresponds to the <c> TTDPBF16PS </c> instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. 
-__DEFAULT_FN_ATTRS -static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile, - src0.tile, src1.tile); -} - -#undef __DEFAULT_FN_ATTRS - -#endif /* __x86_64__ */ -#endif /* __AMX_BF16TRANSPOSEINTRIN_H */ diff --git a/clang/lib/Headers/amxcomplextransposeintrin.h b/clang/lib/Headers/amxcomplextransposeintrin.h deleted file mode 100644 index 11abaf98e9371..0000000000000 --- a/clang/lib/Headers/amxcomplextransposeintrin.h +++ /dev/null @@ -1,303 +0,0 @@ -/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===------------------------------------------------------------------------=== - */ - -#ifndef __IMMINTRIN_H -#error \ - "Never use <amxcomplextransposeintrin.h> directly; include <immintrin.h> instead." -#endif // __IMMINTRIN_H - -#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H -#define __AMX_COMPLEXTRANSPOSEINTRIN_H -#ifdef __x86_64__ - -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("amx-complex,amx-transpose"))) - -/// Perform matrix multiplication of two tiles containing complex elements and -/// accumulate the results into a packed single precision tile. Each dword -/// element in input tiles \a a and \a b is interpreted as a complex number -/// with FP16 real part and FP16 imaginary part. -/// Calculates the imaginary part of the result. For each possible combination -/// of (transposed column of \a a, column of \a b), it performs a set of -/// multiplication and accumulations on all corre... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/165556 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
