llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tools-extra @llvm/pr-subscribers-clang-codegen Author: Ganesh (ganeshgit) <details> <summary>Changes</summary> Added the new AVX512 BMM ISA. CPUID checks are updated for the new subtarget, and model numbers are checked to identify Zen 6 (znver6). --- Patch is 101.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/179150.diff 59 Files Affected: - (modified) clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp (+2) - (modified) clang-tools-extra/clangd/index/CanonicalIncludes.cpp (+2) - (modified) clang/docs/ReleaseNotes.rst (+15) - (modified) clang/include/clang/Basic/BuiltinsX86.td (+28) - (modified) clang/include/clang/Options/Options.td (+2) - (modified) clang/lib/Basic/Targets/X86.cpp (+15) - (modified) clang/lib/Basic/Targets/X86.h (+1) - (modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (+43) - (modified) clang/lib/Headers/CMakeLists.txt (+2) - (added) clang/lib/Headers/avx512bmmintrin.h (+63) - (added) clang/lib/Headers/avx512bmmvlintrin.h (+85) - (modified) clang/lib/Headers/immintrin.h (+4) - (modified) clang/test/CodeGen/attr-target-x86.c (+2-2) - (modified) clang/test/CodeGen/target-builtin-noerror.c (+1) - (modified) clang/test/Driver/x86-march.c (+4) - (modified) clang/test/Frontend/x86-target-cpu.c (+1) - (modified) clang/test/Misc/target-invalid-cpu-note/x86.c (+4) - (modified) clang/test/Preprocessor/predefined-arch-macros.c (+151) - (modified) compiler-rt/lib/builtins/cpu_model/x86.c (+13-10) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+27) - (modified) llvm/include/llvm/Support/GenericLoopInfoImpl.h (+3-2) - (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+2) - (modified) llvm/include/llvm/TargetParser/X86TargetParser.h (+1) - (modified) llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (+2-1) - (modified) llvm/lib/Target/X86/X86.td (+15) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+9-2) - (modified) 
llvm/lib/Target/X86/X86ISelLowering.h (+5) - (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+62) - (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+7-1) - (modified) llvm/lib/Target/X86/X86InstrPredicates.td (+2) - (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+12) - (modified) llvm/lib/Target/X86/X86PfmCounters.td (+1) - (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+16-16) - (modified) llvm/lib/TargetParser/Host.cpp (+8-1) - (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+6) - (added) llvm/test/CodeGen/X86/avx512bmm-vbitrevb-bitreverse.ll (+88) - (added) llvm/test/CodeGen/X86/avx512bmm-vbitrevb-intrinsics-mem.ll (+141) - (added) llvm/test/CodeGen/X86/avx512bmm-vbitrevb-intrinsics.ll (+222) - (added) llvm/test/CodeGen/X86/avx512bmm-vbmac-intrinsics.ll (+63) - (modified) llvm/test/CodeGen/X86/bypass-slow-division-64.ll (+1) - (modified) llvm/test/CodeGen/X86/cmp16.ll (+1) - (modified) llvm/test/CodeGen/X86/cpus-amd.ll (+1) - (modified) llvm/test/CodeGen/X86/rdpru.ll (+1) - (modified) llvm/test/CodeGen/X86/shuffle-as-shifts.ll (+1) - (modified) llvm/test/CodeGen/X86/slow-unaligned-mem.ll (+1) - (modified) llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll (+1) - (modified) llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll (+1) - (modified) llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll (+1) - (modified) llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll (+1) - (modified) llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll (+1) - (modified) llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll (+1) - (modified) llvm/test/CodeGen/X86/vpdpwssd.ll (+1) - (modified) llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll (+1) - (modified) llvm/test/MC/X86/x86_long_nop.s (+2) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+33) - (modified) llvm/test/Transforms/LoopUnroll/X86/call-remark.ll (+1) - (modified) llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll (+1) - (modified) llvm/utils/TableGen/X86FoldTablesEmitter.cpp (+2-1) 
- (modified) llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn (+2) ``````````diff diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp b/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp index d43a3ea327a39..8a5e3290ed9ef 100644 --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp @@ -39,6 +39,8 @@ const HeaderMapCollector::RegexHeaderMap *getSTLPostfixHeaderMap() { {"include/ammintrin.h$", "<ammintrin.h>"}, {"include/avx2intrin.h$", "<immintrin.h>"}, {"include/avx512bwintrin.h$", "<immintrin.h>"}, + {"include/avx512bmmintrin.h$", "<immintrin.h>"}, + {"include/avx512bmmvlintrin.h$", "<immintrin.h>"}, {"include/avx512cdintrin.h$", "<immintrin.h>"}, {"include/avx512dqintrin.h$", "<immintrin.h>"}, {"include/avx512erintrin.h$", "<immintrin.h>"}, diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp index cbef64f351341..3a646e47a9b16 100644 --- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp @@ -42,6 +42,8 @@ const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = { {"include/ammintrin.h", "<ammintrin.h>"}, {"include/avx2intrin.h", "<immintrin.h>"}, {"include/avx512bwintrin.h", "<immintrin.h>"}, + {"include/avx512bmmintrin.h", "<immintrin.h>"}, + {"include/avx512bmmvlintrin.h", "<immintrin.h>"}, {"include/avx512cdintrin.h", "<immintrin.h>"}, {"include/avx512dqintrin.h", "<immintrin.h>"}, {"include/avx512erintrin.h", "<immintrin.h>"}, diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3a3d76112a02b..a6ac25634a567 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -265,6 +265,21 @@ NVPTX Support X86 Support ^^^^^^^^^^^ +- `march=znver6` is now supported. +- Support ISA of ``AVX512BMM``. 
+ * Support intrinsic of ``_mm512_vbmacor16x16x16_epi16``. + * Support intrinsic of ``_mm512_vbmacxor16x16x16_epi16``. + * Support intrinsic of ``_mm512_mask_vbitrev_epi8``. + * Support intrinsic of ``_mm512_maskz_vbitrev_epi8``. + * Support intrinsic of ``_mm512_vbitrev_epi8``. + * Support intrinsic of ``_mm256_vbmacor16x16x16_epi16``. + * Support intrinsic of ``_mm256_vbmacxor16x16x16_epi16``. + * Support intrinsic of ``_mm128_mask_vbitrev_epu8``. + * Support intrinsic of ``_mm256_mask_vbitrev_epu8``. + * Support intrinsic of ``_mm128_maskz_vbitrev_epu8``. + * Support intrinsic of ``_mm256_maskz_vbitrev_epu8``. + * Support intrinsic of ``_mm128_vbitrev_epu8``. + * Support intrinsic of ``_mm256_vbitrev_epu8``. Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 23eac47eb5e4c..7d1b513532f34 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -5055,3 +5055,31 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256> let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def vgetmantbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; } + +let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def vsqrtbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">; +} + +let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def vsqrtbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">; +} + +let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { + def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">; +} + +let Features = "avx512bmm", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def bmacor16x16x16_v32hi : X86Builtin<"_Vector<32, 
short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">; + def bmacxor16x16x16_v32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">; + def bitrev512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>)">; +} + +let Features = "avx512bmm,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def bmacor16x16x16_v16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">; + def bmacxor16x16x16_v16hi : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">; + def bitrev256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>)">; +} + +let Features = "avx512bmm,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def bitrev128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>)">; +} diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 421208a812bbc..658ee6f7ebe60 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -6889,6 +6889,8 @@ def mavx512bf16 : Flag<["-"], "mavx512bf16">, Group<m_x86_Features_Group>; def mno_avx512bf16 : Flag<["-"], "mno-avx512bf16">, Group<m_x86_Features_Group>; def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>; def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>; +def mavx512bmm : Flag<["-"], "mavx512bmm">, Group<m_x86_Features_Group>; +def mno_avx512bmm : Flag<["-"], "mno-avx512bmm">, Group<m_x86_Features_Group>; def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>; def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>; def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index f00d435937b92..2d65e5f6a9ba7 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ 
b/clang/lib/Basic/Targets/X86.cpp @@ -296,6 +296,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX512DQ = true; } else if (Feature == "+avx512bitalg") { HasAVX512BITALG = true; + } else if (Feature == "+avx512bmm") { + HasAVX512BMM = true; } else if (Feature == "+avx512bw") { HasAVX512BW = true; } else if (Feature == "+avx512vl") { @@ -308,6 +310,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX512IFMA = true; } else if (Feature == "+avx512vp2intersect") { HasAVX512VP2INTERSECT = true; + } else if (Feature == "avx512bmm") { + HasAVX512BMM = true; } else if (Feature == "+sha") { HasSHA = true; } else if (Feature == "+sha512") { @@ -716,6 +720,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_ZNVER5: defineCPUMacros(Builder, "znver5"); break; + case CK_ZNVER6: + defineCPUMacros(Builder, "znver6"); + break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; @@ -833,6 +840,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX512DQ__"); if (HasAVX512BITALG) Builder.defineMacro("__AVX512BITALG__"); + if (HasAVX512BMM) + Builder.defineMacro("__AVX512BMM__"); if (HasAVX512BW) Builder.defineMacro("__AVX512BW__"); if (HasAVX512VL) { @@ -846,6 +855,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX512IFMA__"); if (HasAVX512VP2INTERSECT) Builder.defineMacro("__AVX512VP2INTERSECT__"); + if (HasAVX512BMM) + Builder.defineMacro("__AVX512BMM__"); if (HasSHA) Builder.defineMacro("__SHA__"); if (HasSHA512) @@ -1076,6 +1087,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx512fp16", true) .Case("avx512dq", true) .Case("avx512bitalg", true) + .Case("avx512bmm", true) .Case("avx512bw", true) .Case("avx512vl", true) .Case("avx512vbmi", true) @@ -1196,6 +1208,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512fp16", HasAVX512FP16) 
.Case("avx512dq", HasAVX512DQ) .Case("avx512bitalg", HasAVX512BITALG) + .Case("avx512bmm", HasAVX512BMM) + .Case("avx512bmm", HasAVX512BMM) .Case("avx512bw", HasAVX512BW) .Case("avx512vl", HasAVX512VL) .Case("avx512vbmi", HasAVX512VBMI) @@ -1641,6 +1655,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const { case CK_ZNVER3: case CK_ZNVER4: case CK_ZNVER5: + case CK_ZNVER6: // Deprecated case CK_x86_64: case CK_x86_64_v2: diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 922e32906cd04..6bd55f9fbf4bb 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -104,6 +104,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAVX512BF16 = false; bool HasAVX512DQ = false; bool HasAVX512BITALG = false; + bool HasAVX512BMM = false; bool HasAVX512BW = false; bool HasAVX512VL = false; bool HasAVX512VBMI = false; diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index 9645ed87b8ef3..2c4e1f0cc8b17 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -2678,6 +2678,49 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn); } + case X86::BI__builtin_ia32_bitrev128: + case X86::BI__builtin_ia32_bitrev256: + case X86::BI__builtin_ia32_bitrev512: { + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_bitrev128: + ID = Intrinsic::x86_avx512_vbitrevb_128; + break; + case X86::BI__builtin_ia32_bitrev256: + ID = Intrinsic::x86_avx512_vbitrevb_256; + break; + case X86::BI__builtin_ia32_bitrev512: + ID = Intrinsic::x86_avx512_vbitrevb_512; + break; + } + + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + + case X86::BI__builtin_ia32_bmacor16x16x16_v16hi: + case X86::BI__builtin_ia32_bmacor16x16x16_v32hi: + case 
X86::BI__builtin_ia32_bmacxor16x16x16_v16hi: + case X86::BI__builtin_ia32_bmacxor16x16x16_v32hi: { + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_bmacor16x16x16_v16hi: + ID = Intrinsic::x86_avx512_vbmacor_v16hi; + break; + case X86::BI__builtin_ia32_bmacor16x16x16_v32hi: + ID = Intrinsic::x86_avx512_vbmacor_v32hi; + break; + case X86::BI__builtin_ia32_bmacxor16x16x16_v16hi: + ID = Intrinsic::x86_avx512_vbmacxor_v16hi; + break; + case X86::BI__builtin_ia32_bmacxor16x16x16_v32hi: + ID = Intrinsic::x86_avx512_vbmacxor_v32hi; + break; + } + + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } // packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpeqpd: diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index c92b370b88d2d..3686080c1d6cf 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -186,6 +186,8 @@ set(x86_files avx2intrin.h avx512bf16intrin.h avx512bitalgintrin.h + avx512bmmintrin.h + avx512bmmvlintrin.h avx512bwintrin.h avx512cdintrin.h avx512dqintrin.h diff --git a/clang/lib/Headers/avx512bmmintrin.h b/clang/lib/Headers/avx512bmmintrin.h new file mode 100644 index 0000000000000..d39106f9c2276 --- /dev/null +++ b/clang/lib/Headers/avx512bmmintrin.h @@ -0,0 +1,63 @@ +/*===-------- avx512bmmintrin.h - AVX512BMM intrinsics *------------------=== + * + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===---------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx512bmmintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AVX512BMMINTRIN_H +#define _AVX512BMMINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bmm"), \ + __min_vector_width__(512))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_bmacor16x16x16(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_bmacor16x16x16_v32hi( + (__v32hi)__A, (__v32hi)__B, (__v32hi)__C); +} + +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_bmacxor16x16x16(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_bmacxor16x16x16_v32hi( + (__v32hi)__A, (__v32hi)__B, (__v32hi)__C); +} + +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_bitrev_epi8(__m512i __A) { + return (__m512i)__builtin_ia32_bitrev512((__v64qi)__A); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_mask_bitrev_epi8(__mmask64 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_bitrev_epi8(__A), (__v64qi)__B); +} + +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_bitrev_epi8(__mmask64 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_bitrev_epi8(__A), + (__v64qi)_mm512_setzero_si512()); +} + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR + +#endif diff --git a/clang/lib/Headers/avx512bmmvlintrin.h b/clang/lib/Headers/avx512bmmvlintrin.h new file mode 100644 index 0000000000000..86a1f2ab410d0 --- /dev/null +++ b/clang/lib/Headers/avx512bmmvlintrin.h @@ -0,0 +1,85 @@ +/*===------------- avx512bmvlintrin.h - BMM intrinsics ------------------=== + * + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx512bmmvlintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __BMMVLINTRIN_H +#define __BMMVLINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bmm,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bmm,avx512vl"), __min_vector_width__(256))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + +static __inline __m256i __DEFAULT_FN_ATTRS256 +_mm256_bmacor16x16x16(__m256i __A, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_bmacor16x16x16_v16hi( + (__v16hi)__A, (__v16hi)__B, (__v16hi)__C); +} + +static __inline __m256i __DEFAULT_FN_ATTRS256 +_mm256_bmacxor16x16x16(__m256i __A, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_bmacxor16x16x16_v16hi( + (__v16hi)__A, (__v16hi)__B, (__v16hi)__C); +} + +static __inline __m128i __DEFAULT_FN_ATTRS128 _mm128_bitrev_epi8(__m128i __A) { + return (__m128i)__builtin_ia32_bitrev128((__v16qi)__A); +} + +static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_bitrev_epi8(__m256i __A) { + return (__m256i)__builtin_ia32_bitrev256((__v32qi)__A); +} + +static __inline __m128i __DEFAULT_FN_ATTRS128 +_mm128_mask_bitrev_epi8(__mmask16 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, 
(__v16qi)_mm128_bitrev_epi8(__A), (__v16qi)__B); +} + +static __inline __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_bitrev_epi8(__mmask32 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_bitrev_epi8(__A), (__v32qi)__B); +} + +static __inline __m128i __DEFAULT_FN_ATTRS128 +_mm128_maskz_bitrev_epi8(__mmask16 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, + (__v16qi)_mm128_bitrev_epi8(__A), + (__v16qi)_mm_setzero_si128()); +} + +static __inline __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_bitrev_epi8(__mmask32 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, + (__v32qi)_mm256_bitrev_epi8(__A), + (__v32qi)_mm256_setzero_si256()); +} + +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS256_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 19064a4ff5cea..00107c44c3a55 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -58,6 +58,10 @@ #include <avx512bitalgintrin.h> +#include <avx512bmmintrin.h> + +#include <avx512bmmvlintrin.h> + #include <avx512cdintrin.h> #include <avx512vpopcntdqintrin.h> diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index 474fa93629d89..6a110ce38605b 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -33,7 +33,7 @@ __attribute__((target("fpmath=387"))) void f_fpmath_387(void) {} // CHECK-NOT: tune-cpu -// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" 
"target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bmm,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" __attribute__((target("no-sse2"))) void f_no_sse2(void) {} @@ -41,7 +41,7 @@ void f_no_sse2(void) {} __attribute__((target("sse4"))) void f_sse4(void) {} -// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/179150 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
