llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-tools-extra

@llvm/pr-subscribers-clang-codegen

Author: Ganesh (ganeshgit)

<details>
<summary>Changes</summary>

Adds support for the new AVX512 BMM ISA extension.
Updates the CPUID checks for the new subtarget feature.
Adds model-number checks to identify Zen 6 (znver6) processors.


---

Patch is 101.35 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/179150.diff


59 Files Affected:

- (modified) 
clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp 
(+2) 
- (modified) clang-tools-extra/clangd/index/CanonicalIncludes.cpp (+2) 
- (modified) clang/docs/ReleaseNotes.rst (+15) 
- (modified) clang/include/clang/Basic/BuiltinsX86.td (+28) 
- (modified) clang/include/clang/Options/Options.td (+2) 
- (modified) clang/lib/Basic/Targets/X86.cpp (+15) 
- (modified) clang/lib/Basic/Targets/X86.h (+1) 
- (modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (+43) 
- (modified) clang/lib/Headers/CMakeLists.txt (+2) 
- (added) clang/lib/Headers/avx512bmmintrin.h (+63) 
- (added) clang/lib/Headers/avx512bmmvlintrin.h (+85) 
- (modified) clang/lib/Headers/immintrin.h (+4) 
- (modified) clang/test/CodeGen/attr-target-x86.c (+2-2) 
- (modified) clang/test/CodeGen/target-builtin-noerror.c (+1) 
- (modified) clang/test/Driver/x86-march.c (+4) 
- (modified) clang/test/Frontend/x86-target-cpu.c (+1) 
- (modified) clang/test/Misc/target-invalid-cpu-note/x86.c (+4) 
- (modified) clang/test/Preprocessor/predefined-arch-macros.c (+151) 
- (modified) compiler-rt/lib/builtins/cpu_model/x86.c (+13-10) 
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+27) 
- (modified) llvm/include/llvm/Support/GenericLoopInfoImpl.h (+3-2) 
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+2) 
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.h (+1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (+2-1) 
- (modified) llvm/lib/Target/X86/X86.td (+15) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+9-2) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+5) 
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+62) 
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+7-1) 
- (modified) llvm/lib/Target/X86/X86InstrPredicates.td (+2) 
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+12) 
- (modified) llvm/lib/Target/X86/X86PfmCounters.td (+1) 
- (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+16-16) 
- (modified) llvm/lib/TargetParser/Host.cpp (+8-1) 
- (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+6) 
- (added) llvm/test/CodeGen/X86/avx512bmm-vbitrevb-bitreverse.ll (+88) 
- (added) llvm/test/CodeGen/X86/avx512bmm-vbitrevb-intrinsics-mem.ll (+141) 
- (added) llvm/test/CodeGen/X86/avx512bmm-vbitrevb-intrinsics.ll (+222) 
- (added) llvm/test/CodeGen/X86/avx512bmm-vbmac-intrinsics.ll (+63) 
- (modified) llvm/test/CodeGen/X86/bypass-slow-division-64.ll (+1) 
- (modified) llvm/test/CodeGen/X86/cmp16.ll (+1) 
- (modified) llvm/test/CodeGen/X86/cpus-amd.ll (+1) 
- (modified) llvm/test/CodeGen/X86/rdpru.ll (+1) 
- (modified) llvm/test/CodeGen/X86/shuffle-as-shifts.ll (+1) 
- (modified) llvm/test/CodeGen/X86/slow-unaligned-mem.ll (+1) 
- (modified) llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll (+1) 
- (modified) llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll (+1) 
- (modified) llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll (+1) 
- (modified) llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll (+1) 
- (modified) llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll (+1) 
- (modified) llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll (+1) 
- (modified) llvm/test/CodeGen/X86/vpdpwssd.ll (+1) 
- (modified) llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll (+1) 
- (modified) llvm/test/MC/X86/x86_long_nop.s (+2) 
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+33) 
- (modified) llvm/test/Transforms/LoopUnroll/X86/call-remark.ll (+1) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll (+1) 
- (modified) llvm/utils/TableGen/X86FoldTablesEmitter.cpp (+2-1) 
- (modified) llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn (+2) 


``````````diff
diff --git 
a/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp
 
b/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp
index d43a3ea327a39..8a5e3290ed9ef 100644
--- 
a/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp
+++ 
b/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp
@@ -39,6 +39,8 @@ const HeaderMapCollector::RegexHeaderMap 
*getSTLPostfixHeaderMap() {
       {"include/ammintrin.h$", "<ammintrin.h>"},
       {"include/avx2intrin.h$", "<immintrin.h>"},
       {"include/avx512bwintrin.h$", "<immintrin.h>"},
+      {"include/avx512bmmintrin.h$", "<immintrin.h>"},
+      {"include/avx512bmmvlintrin.h$", "<immintrin.h>"},
       {"include/avx512cdintrin.h$", "<immintrin.h>"},
       {"include/avx512dqintrin.h$", "<immintrin.h>"},
       {"include/avx512erintrin.h$", "<immintrin.h>"},
diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp 
b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index cbef64f351341..3a646e47a9b16 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -42,6 +42,8 @@ const std::pair<llvm::StringRef, llvm::StringRef> 
IncludeMappings[] = {
     {"include/ammintrin.h", "<ammintrin.h>"},
     {"include/avx2intrin.h", "<immintrin.h>"},
     {"include/avx512bwintrin.h", "<immintrin.h>"},
+    {"include/avx512bmmintrin.h", "<immintrin.h>"},
+    {"include/avx512bmmvlintrin.h", "<immintrin.h>"},
     {"include/avx512cdintrin.h", "<immintrin.h>"},
     {"include/avx512dqintrin.h", "<immintrin.h>"},
     {"include/avx512erintrin.h", "<immintrin.h>"},
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3a3d76112a02b..a6ac25634a567 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,21 @@ NVPTX Support
 
 X86 Support
 ^^^^^^^^^^^
+- ``-march=znver6`` is now supported.
+- Support ISA of ``AVX512BMM``.
+  * Support intrinsic of ``_mm512_vbmacor16x16x16_epi16``.
+  * Support intrinsic of ``_mm512_vbmacxor16x16x16_epi16``.
+  * Support intrinsic of ``_mm512_mask_vbitrev_epi8``.
+  * Support intrinsic of ``_mm512_maskz_vbitrev_epi8``.
+  * Support intrinsic of ``_mm512_vbitrev_epi8``.
+  * Support intrinsic of ``_mm256_vbmacor16x16x16_epi16``.
+  * Support intrinsic of ``_mm256_vbmacxor16x16x16_epi16``.
+  * Support intrinsic of ``_mm128_mask_vbitrev_epu8``.
+  * Support intrinsic of ``_mm256_mask_vbitrev_epu8``.
+  * Support intrinsic of ``_mm128_maskz_vbitrev_epu8``.
+  * Support intrinsic of ``_mm256_maskz_vbitrev_epu8``.
+  * Support intrinsic of ``_mm128_vbitrev_epu8``.
+  * Support intrinsic of ``_mm256_vbitrev_epu8``.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.td 
b/clang/include/clang/Basic/BuiltinsX86.td
index 23eac47eb5e4c..7d1b513532f34 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -5055,3 +5055,31 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>
 let Features = "avx10.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
   def vgetmantbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, 
__bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">;
 }
+
+let Features = "avx10.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
+  def vsqrtbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">;
+}
+
+let Features = "avx10.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
+  def vsqrtbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">;
+}
+
+let Features = "avx10.2", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+  def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
+}
+
+let Features = "avx512bmm", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
+  def bmacor16x16x16_v32hi : X86Builtin<"_Vector<32, short>(_Vector<32, 
short>, _Vector<32, short>, _Vector<32, short>)">;
+  def bmacxor16x16x16_v32hi : X86Builtin<"_Vector<32, short>(_Vector<32, 
short>, _Vector<32, short>, _Vector<32, short>)">;
+  def bitrev512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>)">;
+}
+
+let Features = "avx512bmm,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
+  def bmacor16x16x16_v16hi : X86Builtin<"_Vector<16, short>(_Vector<16, 
short>, _Vector<16, short>, _Vector<16, short>)">;
+  def bmacxor16x16x16_v16hi : X86Builtin<"_Vector<16, short>(_Vector<16, 
short>, _Vector<16, short>, _Vector<16, short>)">;
+  def bitrev256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>)">;
+}
+
+let Features = "avx512bmm,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
+  def bitrev128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>)">;
+}
diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 421208a812bbc..658ee6f7ebe60 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -6889,6 +6889,8 @@ def mavx512bf16 : Flag<["-"], "mavx512bf16">, 
Group<m_x86_Features_Group>;
 def mno_avx512bf16 : Flag<["-"], "mno-avx512bf16">, 
Group<m_x86_Features_Group>;
 def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
 def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, 
Group<m_x86_Features_Group>;
+def mavx512bmm : Flag<["-"], "mavx512bmm">, Group<m_x86_Features_Group>;
+def mno_avx512bmm : Flag<["-"], "mno-avx512bmm">, Group<m_x86_Features_Group>;
 def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
 def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>;
 def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index f00d435937b92..2d65e5f6a9ba7 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -296,6 +296,8 @@ bool 
X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAVX512DQ = true;
     } else if (Feature == "+avx512bitalg") {
       HasAVX512BITALG = true;
+    } else if (Feature == "+avx512bmm") {
+      HasAVX512BMM = true;
     } else if (Feature == "+avx512bw") {
       HasAVX512BW = true;
     } else if (Feature == "+avx512vl") {
@@ -308,6 +310,8 @@ bool 
X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAVX512IFMA = true;
     } else if (Feature == "+avx512vp2intersect") {
       HasAVX512VP2INTERSECT = true;
+    } else if (Feature == "avx512bmm") {
+      HasAVX512BMM = true;
     } else if (Feature == "+sha") {
       HasSHA = true;
     } else if (Feature == "+sha512") {
@@ -716,6 +720,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case CK_ZNVER5:
     defineCPUMacros(Builder, "znver5");
     break;
+  case CK_ZNVER6:
+    defineCPUMacros(Builder, "znver6");
+    break;
   case CK_Geode:
     defineCPUMacros(Builder, "geode");
     break;
@@ -833,6 +840,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions 
&Opts,
     Builder.defineMacro("__AVX512DQ__");
   if (HasAVX512BITALG)
     Builder.defineMacro("__AVX512BITALG__");
+  if (HasAVX512BMM)
+    Builder.defineMacro("__AVX512BMM__");
   if (HasAVX512BW)
     Builder.defineMacro("__AVX512BW__");
   if (HasAVX512VL) {
@@ -846,6 +855,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions 
&Opts,
     Builder.defineMacro("__AVX512IFMA__");
   if (HasAVX512VP2INTERSECT)
     Builder.defineMacro("__AVX512VP2INTERSECT__");
+  if (HasAVX512BMM)
+    Builder.defineMacro("__AVX512BMM__");
   if (HasSHA)
     Builder.defineMacro("__SHA__");
   if (HasSHA512)
@@ -1076,6 +1087,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) 
const {
       .Case("avx512fp16", true)
       .Case("avx512dq", true)
       .Case("avx512bitalg", true)
+      .Case("avx512bmm", true)
       .Case("avx512bw", true)
       .Case("avx512vl", true)
       .Case("avx512vbmi", true)
@@ -1196,6 +1208,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx512fp16", HasAVX512FP16)
       .Case("avx512dq", HasAVX512DQ)
       .Case("avx512bitalg", HasAVX512BITALG)
+      .Case("avx512bmm", HasAVX512BMM)
+      .Case("avx512bmm", HasAVX512BMM)
       .Case("avx512bw", HasAVX512BW)
       .Case("avx512vl", HasAVX512VL)
       .Case("avx512vbmi", HasAVX512VBMI)
@@ -1641,6 +1655,7 @@ std::optional<unsigned> 
X86TargetInfo::getCPUCacheLineSize() const {
     case CK_ZNVER3:
     case CK_ZNVER4:
     case CK_ZNVER5:
+    case CK_ZNVER6:
     // Deprecated
     case CK_x86_64:
     case CK_x86_64_v2:
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 922e32906cd04..6bd55f9fbf4bb 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -104,6 +104,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public 
TargetInfo {
   bool HasAVX512BF16 = false;
   bool HasAVX512DQ = false;
   bool HasAVX512BITALG = false;
+  bool HasAVX512BMM = false;
   bool HasAVX512BW = false;
   bool HasAVX512VL = false;
   bool HasAVX512VBMI = false;
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp 
b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index 9645ed87b8ef3..2c4e1f0cc8b17 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -2678,6 +2678,49 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned 
BuiltinID,
     return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
   }
 
+  case X86::BI__builtin_ia32_bitrev128:
+  case X86::BI__builtin_ia32_bitrev256:
+  case X86::BI__builtin_ia32_bitrev512: {
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_bitrev128:
+      ID = Intrinsic::x86_avx512_vbitrevb_128;
+      break;
+    case X86::BI__builtin_ia32_bitrev256:
+      ID = Intrinsic::x86_avx512_vbitrevb_256;
+      break;
+    case X86::BI__builtin_ia32_bitrev512:
+      ID = Intrinsic::x86_avx512_vbitrevb_512;
+      break;
+    }
+
+    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+  }
+
+  case X86::BI__builtin_ia32_bmacor16x16x16_v16hi:
+  case X86::BI__builtin_ia32_bmacor16x16x16_v32hi:
+  case X86::BI__builtin_ia32_bmacxor16x16x16_v16hi:
+  case X86::BI__builtin_ia32_bmacxor16x16x16_v32hi: {
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_bmacor16x16x16_v16hi:
+      ID = Intrinsic::x86_avx512_vbmacor_v16hi;
+      break;
+    case X86::BI__builtin_ia32_bmacor16x16x16_v32hi:
+      ID = Intrinsic::x86_avx512_vbmacor_v32hi;
+      break;
+    case X86::BI__builtin_ia32_bmacxor16x16x16_v16hi:
+      ID = Intrinsic::x86_avx512_vbmacxor_v16hi;
+      break;
+    case X86::BI__builtin_ia32_bmacxor16x16x16_v32hi:
+      ID = Intrinsic::x86_avx512_vbmacxor_v32hi;
+      break;
+    }
+
+    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+  }
   // packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpeqpd:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index c92b370b88d2d..3686080c1d6cf 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -186,6 +186,8 @@ set(x86_files
   avx2intrin.h
   avx512bf16intrin.h
   avx512bitalgintrin.h
+  avx512bmmintrin.h
+  avx512bmmvlintrin.h  
   avx512bwintrin.h
   avx512cdintrin.h
   avx512dqintrin.h
diff --git a/clang/lib/Headers/avx512bmmintrin.h 
b/clang/lib/Headers/avx512bmmintrin.h
new file mode 100644
index 0000000000000..d39106f9c2276
--- /dev/null
+++ b/clang/lib/Headers/avx512bmmintrin.h
@@ -0,0 +1,63 @@
+/*===-------- avx512bmmintrin.h - AVX512BMM intrinsics *------------------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===---------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512bmmintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BMMINTRIN_H
+#define _AVX512BMMINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS                                                     
\
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bmm"),      
\
+                 __min_vector_width__(512)))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#else
+#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#endif
+
+static __inline __m512i __DEFAULT_FN_ATTRS _mm512_bmacor16x16x16(__m512i __A,
+                                                                 __m512i __B,
+                                                                 __m512i __C) {
+  return (__m512i)__builtin_ia32_bmacor16x16x16_v32hi(
+      (__v32hi)__A, (__v32hi)__B, (__v32hi)__C);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS _mm512_bmacxor16x16x16(__m512i __A,
+                                                                  __m512i __B,
+                                                                  __m512i __C) 
{
+  return (__m512i)__builtin_ia32_bmacxor16x16x16_v32hi(
+      (__v32hi)__A, (__v32hi)__B, (__v32hi)__C);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS _mm512_bitrev_epi8(__m512i __A) {
+  return (__m512i)__builtin_ia32_bitrev512((__v64qi)__A);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_bitrev_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_bitrev_epi8(__A), (__v64qi)__B);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_bitrev_epi8(__mmask64 __U, __m512i __A) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                             (__v64qi)_mm512_bitrev_epi8(__A),
+                                             (__v64qi)_mm512_setzero_si512());
+}
+
+#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS_CONSTEXPR
+
+#endif
diff --git a/clang/lib/Headers/avx512bmmvlintrin.h 
b/clang/lib/Headers/avx512bmmvlintrin.h
new file mode 100644
index 0000000000000..86a1f2ab410d0
--- /dev/null
+++ b/clang/lib/Headers/avx512bmmvlintrin.h
@@ -0,0 +1,85 @@
+/*===------------ avx512bmmvlintrin.h - BMM intrinsics ------------------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         
\
+    "Never use <avx512bmmvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __BMMVLINTRIN_H
+#define __BMMVLINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  
\
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512bmm,avx512vl"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  
\
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512bmm,avx512vl"), __min_vector_width__(256)))
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
+static __inline __m256i __DEFAULT_FN_ATTRS256
+_mm256_bmacor16x16x16(__m256i __A, __m256i __B, __m256i __C) {
+  return (__m256i)__builtin_ia32_bmacor16x16x16_v16hi(
+      (__v16hi)__A, (__v16hi)__B, (__v16hi)__C);
+}
+
+static __inline __m256i __DEFAULT_FN_ATTRS256
+_mm256_bmacxor16x16x16(__m256i __A, __m256i __B, __m256i __C) {
+  return (__m256i)__builtin_ia32_bmacxor16x16x16_v16hi(
+      (__v16hi)__A, (__v16hi)__B, (__v16hi)__C);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS128 _mm128_bitrev_epi8(__m128i __A) {
+  return (__m128i)__builtin_ia32_bitrev128((__v16qi)__A);
+}
+
+static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_bitrev_epi8(__m256i __A) {
+  return (__m256i)__builtin_ia32_bitrev256((__v32qi)__A);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS128
+_mm128_mask_bitrev_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i)__builtin_ia32_selectb_128(
+      (__mmask16)__U, (__v16qi)_mm128_bitrev_epi8(__A), (__v16qi)__B);
+}
+
+static __inline __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_bitrev_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_selectb_256(
+      (__mmask32)__U, (__v32qi)_mm256_bitrev_epi8(__A), (__v32qi)__B);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS128
+_mm128_maskz_bitrev_epi8(__mmask16 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+                                             (__v16qi)_mm128_bitrev_epi8(__A),
+                                             (__v16qi)_mm_setzero_si128());
+}
+
+static __inline __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_bitrev_epi8(__mmask32 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+                                             (__v32qi)_mm256_bitrev_epi8(__A),
+                                             (__v32qi)_mm256_setzero_si256());
+}
+
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 19064a4ff5cea..00107c44c3a55 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -58,6 +58,10 @@
 
 #include <avx512bitalgintrin.h>
 
+#include <avx512bmmintrin.h>
+
+#include <avx512bmmvlintrin.h>
+
 #include <avx512cdintrin.h>
 
 #include <avx512vpopcntdqintrin.h>
diff --git a/clang/test/CodeGen/attr-target-x86.c 
b/clang/test/CodeGen/attr-target-x86.c
index 474fa93629d89..6a110ce38605b 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -33,7 +33,7 @@ __attribute__((target("fpmath=387")))
 void f_fpmath_387(void) {}
 
 // CHECK-NOT: tune-cpu
-// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" 
"target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop"
 "tune-cpu"="i686"
+// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" 
"target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bmm,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop"
 "tune-cpu"="i686"
 __attribute__((target("no-sse2")))
 void f_no_sse2(void) {}
 
@@ -41,7 +41,7 @@ void f_no_sse2(void) {}
 __attribute__((target("sse4")))
 void f_sse4(void) {}
 
-// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" 
"target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/179150
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to