llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-driver

@llvm/pr-subscribers-clang

Author: Freddy Ye (FreddyLeaf)

<details>
<summary>Changes</summary>

This reverts commit 282d2ab58f56c89510f810a43d4569824a90c538.


---

Patch is 141.61 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/93123.diff


53 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (-2) 
- (modified) clang/include/clang/Basic/BuiltinsX86.def (+21) 
- (modified) clang/include/clang/Basic/DiagnosticCommonKinds.td (+3) 
- (modified) clang/include/clang/Driver/Options.td (+6) 
- (modified) clang/lib/Basic/Targets/X86.cpp (+21) 
- (modified) clang/lib/Basic/Targets/X86.h (+3) 
- (modified) clang/lib/Headers/CMakeLists.txt (+2) 
- (added) clang/lib/Headers/avx512erintrin.h (+271) 
- (added) clang/lib/Headers/avx512pfintrin.h (+92) 
- (modified) clang/lib/Headers/immintrin.h (+8) 
- (modified) clang/lib/Headers/module.modulemap (+1) 
- (modified) clang/lib/Sema/SemaChecking.cpp (+30) 
- (added) clang/test/CodeGen/X86/avx512er-builtins.c (+347) 
- (added) clang/test/CodeGen/X86/avx512pf-builtins.c (+100) 
- (modified) clang/test/CodeGen/attr-cpuspecific.c (+5-5) 
- (modified) clang/test/CodeGen/attr-target-x86.c (+2-2) 
- (modified) clang/test/CodeGen/function-target-features.c (+2-2) 
- (modified) clang/test/CodeGen/target-builtin-noerror.c (+2) 
- (modified) clang/test/Driver/cl-x86-flags.c (+8-2) 
- (modified) clang/test/Driver/x86-target-features.c (+9-4) 
- (modified) clang/test/Frontend/x86-target-cpu.c (+8-2) 
- (modified) clang/test/Preprocessor/predefined-arch-macros.c (+12) 
- (modified) clang/test/Preprocessor/x86_target_features.c (+50) 
- (modified) clang/test/Sema/builtins-x86.c (+8) 
- (modified) llvm/docs/ReleaseNotes.rst (-3) 
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+84) 
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+6-3) 
- (modified) llvm/lib/Target/X86/X86.td (+12) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+10) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+12) 
- (modified) llvm/lib/Target/X86/X86Instr3DNow.td (+2-1) 
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+16-75) 
- (modified) llvm/lib/Target/X86/X86InstrFragments.td (+7-1) 
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+11) 
- (modified) llvm/lib/Target/X86/X86InstrPredicates.td (+3) 
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+27) 
- (modified) llvm/lib/Target/X86/X86Subtarget.h (+5-3) 
- (modified) llvm/lib/TargetParser/Host.cpp (+9) 
- (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+3-3) 
- (modified) llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll 
(+24) 
- (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll (+24) 
- (added) llvm/test/CodeGen/X86/avx512er-intrinsics.ll (+306) 
- (modified) llvm/test/CodeGen/X86/crc32-target-feature.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll (+6-1) 
- (modified) llvm/test/CodeGen/X86/prefetch.ll (+17) 
- (modified) llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll (+22) 
- (modified) llvm/test/CodeGen/X86/unfoldMemoryOperand.mir (+1-1) 
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr54634.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll (+1-1) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll (+1-1) 


``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d410d8acd135b..0c4a343b70009 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -801,8 +801,6 @@ AMDGPU Support
 X86 Support
 ^^^^^^^^^^^
 
-- Remove knl/knm specific ISA supports: AVX512PF, AVX512ER, PREFETCHWT1
-
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsX86.def 
b/clang/include/clang/Basic/BuiltinsX86.def
index 7074479786b97..eafcc219c1096 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -832,11 +832,23 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, 
"V4fV4fV4fV4fUc", "ncV:128:", "avx
 TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", 
"avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", 
"avx512f,evex512")
 
+TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", 
"ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", 
"ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", 
"avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", 
"avx512er,evex512")
+
 TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", 
"avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", 
"avx512f,evex512")
 
+TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", 
"ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", 
"ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", 
"avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", 
"avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", 
"avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", 
"avx512er,evex512")
+
 TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", 
"ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", 
"ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", 
"avx512f,evex512")
@@ -948,6 +960,15 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, 
"vv*UsV16iV16iIi", "nV:512:", "avx
 TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vv*UcV8OiV8OiIi", "nV:512:", 
"avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", 
"avx512f,evex512")
 
+TARGET_BUILTIN(__builtin_ia32_gatherpfdpd,  "vUcV8ivC*IiIi", "nV:512:", 
"avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfdps,  "vUsV16ivC*IiIi", "nV:512:", 
"avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8OivC*IiIi", "nV:512:", 
"avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8OivC*IiIi", "nV:512:", 
"avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", 
"avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", 
"avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", 
"avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", 
"avx512pf,evex512")
+
 TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td 
b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 1e44bc4ad09b6..0738f43ca555c 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -361,6 +361,9 @@ def warn_invalid_feature_combination : Warning<
 def warn_target_unrecognized_env : Warning<
   "mismatch between architecture and environment in target triple '%0'; did 
you mean '%1'?">,
   InGroup<InvalidCommandLineArgument>;
+def warn_knl_knm_isa_support_removed : Warning<
+  "KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be 
removed in LLVM 19.">,
+  InGroup<DiagGroup<"knl-knm-isa-support-removed">>;
 def err_target_unsupported_abi_with_fpu : Error<
   "'%0' ABI is not supported with FPU">;
 
diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 9a5bffce20460..8cbb7f854ee72 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6111,10 +6111,14 @@ def mavx512cd : Flag<["-"], "mavx512cd">, 
Group<m_x86_Features_Group>;
 def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
 def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
 def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
+def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
+def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
 def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
 def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, 
Group<m_x86_Features_Group>;
 def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
 def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, 
Group<m_x86_Features_Group>;
+def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
+def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
 def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
 def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, 
Group<m_x86_Features_Group>;
 def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
@@ -6205,6 +6209,8 @@ def mpopcnt : Flag<["-"], "mpopcnt">, 
Group<m_x86_Features_Group>;
 def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
 def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
 def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
+def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
+def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, 
Group<m_x86_Features_Group>;
 def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
 def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
 def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 3a30cff917bb4..b823eaf6ce336 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -310,9 +310,15 @@ bool 
X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAVX512VNNI = true;
     } else if (Feature == "+avx512bf16") {
       HasAVX512BF16 = true;
+    } else if (Feature == "+avx512er") {
+      HasAVX512ER = true;
+      Diags.Report(diag::warn_knl_knm_isa_support_removed);
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
       HasLegalHalfType = true;
+    } else if (Feature == "+avx512pf") {
+      HasAVX512PF = true;
+      Diags.Report(diag::warn_knl_knm_isa_support_removed);
     } else if (Feature == "+avx512dq") {
       HasAVX512DQ = true;
     } else if (Feature == "+avx512bitalg") {
@@ -369,6 +375,9 @@ bool 
X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasWBNOINVD = true;
     } else if (Feature == "+prefetchi") {
       HasPREFETCHI = true;
+    } else if (Feature == "+prefetchwt1") {
+      HasPREFETCHWT1 = true;
+      Diags.Report(diag::warn_knl_knm_isa_support_removed);
     } else if (Feature == "+clzero") {
       HasCLZERO = true;
     } else if (Feature == "+cldemote") {
@@ -831,8 +840,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions 
&Opts,
     Builder.defineMacro("__AVX512VNNI__");
   if (HasAVX512BF16)
     Builder.defineMacro("__AVX512BF16__");
+  if (HasAVX512ER)
+    Builder.defineMacro("__AVX512ER__");
   if (HasAVX512FP16)
     Builder.defineMacro("__AVX512FP16__");
+  if (HasAVX512PF)
+    Builder.defineMacro("__AVX512PF__");
   if (HasAVX512DQ)
     Builder.defineMacro("__AVX512DQ__");
   if (HasAVX512BITALG)
@@ -884,6 +897,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions 
&Opts,
     Builder.defineMacro("__SM4__");
   if (HasPREFETCHI)
     Builder.defineMacro("__PREFETCHI__");
+  if (HasPREFETCHWT1)
+    Builder.defineMacro("__PREFETCHWT1__");
   if (HasCLZERO)
     Builder.defineMacro("__CLZERO__");
   if (HasKL)
@@ -1069,7 +1084,9 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) 
const {
       .Case("avx512vpopcntdq", true)
       .Case("avx512vnni", true)
       .Case("avx512bf16", true)
+      .Case("avx512er", true)
       .Case("avx512fp16", true)
+      .Case("avx512pf", true)
       .Case("avx512dq", true)
       .Case("avx512bitalg", true)
       .Case("avx512bw", true)
@@ -1117,6 +1134,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) 
const {
       .Case("pku", true)
       .Case("popcnt", true)
       .Case("prefetchi", true)
+      .Case("prefetchwt1", true)
       .Case("prfchw", true)
       .Case("ptwrite", true)
       .Case("raoint", true)
@@ -1183,7 +1201,9 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
       .Case("avx512vnni", HasAVX512VNNI)
       .Case("avx512bf16", HasAVX512BF16)
+      .Case("avx512er", HasAVX512ER)
       .Case("avx512fp16", HasAVX512FP16)
+      .Case("avx512pf", HasAVX512PF)
       .Case("avx512dq", HasAVX512DQ)
       .Case("avx512bitalg", HasAVX512BITALG)
       .Case("avx512bw", HasAVX512BW)
@@ -1233,6 +1253,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("pku", HasPKU)
       .Case("popcnt", HasPOPCNT)
       .Case("prefetchi", HasPREFETCHI)
+      .Case("prefetchwt1", HasPREFETCHWT1)
       .Case("prfchw", HasPRFCHW)
       .Case("ptwrite", HasPTWRITE)
       .Case("raoint", HasRAOINT)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 0633b7e0da96a..6a0a6cb84203d 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,6 +103,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public 
TargetInfo {
   bool HasAVX512VNNI = false;
   bool HasAVX512FP16 = false;
   bool HasAVX512BF16 = false;
+  bool HasAVX512ER = false;
+  bool HasAVX512PF = false;
   bool HasAVX512DQ = false;
   bool HasAVX512BITALG = false;
   bool HasAVX512BW = false;
@@ -134,6 +136,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public 
TargetInfo {
   bool HasCLWB = false;
   bool HasMOVBE = false;
   bool HasPREFETCHI = false;
+  bool HasPREFETCHWT1 = false;
   bool HasRDPID = false;
   bool HasRDPRU = false;
   bool HasRetpolineExternalThunk = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index dbff92b4e59b4..5f02c71f6ca51 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -153,10 +153,12 @@ set(x86_files
   avx512bwintrin.h
   avx512cdintrin.h
   avx512dqintrin.h
+  avx512erintrin.h
   avx512fintrin.h
   avx512fp16intrin.h
   avx512ifmaintrin.h
   avx512ifmavlintrin.h
+  avx512pfintrin.h
   avx512vbmi2intrin.h
   avx512vbmiintrin.h
   avx512vbmivlintrin.h
diff --git a/clang/lib/Headers/avx512erintrin.h 
b/clang/lib/Headers/avx512erintrin.h
new file mode 100644
index 0000000000000..1c5a2d2d208ff
--- /dev/null
+++ b/clang/lib/Headers/avx512erintrin.h
@@ -0,0 +1,271 @@
+/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512ERINTRIN_H
+#define __AVX512ERINTRIN_H
+
+/* exp2a23 */
+#define _mm512_exp2a23_round_pd(A, R) \
+  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+                                       (__v8df)_mm512_setzero_pd(), \
+                                       (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
+  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
+                                       (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
+  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+                                       (__v8df)_mm512_setzero_pd(), \
+                                       (__mmask8)(M), (int)(R)))
+
+#define _mm512_exp2a23_pd(A) \
+  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_pd(S, M, A) \
+  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_pd(M, A) \
+  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_exp2a23_round_ps(A, R) \
+  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+                                      (__v16sf)_mm512_setzero_ps(), \
+                                      (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
+  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
+                                      (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
+  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+                                      (__v16sf)_mm512_setzero_ps(), \
+                                      (__mmask16)(M), (int)(R)))
+
+#define _mm512_exp2a23_ps(A) \
+  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_ps(S, M, A) \
+  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_ps(M, A) \
+  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+/* rsqrt28 */
+#define _mm512_rsqrt28_round_pd(A, R) \
+  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+                                          (__v8df)_mm512_setzero_pd(), \
+                                          (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
+  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+                                          (__v8df)(__m512d)(S), (__mmask8)(M), 
\
+                                          (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
+  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+                                          (__v8df)_mm512_setzero_pd(), \
+                                          (__mmask8)(M), (int)(R)))
+
+#define _mm512_rsqrt28_pd(A) \
+  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_pd(S, M, A) \
+  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_pd(M, A) \
+  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_round_ps(A, R) \
+  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+                                         (__v16sf)_mm512_setzero_ps(), \
+                                         (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
+  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+                                         (__v16sf)(__m512)(S), (__mmask16)(M), 
\
+                                         (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
+  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+                                         (__v16sf)_mm512_setzero_ps(), \
+                                         (__mmask16)(M), (int)(R)))
+
+#define _mm512_rsqrt28_ps(A) \
+  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_ps(S, M, A) \
+  _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_ps(M, A) \
+  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_ss(A, B, R) \
+  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+                                               (__v4sf)(__m128)(B), \
+                                               (__v4sf)_mm_setzero_ps(), \
+                                               (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
+  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+                                               (__v4sf)(__m128)(B), \
+                                               (__v4sf)(__m128)(S), \
+                                               (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
+  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+                                               (__v4sf)(__m128)(B), \
+                                               (__v4sf)_mm_setzero_ps(), \
+                                               (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_ss(A, B) \
+  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_ss(S, M, A, B) \
+  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_ss(M, A, B) \
+  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_sd(A, B, R) \
+  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+                                                (__v2df)(__m128d)(B), \
+                                                (__v2df)_mm_setzero_pd(), \
+                                                (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
+  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+                                                (__v2df)(__m128d)(B), \
+                                                (__v2df)(__m128d)(S), \
+                                                (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
+  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+                                                (__v2df)(__m128d)(B), \
+                                                (__v2df)_mm_setzero_pd(), \
+                                                (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_sd(A, B) \
+  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_sd(S, M, A, B) \
+  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_sd(M, A, B) \
+  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+/* rcp28 */
+#define _mm512_rcp28_round_pd(A, R) \
+  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)_mm512_setzero_pd(), \
+                                        (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
+  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)(__m512d)(S), (__mmask8)(M), \
+                                        (int)(R)))
+
+#define _mm512_maskz_rcp28_round_pd(M, A, R) \
+  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+                                        (__v8df)_mm512_setzero_pd(), \
+                                        (__mmask8)(M), (int)(R)))
+
+#define _mm512_rcp28_pd(A) \
+  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_pd(S, M, A) \
+  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_pd(M, A) \
+  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_round_ps(A, R) \
+  ((__m...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/93123
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to