FreddyYe updated this revision to Diff 540909.
FreddyYe added a comment.

Remove `-check-prefix=CHECK`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155146/new/

https://reviews.llvm.org/D155146

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/sha512intrin.h
  clang/test/CodeGen/X86/sha512-builtins.c
  clang/test/CodeGen/attr-target-x86.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/include/llvm/TargetParser/X86TargetParser.def
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/lib/TargetParser/Host.cpp
  llvm/lib/TargetParser/X86TargetParser.cpp
  llvm/test/CodeGen/X86/sha512-intrinsics.ll
  llvm/test/MC/Disassembler/X86/sha512.txt
  llvm/test/MC/X86/sha512-att.s
  llvm/test/MC/X86/sha512-intel.s

Index: llvm/test/MC/X86/sha512-intel.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/sha512-intel.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple i686 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsha512msg1 ymm2, xmm3
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcc,0xd3]
+               vsha512msg1 ymm2, xmm3
+
+// CHECK:      vsha512msg2 ymm2, ymm3
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcd,0xd3]
+               vsha512msg2 ymm2, ymm3
+
+// CHECK:      vsha512rnds2 ymm2, ymm3, xmm4
+// CHECK: encoding: [0xc4,0xe2,0x67,0xcb,0xd4]
+               vsha512rnds2 ymm2, ymm3, xmm4
Index: llvm/test/MC/X86/sha512-att.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/sha512-att.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple i686 --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsha512msg1 %xmm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcc,0xd3]
+               vsha512msg1 %xmm3, %ymm2
+
+// CHECK:      vsha512msg2 %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcd,0xd3]
+               vsha512msg2 %ymm3, %ymm2
+
+// CHECK:      vsha512rnds2 %xmm4, %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x67,0xcb,0xd4]
+               vsha512rnds2 %xmm4, %ymm3, %ymm2
Index: llvm/test/MC/Disassembler/X86/sha512.txt
===================================================================
--- /dev/null
+++ llvm/test/MC/Disassembler/X86/sha512.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+# RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsha512msg1 %xmm3, %ymm2
+# INTEL:      vsha512msg1 ymm2, xmm3
+0xc4,0xe2,0x7f,0xcc,0xd3
+
+# ATT:        vsha512msg2 %ymm3, %ymm2
+# INTEL:      vsha512msg2 ymm2, ymm3
+0xc4,0xe2,0x7f,0xcd,0xd3
+
+# ATT:        vsha512rnds2 %xmm4, %ymm3, %ymm2
+# INTEL:      vsha512rnds2 ymm2, ymm3, xmm4
+0xc4,0xe2,0x67,0xcb,0xd4
+
Index: llvm/test/CodeGen/X86/sha512-intrinsics.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/sha512-intrinsics.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+sha512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+sha512 | FileCheck %s
+
+define <4 x i64> @test_int_x86_vsha512msg1(<4 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: test_int_x86_vsha512msg1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsha512msg1 %xmm1, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xcc,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.vsha512msg1(<4 x i64> %A, <2 x i64> %B)
+  ret <4 x i64> %ret
+}
+declare <4 x i64> @llvm.x86.vsha512msg1(<4 x i64> %A, <2 x i64> %B)
+
+define <4 x i64> @test_int_x86_vsha512msg2(<4 x i64> %A, <4 x i64> %B) {
+; CHECK-LABEL: test_int_x86_vsha512msg2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsha512msg2 %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xcd,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.vsha512msg2(<4 x i64> %A, <4 x i64> %B)
+  ret <4 x i64> %ret
+}
+declare <4 x i64> @llvm.x86.vsha512msg2(<4 x i64> %A, <4 x i64> %B)
+
+define <4 x i64> @test_int_x86_vsha512rnds2(<4 x i64> %A, <4 x i64> %B, <2 x i64> %C) {
+; CHECK-LABEL: test_int_x86_vsha512rnds2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsha512rnds2 %xmm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0xcb,0xc2]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i64> @llvm.x86.vsha512rnds2(<4 x i64> %A, <4 x i64> %B, <2 x i64> %C)
+  ret <4 x i64> %ret
+}
+declare <4 x i64> @llvm.x86.vsha512rnds2(<4 x i64> %A, <4 x i64> %B, <2 x i64> %C)
Index: llvm/lib/TargetParser/X86TargetParser.cpp
===================================================================
--- llvm/lib/TargetParser/X86TargetParser.cpp
+++ llvm/lib/TargetParser/X86TargetParser.cpp
@@ -655,6 +655,7 @@
 constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
 constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
 constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
+constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX;
 constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
     FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
 // Key Locker Features
Index: llvm/lib/TargetParser/Host.cpp
===================================================================
--- llvm/lib/TargetParser/Host.cpp
+++ llvm/lib/TargetParser/Host.cpp
@@ -1746,6 +1746,7 @@
   Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
   bool HasLeaf7Subleaf1 =
       MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+  Features["sha512"]     = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
   Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
Index: llvm/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/lib/Target/X86/X86InstrSSE.td
+++ llvm/lib/Target/X86/X86InstrSSE.td
@@ -8295,3 +8295,25 @@
                 (VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">;
 def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}",
                 (VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">;
+
+// FIXME: Is there a better scheduler class for SHA512 than WriteVecIMul?
+let Predicates = [HasSHA512], Constraints = "$src1 = $dst" in {
+def VSHA512MSG1rr : I<0xcc, MRMSrcReg, (outs VR256:$dst),
+                     (ins VR256:$src1, VR128:$src2),
+                     "vsha512msg1\t{$src2, $dst|$dst, $src2}",
+                     [(set VR256:$dst,
+                       (int_x86_vsha512msg1 VR256:$src1, VR128:$src2))]>, VEX_L,
+                     VEX, T8XD, Sched<[WriteVecIMul]>;
+def VSHA512MSG2rr : I<0xcd, MRMSrcReg, (outs VR256:$dst),
+                     (ins VR256:$src1, VR256:$src2),
+                     "vsha512msg2\t{$src2, $dst|$dst, $src2}",
+                     [(set VR256:$dst,
+                       (int_x86_vsha512msg2 VR256:$src1, VR256:$src2))]>, VEX_L,
+                     VEX, T8XD, Sched<[WriteVecIMul]>;
+def VSHA512RNDS2rr : I<0xcb, MRMSrcReg, (outs VR256:$dst),
+                      (ins VR256:$src1, VR256:$src2, VR128:$src3),
+                      "vsha512rnds2\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                      [(set VR256:$dst,
+                        (int_x86_vsha512rnds2 VR256:$src1, VR256:$src2, VR128:$src3))]>,
+                      VEX_L, VEX_4V, T8XD, Sched<[WriteVecIMul]>;
+}
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -967,6 +967,7 @@
 def HasRTM       : Predicate<"Subtarget->hasRTM()">;
 def HasADX       : Predicate<"Subtarget->hasADX()">;
 def HasSHA       : Predicate<"Subtarget->hasSHA()">;
+def HasSHA512    : Predicate<"Subtarget->hasSHA512()">;
 def HasSGX       : Predicate<"Subtarget->hasSGX()">;
 def HasRDSEED    : Predicate<"Subtarget->hasRDSEED()">;
 def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -238,6 +238,9 @@
 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
                                       "Enable SHA instructions",
                                       [FeatureSSE2]>;
+def FeatureSHA512  : SubtargetFeature<"sha512", "HasSHA512", "true",
+                                      "Support SHA512 instructions",
+                                      [FeatureAVX]>;
 // Processor supports CET SHSTK - Control-Flow Enforcement Technology
 // using Shadow Stack
 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
@@ -1066,6 +1069,7 @@
   // Graniterapids
   list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
                                                   FeaturePREFETCHI,
+                                                  FeatureSHA512,
                                                   FeatureAMXCOMPLEX];
   list<SubtargetFeature> GNRFeatures =
     !listconcat(SPRFeatures, GNRAdditionalFeatures);
Index: llvm/include/llvm/TargetParser/X86TargetParser.def
===================================================================
--- llvm/include/llvm/TargetParser/X86TargetParser.def
+++ llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -227,6 +227,7 @@
 X86_FEATURE       (AVXVNNI,         "avxvnni")
 X86_FEATURE       (AVXIFMA,         "avxifma")
 X86_FEATURE       (AVXVNNIINT8,     "avxvnniint8")
+X86_FEATURE       (SHA512,          "sha512")
 // These features aren't really CPU features, but the frontend can set them.
 X86_FEATURE       (RETPOLINE_EXTERNAL_THUNK,    "retpoline-external-thunk")
 X86_FEATURE       (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
Index: llvm/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsX86.td
+++ llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5105,6 +5105,20 @@
                             [IntrNoMem]>;
 }
 
+//===----------------------------------------------------------------------===//
+// SHA512 intrinsics
+let TargetPrefix = "x86" in {
+def int_x86_vsha512msg1 : ClangBuiltin<"__builtin_ia32_vsha512msg1">,
+    DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty],
+                          [IntrNoMem]>;
+def int_x86_vsha512msg2 : ClangBuiltin<"__builtin_ia32_vsha512msg2">,
+    DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+                          [IntrNoMem]>;
+def int_x86_vsha512rnds2 : ClangBuiltin<"__builtin_ia32_vsha512rnds2">,
+    DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v2i64_ty],
+                          [IntrNoMem]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Thread synchronization ops with timer.
 let TargetPrefix = "x86" in {
Index: llvm/docs/ReleaseNotes.rst
===================================================================
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -276,7 +276,7 @@
 
 * ``__builtin_unpredictable`` (unpredictable metadata in LLVM IR), is handled by X86 Backend.
   ``X86CmovConversion`` pass now respects this builtin and does not convert CMOVs to branches.
-
+* Support ISA of ``SHA512``.
 
 Changes to the OCaml bindings
 -----------------------------
Index: clang/test/Preprocessor/x86_target_features.c
===================================================================
--- clang/test/Preprocessor/x86_target_features.c
+++ clang/test/Preprocessor/x86_target_features.c
@@ -660,6 +660,19 @@
 // AVXNECONVERTNOAVX2-NOT: #define __AVX2__ 1
 // AVXNECONVERTNOAVX2-NOT: #define __AVXNECONVERT__ 1
 
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -msha512 -x c -E -dM -o - %s | FileCheck  -check-prefix=SHA512 %s
+
+// SHA512: #define __AVX__ 1
+// SHA512: #define __SHA512__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mno-sha512 -x c -E -dM -o - %s | FileCheck  -check-prefix=NOSHA512 %s
+// NOSHA512-NOT: #define __SHA512__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -msha512 -mno-avx -x c -E -dM -o - %s | FileCheck  -check-prefix=SHA512NOAVX %s
+
+// SHA512NOAVX-NOT: #define __AVX__ 1
+// SHA512NOAVX-NOT: #define __SHA512__ 1
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
 
 // CRC32: #define __CRC32__ 1
Index: clang/test/Driver/x86-target-features.c
===================================================================
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -349,6 +349,11 @@
 // AVXNECONVERT: "-target-feature" "+avxneconvert"
 // NO-AVXNECONVERT: "-target-feature" "-avxneconvert"
 
+// RUN: %clang --target=i386 -msha512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SHA512 %s
+// RUN: %clang --target=i386 -mno-sha512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SHA512 %s
+// SHA512: "-target-feature" "+sha512"
+// NO-SHA512: "-target-feature" "-sha512"
+
 // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
 // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
 // CRC32: "-target-feature" "+crc32"
Index: clang/test/CodeGen/attr-target-x86.c
===================================================================
--- clang/test/CodeGen/attr-target-x86.c
+++ clang/test/CodeGen/attr-target-x86.c
@@ -54,9 +54,9 @@
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
 // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
 // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
 // CHECK-NOT: tune-cpu
 // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx"
Index: clang/test/CodeGen/X86/sha512-builtins.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/sha512-builtins.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +sha512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +sha512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m256i test_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm256_sha512msg1_epi64(
+  // CHECK: call <4 x i64> @llvm.x86.vsha512msg1(<4 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm256_sha512msg1_epi64(__A, __B);
+}
+
+__m256i test_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_sha512msg2_epi64(
+  // CHECK: call <4 x i64> @llvm.x86.vsha512msg2(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+  return _mm256_sha512msg2_epi64(__A, __B);
+}
+
+__m256i test_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
+  // CHECK-LABEL: @test_mm256_sha512rnds2_epi64(
+  // CHECK: call <4 x i64> @llvm.x86.vsha512rnds2(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm256_sha512rnds2_epi64(__A, __B, __C);
+}
Index: clang/lib/Headers/sha512intrin.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/sha512intrin.h
@@ -0,0 +1,39 @@
+/*===--------------- sha512intrin.h - SHA512 intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SHA512INTRIN_H
+#define __SHA512INTRIN_H
+
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("sha512"),         \
+                 __min_vector_width__(256)))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
+  return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
+  return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
+  return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B,
+                                              (__v2du)__C);
+}
+
+#undef __DEFAULT_FN_ATTRS256
+
+#endif // __SHA512INTRIN_H
Index: clang/lib/Headers/immintrin.h
===================================================================
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -269,6 +269,11 @@
 #include <avxneconvertintrin.h>
 #endif
 
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__SHA512__)
+#include <sha512intrin.h>
+#endif
+
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__RDPID__)
 /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -203,6 +203,7 @@
   rtmintrin.h
   serializeintrin.h
   sgxintrin.h
+  sha512intrin.h
   shaintrin.h
   smmintrin.h
   tbmintrin.h
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -112,6 +112,7 @@
   bool HasAVX512IFMA = false;
   bool HasAVX512VP2INTERSECT = false;
   bool HasSHA = false;
+  bool HasSHA512 = false;
   bool HasSHSTK = false;
   bool HasSGX = false;
   bool HasCX8 = false;
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -261,6 +261,8 @@
       HasAVX512VP2INTERSECT = true;
     } else if (Feature == "+sha") {
       HasSHA = true;
+    } else if (Feature == "+sha512") {
+      HasSHA512 = true;
     } else if (Feature == "+shstk") {
       HasSHSTK = true;
     } else if (Feature == "+movbe") {
@@ -749,6 +751,8 @@
     Builder.defineMacro("__AVX512VP2INTERSECT__");
   if (HasSHA)
     Builder.defineMacro("__SHA__");
+  if (HasSHA512)
+    Builder.defineMacro("__SHA512__");
 
   if (HasFXSR)
     Builder.defineMacro("__FXSR__");
@@ -999,6 +1003,7 @@
       .Case("serialize", true)
       .Case("sgx", true)
       .Case("sha", true)
+      .Case("sha512", true)
       .Case("shstk", true)
       .Case("sse", true)
       .Case("sse2", true)
@@ -1104,6 +1109,7 @@
       .Case("serialize", HasSERIALIZE)
       .Case("sgx", HasSGX)
       .Case("sha", HasSHA)
+      .Case("sha512", HasSHA512)
       .Case("shstk", HasSHSTK)
       .Case("sse", SSELevel >= SSE1)
       .Case("sse2", SSELevel >= SSE2)
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -5043,6 +5043,8 @@
 def mno_sgx : Flag<["-"], "mno-sgx">, Group<m_x86_Features_Group>;
 def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
 def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
+def msha512 : Flag<["-"], "msha512">, Group<m_x86_Features_Group>;
+def mno_sha512 : Flag<["-"], "mno-sha512">, Group<m_x86_Features_Group>;
 def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
 def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
 def mtsxldtrk : Flag<["-"], "mtsxldtrk">, Group<m_x86_Features_Group>;
Index: clang/include/clang/Basic/BuiltinsX86.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -2132,6 +2132,11 @@
 TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16128, "V8yV4f", "nV:128:", "avx512bf16,avx512vl|avxneconvert")
 TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16256, "V8yV8f", "nV:256:", "avx512bf16,avx512vl|avxneconvert")
 
+// SHA512
+TARGET_BUILTIN(__builtin_ia32_vsha512msg1, "V4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
+TARGET_BUILTIN(__builtin_ia32_vsha512msg2, "V4ULLiV4ULLiV4ULLi", "nV:256:", "sha512")
+TARGET_BUILTIN(__builtin_ia32_vsha512rnds2, "V4ULLiV4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
+
 TARGET_HEADER_BUILTIN(_InterlockedAnd64,         "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedDecrement64,   "WiWiD*",   "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedExchange64,    "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -700,6 +700,10 @@
 
 - Add ISA of ``AMX-COMPLEX`` which supports ``tcmmimfp16ps`` and
   ``tcmmrlfp16ps``.
+- Support ISA of ``SHA512``.
+  * Support intrinsic of ``_mm256_sha512msg1_epi64``.
+  * Support intrinsic of ``_mm256_sha512msg2_epi64``.
+  * Support intrinsic of ``_mm256_sha512rnds2_epi64``.
 
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to