Author: Craig Topper Date: 2020-10-04T12:09:41-07:00 New Revision: a02b449bb1556fe0f17b86eaa69f6bcda945d123
URL: https://github.com/llvm/llvm-project/commit/a02b449bb1556fe0f17b86eaa69f6bcda945d123 DIFF: https://github.com/llvm/llvm-project/commit/a02b449bb1556fe0f17b86eaa69f6bcda945d123.diff LOG: [X86] Sync AESENC/DEC Key Locker builtins with gcc. For the wide builtins, pass a single input and output pointer to the builtins. Emit the GEPs and input loads from CGBuiltin. Added: Modified: clang/include/clang/Basic/BuiltinsX86.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/keylockerintrin.h clang/test/CodeGen/X86/keylocker.c llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index c33026139b3c..8f9cfe4b6dc5 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1902,22 +1902,16 @@ TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd") // KEY LOCKER TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, - "UiUiV2Oiv*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, - "UiUiV2OiV2Oiv*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesdec256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_aesencwide128kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") -TARGET_BUILTIN(__builtin_ia32_aesencwide256kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") -TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") -TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl, - "UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, "UiUiV2Oiv*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, "UiUiV2OiV2Oiv*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesenc128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesenc256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesdec128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesdec256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_aesencwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesencwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") +TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl") // SERIALIZE TARGET_BUILTIN(__builtin_ia32_serialize, "v", "n", "serialize") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d3603579844d..dc3cafa5d062 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14070,75 +14070,67 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractValue(Call, 0); } - case X86::BI__builtin_ia32_aesenc128kl: - case X86::BI__builtin_ia32_aesdec128kl: - case X86::BI__builtin_ia32_aesenc256kl: - case X86::BI__builtin_ia32_aesdec256kl: - case X86::BI__builtin_ia32_aesencwide128kl: - case X86::BI__builtin_ia32_aesdecwide128kl: - case X86::BI__builtin_ia32_aesencwide256kl: - case X86::BI__builtin_ia32_aesdecwide256kl: { - int FirstReturnOp; - int ResultCount; - SmallVector<Value*, 9> InOps; - unsigned ID; - + case X86::BI__builtin_ia32_aesenc128kl_u8: + case X86::BI__builtin_ia32_aesdec128kl_u8: + case X86::BI__builtin_ia32_aesenc256kl_u8: + case X86::BI__builtin_ia32_aesdec256kl_u8: { + Intrinsic::ID IID; switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_aesenc128kl: - case X86::BI__builtin_ia32_aesdec128kl: - case X86::BI__builtin_ia32_aesenc256kl: - case X86::BI__builtin_ia32_aesdec256kl: { - InOps = {Ops[1], Ops[2]}; - FirstReturnOp = 0; - ResultCount = 1; - switch (BuiltinID) { - case X86::BI__builtin_ia32_aesenc128kl: - ID = Intrinsic::x86_aesenc128kl; - break; - case X86::BI__builtin_ia32_aesdec128kl: - ID = Intrinsic::x86_aesdec128kl; - break; - case X86::BI__builtin_ia32_aesenc256kl: - ID = Intrinsic::x86_aesenc256kl; - break; - case X86::BI__builtin_ia32_aesdec256kl: - ID = Intrinsic::x86_aesdec256kl; - break; - } + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_aesenc128kl_u8: + IID = Intrinsic::x86_aesenc128kl; + break; + case X86::BI__builtin_ia32_aesdec128kl_u8: + IID = Intrinsic::x86_aesdec128kl; + break; + case X86::BI__builtin_ia32_aesenc256kl_u8: + IID = Intrinsic::x86_aesenc256kl; + break; + case X86::BI__builtin_ia32_aesdec256kl_u8: + IID = Intrinsic::x86_aesdec256kl; break; } - case X86::BI__builtin_ia32_aesencwide128kl: - case X86::BI__builtin_ia32_aesdecwide128kl: - case X86::BI__builtin_ia32_aesencwide256kl: - case X86::BI__builtin_ia32_aesdecwide256kl: { - InOps = {Ops[0], Ops[9], Ops[10], Ops[11], Ops[12], Ops[13], - Ops[14], Ops[15], Ops[16]}; - FirstReturnOp = 1; - ResultCount = 8; - switch (BuiltinID) { - case X86::BI__builtin_ia32_aesencwide128kl: - ID = Intrinsic::x86_aesencwide128kl; - break; - case X86::BI__builtin_ia32_aesdecwide128kl: - ID = Intrinsic::x86_aesdecwide128kl; - break; - case X86::BI__builtin_ia32_aesencwide256kl: - ID = Intrinsic::x86_aesencwide256kl; - break; - case X86::BI__builtin_ia32_aesdecwide256kl: - ID = Intrinsic::x86_aesdecwide256kl; - break; - } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]}); + + Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), + Ops[0]); + + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_aesencwide128kl_u8: + case X86::BI__builtin_ia32_aesdecwide128kl_u8: + case X86::BI__builtin_ia32_aesencwide256kl_u8: + case X86::BI__builtin_ia32_aesdecwide256kl_u8: { + Intrinsic::ID IID; + switch (BuiltinID) { + case X86::BI__builtin_ia32_aesencwide128kl_u8: + IID = Intrinsic::x86_aesencwide128kl; + break; + case X86::BI__builtin_ia32_aesdecwide128kl_u8: + IID = Intrinsic::x86_aesdecwide128kl; + break; + case X86::BI__builtin_ia32_aesencwide256kl_u8: + IID = Intrinsic::x86_aesencwide256kl; + break; + case X86::BI__builtin_ia32_aesdecwide256kl_u8: + IID = Intrinsic::x86_aesdecwide256kl; break; } + + Value *InOps[9]; + InOps[0] = Ops[2]; + for (int i = 0; i != 8; ++i) { + Value *Ptr = Builder.CreateConstGEP1_32(Ops[1], i); + InOps[i + 1] = Builder.CreateAlignedLoad(Ptr, Align(16)); } - Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), InOps); + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps); - for (int i = 0; i < ResultCount; ++i) { - Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, i + 1), - Ops[FirstReturnOp + i]); + for (int i = 0; i != 8; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i); + Builder.CreateAlignedStore(Extract, Ptr, Align(16)); } return Builder.CreateExtractValue(Call, 0); diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index c31ba16122a5..c15d39c8e392 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -211,7 +211,7 @@ _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesenc128kl(__odata, __idata, __h); + return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using @@ -248,7 +248,7 @@ _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesenc256kl(__odata, __idata, __h); + return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using @@ -285,7 +285,7 @@ _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesdec128kl(__odata, __idata, __h); + return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } /// The AESDEC256KL performs 10 rounds of AES to decrypt the __idata using @@ -322,7 +322,7 @@ _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { - return __builtin_ia32_aesdec256kl(__odata, __idata, __h); + return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); } #undef __DEFAULT_FN_ATTRS @@ -374,23 +374,8 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesencwide128kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } /// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle @@ -429,23 +414,8 @@ _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesencwide256kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle @@ -484,23 +454,8 @@ _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesdecwide128kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } /// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle @@ -539,23 +494,8 @@ _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* /// \endoperation static __inline__ unsigned char __DEFAULT_FN_ATTRS _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { - return __builtin_ia32_aesdecwide256kl(__h, - __odata, - __odata + 1, - __odata + 2, - __odata + 3, - __odata + 4, - __odata + 5, - __odata + 6, - __odata + 7, - __idata[0], - __idata[1], - __idata[2], - __idata[3], - __idata[4], - __idata[5], - __idata[6], - __idata[7]); + return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); } #undef __DEFAULT_FN_ATTRS diff --git a/clang/test/CodeGen/X86/keylocker.c b/clang/test/CodeGen/X86/keylocker.c index b410d53b4b83..b87fe22d7761 100644 --- a/clang/test/CodeGen/X86/keylocker.c +++ b/clang/test/CodeGen/X86/keylocker.c @@ -78,47 +78,215 @@ unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i k unsigned char test_mm_aesenc256kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesenc256kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesenc256kl_u8(odata, idata, h); } unsigned char test_mm_aesdec256kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesdec256kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesdec256kl_u8(odata, idata, h); } unsigned char test_mm_aesenc128kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesenc128kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesenc128kl_u8(odata, idata, h); } unsigned char test_mm_aesdec128kl_u8(__m128i *odata, __m128i idata, const void *h) { //CHECK-LABEL: @test_mm_aesdec128kl_u8 //CHECK: call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %{{.*}}, i8* %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64> } %{{.*}}, 0 return _mm_aesdec128kl_u8(odata, idata, h); } unsigned char test__mm_aesencwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesencwide128kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesencwide128kl_u8(odata, idata, h); } unsigned char test__mm_aesdecwide128kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesdecwide128kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesdecwide128kl_u8(odata, idata, h); } unsigned char test__mm_aesencwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesencwide256kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesencwide256kl_u8(odata, idata, h); } unsigned char test__mm_aesdecwide256kl_u8(__m128i odata[8], const __m128i idata[8], const void* h) { //CHECK-LABEL: @test__mm_aesdecwide256kl - //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16 + //CHECK: call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 1 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 2 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 3 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 4 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 5 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 6 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 8 + //CHECK: getelementptr <2 x i64>, <2 x i64>* %{{.*}}, i32 7 + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16 + //CHECK: extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_aesdecwide256kl_u8(odata, idata, h); } diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll index b5518ec44dc2..a2443ffbc4e6 100644 --- a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll @@ -99,6 +99,346 @@ entry: ret i32 %21 } +define zeroext i8 @test_mm_aesenc256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesenc256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesenc256kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test_mm_aesdec256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesdec256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdec256kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test_mm_aesenc128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesenc128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesenc128kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test_mm_aesdec128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) { +; CHECK-LABEL: test_mm_aesdec128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdec128kl (%rsi), %xmm0 +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, i8* %h) #1 + %1 = extractvalue { i8, <2 x i64> } %0, 1 + store <2 x i64> %1, <2 x i64>* %odata, align 16 + %2 = extractvalue { i8, <2 x i64> } %0, 0 + ret i8 %2 +} + +define zeroext i8 @test__mm_aesencwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesencwide128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesencwide128kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + +define zeroext i8 @test__mm_aesdecwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesdecwide128kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdecwide128kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + +define zeroext i8 @test__mm_aesencwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesencwide256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesencwide256kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + +define zeroext i8 @test__mm_aesdecwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) { +; CHECK-LABEL: test__mm_aesdecwide256kl_u8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps (%rsi), %xmm0 +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps 32(%rsi), %xmm2 +; CHECK-NEXT: movaps 48(%rsi), %xmm3 +; CHECK-NEXT: movaps 64(%rsi), %xmm4 +; CHECK-NEXT: movaps 80(%rsi), %xmm5 +; CHECK-NEXT: movaps 96(%rsi), %xmm6 +; CHECK-NEXT: movaps 112(%rsi), %xmm7 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: aesdecwide256kl (%rdx) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movaps %xmm0, (%rdi) +; CHECK-NEXT: movaps %xmm1, 16(%rdi) +; CHECK-NEXT: movaps %xmm2, 32(%rdi) +; CHECK-NEXT: movaps %xmm3, 48(%rdi) +; CHECK-NEXT: movaps %xmm4, 64(%rdi) +; CHECK-NEXT: movaps %xmm5, 80(%rdi) +; CHECK-NEXT: movaps %xmm6, 96(%rdi) +; CHECK-NEXT: movaps %xmm7, 112(%rdi) +; CHECK-NEXT: retq +entry: + %0 = load <2 x i64>, <2 x i64>* %idata, align 16 + %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1 + %2 = load <2 x i64>, <2 x i64>* %1, align 16 + %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2 + %4 = load <2 x i64>, <2 x i64>* %3, align 16 + %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3 + %6 = load <2 x i64>, <2 x i64>* %5, align 16 + %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4 + %8 = load <2 x i64>, <2 x i64>* %7, align 16 + %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5 + %10 = load <2 x i64>, <2 x i64>* %9, align 16 + %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6 + %12 = load <2 x i64>, <2 x i64>* %11, align 16 + %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7 + %14 = load <2 x i64>, <2 x i64>* %13, align 16 + %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1 + %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1 + store <2 x i64> %16, <2 x i64>* %odata, align 16 + %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2 + %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1 + store <2 x i64> %17, <2 x i64>* %18, align 16 + %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3 + %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2 + store <2 x i64> %19, <2 x i64>* %20, align 16 + %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4 + %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3 + store <2 x i64> %21, <2 x i64>* %22, align 16 + %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5 + %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4 + store <2 x i64> %23, <2 x i64>* %24, align 16 + %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6 + %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5 + store <2 x i64> %25, <2 x i64>* %26, align 16 + %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7 + %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6 + store <2 x i64> %27, <2 x i64>* %28, align 16 + %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8 + %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7 + store <2 x i64> %29, <2 x i64>* %30, align 16 + %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0 + ret i8 %31 +} + declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32) declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>) declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, i8*) +declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) +declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits