Author: Craig Topper Date: 2020-10-04T12:09:35-07:00 New Revision: 230c57b0bd8321085a5e0339baf37b509d5c76f6
URL: https://github.com/llvm/llvm-project/commit/230c57b0bd8321085a5e0339baf37b509d5c76f6 DIFF: https://github.com/llvm/llvm-project/commit/230c57b0bd8321085a5e0339baf37b509d5c76f6.diff LOG: [X86] Synchronize the encodekey builtins with gcc. Don't assume void* is 16 byte aligned. We were taking multiple pointer arguments in the builtin. gcc accepts a single void*. The cast from void* to __m128i* caused the IR generation to assume the pointer was aligned. Instead, make the builtin take a single void*, emit i8* GEPs to compute each store address, then cast to <2 x i64>* and perform a store with an alignment of 1. Added: llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll Modified: clang/include/clang/Basic/BuiltinsX86.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/keylockerintrin.h clang/test/CodeGen/X86/keylocker.c Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 1fbc950998a1..c33026139b3c 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1902,10 +1902,10 @@ TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd") // KEY LOCKER TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey128, - "UiUiV2OiV2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*", "nV:128:", "kl") -TARGET_BUILTIN(__builtin_ia32_encodekey256, - "UiUiV2OiV2OiV2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, + "UiUiV2Oiv*", "nV:128:", "kl") +TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, + "UiUiV2OiV2Oiv*", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp 
b/clang/lib/CodeGen/CGBuiltin.cpp index e5f6ee138a21..d3603579844d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14039,8 +14039,37 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_psubusb128: case X86::BI__builtin_ia32_psubusw128: return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat); - case X86::BI__builtin_ia32_encodekey128: - case X86::BI__builtin_ia32_encodekey256: + case X86::BI__builtin_ia32_encodekey128_u32: { + Intrinsic::ID IID = Intrinsic::x86_encodekey128; + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); + + for (int i = 0; i < 6; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[2], i * 16); + Ptr = Builder.CreateBitCast( + Ptr, llvm::PointerType::getUnqual(Extract->getType())); + Builder.CreateAlignedStore(Extract, Ptr, Align(1)); + } + + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_encodekey256_u32: { + Intrinsic::ID IID = Intrinsic::x86_encodekey256; + + Value *Call = + Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); + + for (int i = 0; i < 7; ++i) { + Value *Extract = Builder.CreateExtractValue(Call, i + 1); + Value *Ptr = Builder.CreateConstGEP1_32(Ops[3], i * 16); + Ptr = Builder.CreateBitCast( + Ptr, llvm::PointerType::getUnqual(Extract->getType())); + Builder.CreateAlignedStore(Extract, Ptr, Align(1)); + } + + return Builder.CreateExtractValue(Call, 0); + } case X86::BI__builtin_ia32_aesenc128kl: case X86::BI__builtin_ia32_aesdec128kl: case X86::BI__builtin_ia32_aesenc256kl: @@ -14056,18 +14085,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_encodekey128: - ID = Intrinsic::x86_encodekey128; - InOps = {Ops[0], Ops[1]}; - FirstReturnOp = 2; - ResultCount = 6; - break; - case 
X86::BI__builtin_ia32_encodekey256: - ID = Intrinsic::x86_encodekey256; - InOps = {Ops[0], Ops[1], Ops[2]}; - FirstReturnOp = 3; - ResultCount = 7; - break; case X86::BI__builtin_ia32_aesenc128kl: case X86::BI__builtin_ia32_aesdec128kl: case X86::BI__builtin_ia32_aesenc256kl: diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index 718771c869cc..c31ba16122a5 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -132,15 +132,7 @@ _mm_loadiwkey (unsigned int __ctl, __m128i __intkey, /// \endoperation static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { - __m128i *__results = (__m128i*)__h; - - return __builtin_ia32_encodekey128(__htype, __key, - __results, - __results + 1, - __results + 2, - __results + 3, - __results + 4, - __results + 5); + return __builtin_ia32_encodekey128_u32(__htype, (__v2di)__key, __h); } /// Wrap a 256-bit AES key from __key_hi:__key_lo into a key handle, then @@ -181,16 +173,8 @@ _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { static __inline__ unsigned int __DEFAULT_FN_ATTRS _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, void *__h) { - __m128i *__results = (__m128i*)__h; - - return __builtin_ia32_encodekey256(__htype, __key_lo, __key_hi, - __results, - __results + 1, - __results + 2, - __results + 3, - __results + 4, - __results + 5, - __results + 6); + return __builtin_ia32_encodekey256_u32(__htype, (__v2di)__key_lo, + (__v2di)__key_hi, __h); } /// The AESENC128KL performs 10 rounds of AES to encrypt the __idata using diff --git a/clang/test/CodeGen/X86/keylocker.c b/clang/test/CodeGen/X86/keylocker.c index 835bdd279ef1..b410d53b4b83 100644 --- a/clang/test/CodeGen/X86/keylocker.c +++ b/clang/test/CodeGen/X86/keylocker.c @@ -14,12 +14,64 @@ void test_loadiwkey(unsigned int ctl, __m128i intkey, __m128i enkey_lo, __m128i unsigned int 
test_encodekey128_u32(unsigned int htype, __m128i key, void *h) { //CHECK-LABEL: @test_encodekey128_u32 //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 16 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 32 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 48 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 64 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 80 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_encodekey128_u32(htype, key, h); } unsigned int 
test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i key_hi, void *h) { //CHECK-LABEL: @test_encodekey256_u32 //CHECK: call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 1 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 2 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 16 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 3 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 32 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 4 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 48 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 5 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 64 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 6 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 80 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 
x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 7 + //CHECK: getelementptr i8, i8* %{{.*}}, i32 96 + //CHECK: bitcast i8* %{{.*}} to <2 x i64>* + //CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} + //CHECK: extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %{{.*}}, 0 return _mm_encodekey256_u32(htype, key_lo, key_hi, h); } diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll new file mode 100644 index 000000000000..b5518ec44dc2 --- /dev/null +++ b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl | FileCheck %s + +; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/X86/keylocker.c + +define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) { +; CHECK-LABEL: test_loadiwkey: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: loadiwkey %xmm2, %xmm1 +; CHECK-NEXT: retq +entry: + tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl) + ret void +} + +define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, i8* nocapture %h) { +; CHECK-LABEL: test_encodekey128_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: encodekey128 %edi, %eax +; CHECK-NEXT: movups %xmm0, (%rsi) +; CHECK-NEXT: movups %xmm1, 16(%rsi) +; CHECK-NEXT: movups %xmm2, 32(%rsi) +; CHECK-NEXT: movups %xmm4, 48(%rsi) +; CHECK-NEXT: movups %xmm5, 64(%rsi) +; CHECK-NEXT: movups %xmm6, 80(%rsi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key) + %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x 
i64>, <2 x i64>, <2 x i64> } %0, 1 + %2 = bitcast i8* %h to <2 x i64>* + store <2 x i64> %1, <2 x i64>* %2, align 1 + %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + %4 = getelementptr i8, i8* %h, i64 16 + %5 = bitcast i8* %4 to <2 x i64>* + store <2 x i64> %3, <2 x i64>* %5, align 1 + %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + %7 = getelementptr i8, i8* %h, i64 32 + %8 = bitcast i8* %7 to <2 x i64>* + store <2 x i64> %6, <2 x i64>* %8, align 1 + %9 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + %10 = getelementptr i8, i8* %h, i64 48 + %11 = bitcast i8* %10 to <2 x i64>* + store <2 x i64> %9, <2 x i64>* %11, align 1 + %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + %13 = getelementptr i8, i8* %h, i64 64 + %14 = bitcast i8* %13 to <2 x i64>* + store <2 x i64> %12, <2 x i64>* %14, align 1 + %15 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + %16 = getelementptr i8, i8* %h, i64 80 + %17 = bitcast i8* %16 to <2 x i64>* + store <2 x i64> %15, <2 x i64>* %17, align 1 + %18 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %18 +} + +define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, i8* nocapture %h) { +; CHECK-LABEL: test_encodekey256_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: encodekey256 %edi, %eax +; CHECK-NEXT: movups %xmm0, (%rsi) +; CHECK-NEXT: movups %xmm1, 16(%rsi) +; CHECK-NEXT: movups %xmm2, 32(%rsi) +; CHECK-NEXT: movups %xmm3, 48(%rsi) +; CHECK-NEXT: movups %xmm4, 64(%rsi) +; CHECK-NEXT: movups %xmm5, 80(%rsi) +; CHECK-NEXT: movups %xmm6, 96(%rsi) +; CHECK-NEXT: retq +entry: + %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 
x i64> %key_lo, <2 x i64> %key_hi) + %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1 + %2 = bitcast i8* %h to <2 x i64>* + store <2 x i64> %1, <2 x i64>* %2, align 1 + %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2 + %4 = getelementptr i8, i8* %h, i64 16 + %5 = bitcast i8* %4 to <2 x i64>* + store <2 x i64> %3, <2 x i64>* %5, align 1 + %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 + %7 = getelementptr i8, i8* %h, i64 32 + %8 = bitcast i8* %7 to <2 x i64>* + store <2 x i64> %6, <2 x i64>* %8, align 1 + %9 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 + %10 = getelementptr i8, i8* %h, i64 48 + %11 = bitcast i8* %10 to <2 x i64>* + store <2 x i64> %9, <2 x i64>* %11, align 1 + %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 + %13 = getelementptr i8, i8* %h, i64 64 + %14 = bitcast i8* %13 to <2 x i64>* + store <2 x i64> %12, <2 x i64>* %14, align 1 + %15 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 + %16 = getelementptr i8, i8* %h, i64 80 + %17 = bitcast i8* %16 to <2 x i64>* + store <2 x i64> %15, <2 x i64>* %17, align 1 + %18 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7 + %19 = getelementptr i8, i8* %h, i64 96 + %20 = bitcast i8* %19 to <2 x i64>* + store <2 x i64> %18, <2 x i64>* %20, align 1 + %21 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %21 +} + +declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32) +declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>) +declare { i32, <2 x i64>, <2 x 
i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits