[PATCH v2 4/5] crypto: AES CBC by8 encryption
This patch introduces the assembly routine to do a by8 AES CBC encryption in support of the AES CBC multi-buffer implementation. Encryption of 8 data streams of a key size are done simultaneously. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen --- arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S | 774 1 file changed, 774 insertions(+) create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S b/arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S new file mode 100644 index 000..eaffc28 --- /dev/null +++ b/arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S @@ -0,0 +1,774 @@ +/* + * AES CBC by8 multibuffer optimization (x86_64) + * This file implements 128/192/256 bit AES CBC encryption + * + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#include + +/* stack size needs to be an odd multiple of 8 for alignment */ + +#define AES_KEYSIZE_12816 +#define AES_KEYSIZE_19224 +#define AES_KEYSIZE_25632 + +#define XMM_SAVE_SIZE 16*10 +#define GPR_SAVE_SIZE 8*9 +#define STACK_SIZE (XMM_SAVE_SIZE + GPR_SAVE_SIZE) + +#define GPR_SAVE_REG %rsp +#define GPR_SAVE_AREA %rsp + XMM_SAVE_SIZE +#define LEN_AREA_OFFSETXMM_SAVE_SIZE + 8*8 +#define LEN_AREA_REG %rsp +#define LEN_AREA %rsp + XMM_SAVE_SIZE + 8*8 + +#define IN_OFFSET 0 +#define OUT_OFFSET 8*8 +#define KEYS_OFFSET16*8 +#define IV_OFFSET 24*8 + + +#define IDX%rax +#define TMP%rbx +#define ARG%rdi +#define LEN%rsi + +#define KEYS0 %r14 +#define KEYS1 %r15 +#define KEYS2 %rbp +#define KEYS3 %rdx +#define KEYS4 %rcx +#define KEYS5 %r8 +#define KEYS6 %r9 +#define KEYS7 %r10 + +#define IN0%r11 +#define IN2%r12 +#define IN4%r13 +#define IN6LEN + +#define XDATA0 %xmm0 +#define XDATA1 %xmm1 +#define XDATA2 %xmm2 +#define XDATA3 %xmm3 +#define XDATA4 %xmm4 +#define XDATA5 %xmm5 +#define XDATA6 %xmm6 +#define XDATA7 %xmm7 + +#define XKEY0_3%xmm8 +#define XKEY1_4%xmm9 +#define XKEY2_5%xmm10 +#define XKEY3_6%xmm11 +#define XKEY4_7%xmm12 +#define XKEY5_8%xmm13 +#define XKEY6_9%xmm14 +#define XTMP %xmm15 + +#defineMOVDQ movdqu /* assume buffers not aligned */ +#define CONCAT(a, b) a##b +#define INPUT_REG_SUFX 1 /* IN */ +#define XDATA_REG_SUFX 2 /* XDAT */ +#define KEY_REG_SUFX 3 /* KEY */ +#define XMM_REG_SUFX 4 /* XMM */ + +/* + * To avoid positional parameter errors while compiling + * three registers need to be passed + */ +.text + +.macro pxor2 x, y, z + MOVDQ (\x,\y), XTMP + pxorXTMP, \z +.endm + +.macro inreg n
[PATCH v2 2/5] crypto: AES CBC multi-buffer data structures
This patch introduces the data structures and prototypes of functions needed for doing AES CBC encryption using multi-buffer. Included are the structures of the multi-buffer AES CBC job, job scheduler in C and data structure defines in x86 assembly code. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen --- arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h| 96 + arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h| 131 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S | 270 + arch/x86/crypto/aes-cbc-mb/reg_sizes.S | 125 4 files changed, 622 insertions(+) create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S create mode 100644 arch/x86/crypto/aes-cbc-mb/reg_sizes.S diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h new file mode 100644 index 000..5493f83 --- /dev/null +++ b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h @@ -0,0 +1,96 @@ +/* + * Header file for multi buffer AES CBC algorithm manager + * that deals with 8 buffers at a time + * + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#ifndef __AES_CBC_MB_CTX_H +#define __AES_CBC_MB_CTX_H + + +#include + +#include "aes_cbc_mb_mgr.h" + +#define CBC_ENCRYPT0x01 +#define CBC_DECRYPT0x02 +#define CBC_START 0x04 +#define CBC_DONE 0x08 + +#define CBC_CTX_STS_IDLE 0x00 +#define CBC_CTX_STS_PROCESSING 0x01 +#define CBC_CTX_STS_LAST 0x02 +#define CBC_CTX_STS_COMPLETE 0x04 + +enum cbc_ctx_error { + CBC_CTX_ERROR_NONE = 0, + CBC_CTX_ERROR_INVALID_FLAGS = -1, + CBC_CTX_ERROR_ALREADY_PROCESSING = -2, + CBC_CTX_ERROR_ALREADY_COMPLETED = -3, +}; + +#define cbc_ctx_init(ctx, nbytes, op) \ + do { \ + (ctx)->flag = (op) | CBC_START; \ + (ctx)->nbytes = nbytes; \ + } while (0) + +/* AESNI routines to perform cbc decrypt and key expansion */ + +asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, +unsigned int key_len); + +#endif /* __AES_CBC_MB_CTX_H */ diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h new file mode 100644 index 000..0def82e --- /dev/null +++ b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h @@ -0,0 +1,131 @@ +/* + * Header file for multi buffer AES CBC algorithm manager + *
[PATCH v2 1/5] crypto: Multi-buffer encryption infrastructure support
In this patch, the infrastructure needed to support multibuffer encryption implementation is added: a) Enhance mcryptd daemon to support blkcipher requests. b) Update configuration to include multi-buffer encryption build support. c) Add crypto scatterwalk support that can sleep during encryption operation, as we may have buffers for jobs in data lanes that are half-finished, waiting for additional jobs to come to fill empty lanes before we start the encryption again. Therefore, we need to enhance crypto walk with the option to map data buffers non-atomically. This is done by algorithms run from crypto daemon who knows it is safe to do so as it can save and restore FPU state in correct context. For an introduction to the multi-buffer implementation, please see http://www.intel.com/content/www/us/en/communications/communications-ia-multi-buffer-paper.html Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen --- crypto/Kconfig | 16 +++ crypto/blkcipher.c | 29 - crypto/mcryptd.c | 256 ++- crypto/scatterwalk.c | 7 ++ include/crypto/algapi.h | 1 + include/crypto/mcryptd.h | 36 ++ include/crypto/scatterwalk.h | 6 + include/linux/crypto.h | 1 + 8 files changed, 347 insertions(+), 5 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 7240821..6b51084 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -888,6 +888,22 @@ config CRYPTO_AES_NI_INTEL ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional acceleration for CTR. +config CRYPTO_AES_CBC_MB + tristate "AES CBC algorithm (x86_64 Multi-Buffer, Experimental)" + depends on X86 && 64BIT + select CRYPTO_ABLK_HELPER + select CRYPTO_MCRYPTD + help + AES CBC encryption implemented using multi-buffer technique. + This algorithm computes on multiple data lanes concurrently with + SIMD instructions for better throughput. It should only be + used when there is significant work to generate many separate + crypto requests that keep all the data lanes filled to get + the performance benefit. 
If the data lanes are unfilled, a + flush operation will be initiated after some delay to process + the exisiting crypto jobs, adding some extra latency at low + load case. + config CRYPTO_AES_SPARC64 tristate "AES cipher algorithms (SPARC64)" depends on SPARC64 diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 11b9814..9fd4028 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -35,6 +35,9 @@ enum { BLKCIPHER_WALK_SLOW = 1 << 1, BLKCIPHER_WALK_COPY = 1 << 2, BLKCIPHER_WALK_DIFF = 1 << 3, + /* deal with scenarios where we can sleep during sg walk */ + /* when we process part of a request */ + BLKCIPHER_WALK_MAY_SLEEP = 1 << 4, }; static int blkcipher_walk_next(struct blkcipher_desc *desc, @@ -44,22 +47,38 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc, static inline void blkcipher_map_src(struct blkcipher_walk *walk) { - walk->src.virt.addr = scatterwalk_map(&walk->in); + /* add support for asynchronous requests which need no atomic map */ + if (walk->flags & BLKCIPHER_WALK_MAY_SLEEP) + walk->src.virt.addr = scatterwalk_map_nonatomic(&walk->in); + else + walk->src.virt.addr = scatterwalk_map(&walk->in); } static inline void blkcipher_map_dst(struct blkcipher_walk *walk) { - walk->dst.virt.addr = scatterwalk_map(&walk->out); + /* add support for asynchronous requests which need no atomic map */ + if (walk->flags & BLKCIPHER_WALK_MAY_SLEEP) + walk->dst.virt.addr = scatterwalk_map_nonatomic(&walk->out); + else + walk->dst.virt.addr = scatterwalk_map(&walk->out); } static inline void blkcipher_unmap_src(struct blkcipher_walk *walk) { - scatterwalk_unmap(walk->src.virt.addr); + /* add support for asynchronous requests which need no atomic map */ + if (walk->flags & BLKCIPHER_WALK_MAY_SLEEP) + scatterwalk_unmap_nonatomic(walk->src.virt.addr); + else + scatterwalk_unmap(walk->src.virt.addr); } static inline void blkcipher_unmap_dst(struct blkcipher_walk *walk) { - scatterwalk_unmap(walk->dst.virt.addr); + /* add support for asynchronous 
requests which need no atomic map */ + if (walk->flags & BLKCIPHER_WALK_MAY_SLEEP) + scatterwalk_unmap_nonatomic(walk->dst.virt.addr); + else + scatterwalk_unmap(walk->dst.virt.addr); } /* Get a spot of the specified length that does not straddle a page. @@ -299,6 +318,8 @@ static inline int blkcipher_copy_iv(struct blkcipher_walk *walk) int blkcipher_walk_virt(struct blkcipher_desc *desc, struct blkcipher_walk *walk) {
[PATCH v2 5/5] crypto: AES CBC multi-buffer glue code
This patch introduces the multi-buffer job manager which is responsible for submitting scatter-gather buffers from several AES CBC jobs to the multi-buffer algorithm. The glue code interfaces with the underlying algorithm that handles 8 data streams of AES CBC encryption in parallel. AES key expansion and CBC decryption requests are performed in a manner similar to the existing AESNI Intel glue driver. The outline of the algorithm for AES CBC encryption requests is sketched below: Any driver requesting the crypto service will place an async crypto request on the workqueue. The multi-buffer crypto daemon will pull an AES CBC encryption request from work queue and put each request in an empty data lane for multi-buffer crypto computation. When all the empty lanes are filled, computation will commence on the jobs in parallel and the job with the shortest remaining buffer will get completed and be returned. To prevent prolonged stall, when no new jobs arrive, we will flush workqueue of jobs after a maximum allowable delay has elapsed. To accommodate the fragmented nature of scatter-gather, we will keep submitting the next scatter-buffer fragment for a job for multi-buffer computation until a job is completed and no more buffer fragments remain. At that time we will pull a new job to fill the now empty data slot. We check with the multibuffer scheduler to see if there are other completed jobs to prevent extraneous delay in returning any completed jobs. This multi-buffer algorithm should be used for cases where we get at least 8 streams of crypto jobs submitted at a reasonably high rate. For low crypto job submission rate and low number of data streams, this algorithm will not be beneficial. The reason is at low rate, we do not fill out the data lanes before flushing the jobs instead of processing them with all the data lanes full. We will miss the benefit of parallel computation, and add delay to the processing of the crypto job at the same time. 
Some tuning of the maximum latency parameter may be needed to get the best performance. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen --- arch/x86/crypto/Makefile| 1 + arch/x86/crypto/aes-cbc-mb/Makefile | 22 + arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c | 812 3 files changed, 835 insertions(+) create mode 100644 arch/x86/crypto/aes-cbc-mb/Makefile create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b9b912a..000db49 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o +obj-$(CONFIG_CRYPTO_AES_CBC_MB) += aes-cbc-mb/ obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o # These modules require assembler to support AVX. diff --git a/arch/x86/crypto/aes-cbc-mb/Makefile b/arch/x86/crypto/aes-cbc-mb/Makefile new file mode 100644 index 000..b642bd8 --- /dev/null +++ b/arch/x86/crypto/aes-cbc-mb/Makefile @@ -0,0 +1,22 @@ +# +# Arch-specific CryptoAPI modules. 
+# + +avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) + +# we need decryption and key expansion routine symbols +# if either AESNI_NI_INTEL or AES_CBC_MB is a module + +ifeq ($(CONFIG_CRYPTO_AES_NI_INTEL),m) + dec_support := ../aesni-intel_asm.o +endif +ifeq ($(CONFIG_CRYPTO_AES_CBC_MB),m) + dec_support := ../aesni-intel_asm.o +endif + +ifeq ($(avx_supported),yes) + obj-$(CONFIG_CRYPTO_AES_CBC_MB) += aes-cbc-mb.o + aes-cbc-mb-y := $(dec_support) aes_cbc_mb.o aes_mb_mgr_init.o \ + mb_mgr_inorder_x8_asm.o mb_mgr_ooo_x8_asm.o \ + aes_cbc_enc_x8.o +endif diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c new file mode 100644 index 000..6a03712 --- /dev/null +++ b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c @@ -0,0 +1,812 @@ +/* + * Multi buffer AES CBC algorithm glue code + * + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Sean Gulley + * Tim Chen + */ + +#define pr_fmt(fmt)KBUILD_MODNAME "
[PATCH v2 3/5] crypto: AES CBC multi-buffer scheduler
This patch implements in-order scheduler for encrypting multiple buffers in parallel supporting AES CBC encryption with key sizes of 128, 192 and 256 bits. It uses 8 data lanes by taking advantage of the SIMD instructions with XMM registers. The multibuffer manager and scheduler is mostly written in assembly and the initialization support is written in C. The AES CBC multibuffer crypto driver support interfaces with the multibuffer manager and scheduler to support AES CBC encryption in parallel. The scheduler supports job submissions, job flushing and job retrievals after completion. The basic flow of usage of the CBC multibuffer scheduler is as follows: - The caller allocates an aes_cbc_mb_mgr_inorder_x8 object and initializes it once by calling aes_cbc_init_mb_mgr_inorder_x8(). - The aes_cbc_mb_mgr_inorder_x8 structure has an array of JOB_AES objects. Allocation and scheduling of JOB_AES objects are managed by the multibuffer scheduler support routines. The caller allocates a JOB_AES using aes_cbc_get_next_job_inorder_x8(). - The returned JOB_AES must be filled in with parameters for CBC encryption (eg: plaintext buffer, ciphertext buffer, key, iv, etc) and submitted to the manager object using aes_cbc_submit_job_inorder_xx(). - If the oldest JOB_AES is completed during a call to aes_cbc_submit_job_inorder_x8(), it is returned. Otherwise, NULL is returned. - A call to aes_cbc_flush_job_inorder_x8() always returns the oldest job, unless the multibuffer manager is empty of jobs. - A call to aes_cbc_get_completed_job_inorder_x8() returns a completed job. This routine is useful to process completed jobs instead of waiting for the flusher to engage. - When a job is returned from submit or flush, the caller extracts the useful data and returns it to the multibuffer manager implicitly by the next call to aes_cbc_get_next_job_xx(). 
Jobs are always returned from submit or flush routines in the order they were submitted (hence "inorder"). A job allocated using aes_cbc_get_next_job_inorder_x8() must be filled in and submitted before another call. A job returned by aes_cbc_submit_job_inorder_x8() or aes_cbc_flush_job_inorder_x8() is 'deallocated' upon the next call to get a job structure. Calls to get_next_job() cannot fail. If all jobs are allocated after a call to get_next_job(), the subsequent call to submit always returns the oldest job in a completed state. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen --- arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c | 145 +++ arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S | 222 +++ arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S | 416 + 3 files changed, 783 insertions(+) create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S diff --git a/arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c b/arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c new file mode 100644 index 000..7a7f8a1 --- /dev/null +++ b/arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c @@ -0,0 +1,145 @@ +/* + * Initialization code for multi buffer AES CBC algorithm + * + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * Contact Information: + * James Guilford + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE
[PATCH v2 0/5] crypto: x86 AES-CBC encryption with multibuffer
In this patch series, we introduce AES CBC encryption that is parallelized on x86_64 cpu with XMM registers. The multi-buffer technique encrypts 8 data streams in parallel with SIMD instructions. Decryption is handled as in the existing AESNI Intel CBC implementation which can already parallelize decryption even for a single data stream. Please see the multi-buffer whitepaper for details of the technique: http://www.intel.com/content/www/us/en/communications/communications-ia-multi-buffer-paper.html It is important that any driver uses this algorithm properly for scenarios where we have many data streams that can fill up the data lanes most of the time. It shouldn't be used when only a single data stream is expected mostly. Otherwise we may incur extra delays when we have frequent gaps in data lanes, causing us to wait till data come in to fill the data lanes before initiating encryption. We may have to wait for flush operations to commence when no new data come in after some wait time. However we keep this extra delay to a minimum by opportunistically flushing the unfinished jobs if crypto daemon is the only active task running on a cpu. By using this technique, we saw a throughput increase of up to 5.8x under optimal conditions when we have fully loaded encryption jobs filling up all the data lanes. Change Log: v2 1. Update cpu feature check to make sure SSE is supported 2. 
Fix up unloading of aes-cbc-mb module to properly free memory Tim Chen (5): crypto: Multi-buffer encryptioin infrastructure support crypto: AES CBC multi-buffer data structures crypto: AES CBC multi-buffer scheduler crypto: AES CBC by8 encryption crypto: AES CBC multi-buffer glue code arch/x86/crypto/Makefile | 1 + arch/x86/crypto/aes-cbc-mb/Makefile| 22 + arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S| 774 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c| 812 + arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h| 96 +++ arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h| 131 arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c | 145 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S | 270 +++ arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S | 222 ++ arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S | 416 +++ arch/x86/crypto/aes-cbc-mb/reg_sizes.S | 125 crypto/Kconfig | 16 + crypto/blkcipher.c | 29 +- crypto/mcryptd.c | 256 ++- crypto/scatterwalk.c | 7 + include/crypto/algapi.h| 1 + include/crypto/mcryptd.h | 36 + include/crypto/scatterwalk.h | 6 + include/linux/crypto.h | 1 + 19 files changed, 3361 insertions(+), 5 deletions(-) create mode 100644 arch/x86/crypto/aes-cbc-mb/Makefile create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S create mode 100644 arch/x86/crypto/aes-cbc-mb/reg_sizes.S -- 1.7.11.7 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/5] crypto: AES CBC multi-buffer glue code
On Thu, 2015-10-29 at 09:19 -0700, Tim Chen wrote: > On Thu, 2015-10-29 at 03:03 +0100, Stephan Mueller wrote: > > Am Mittwoch, 28. Oktober 2015, 14:19:29 schrieb Tim Chen: > > > > Hi Tim, > > > > >+ > > >+ /* check for dependent cpu features */ > > >+ if (!cpu_has_aes) { > > >+ pr_err("aes_cbc_mb_mod_init: no aes support\n"); > > >+ err = -ENODEV; > > >+ goto err1; > > >+ } > > > > In your post 0/5, you say that this mechanism needs AVX2. In the existing > > AESNI glue code I find > > > > #ifdef CONFIG_X86_64 > > #ifdef CONFIG_AS_AVX2 > > if (boot_cpu_has(X86_FEATURE_AVX2)) { > > > > ... > > > > Why would that CPU check not be needed here? > > Good catch. Will add check for avx2. Actually since we are using only XMM registers and SSE instructions, checking for SSE support will be sufficient. Tim -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/5] crypto: AES CBC multi-buffer glue code
On Thu, 2015-10-29 at 03:03 +0100, Stephan Mueller wrote: > Am Mittwoch, 28. Oktober 2015, 14:19:29 schrieb Tim Chen: > > Hi Tim, > > >+ > >+/* check for dependent cpu features */ > >+if (!cpu_has_aes) { > >+pr_err("aes_cbc_mb_mod_init: no aes support\n"); > >+err = -ENODEV; > >+goto err1; > >+} > > In your post 0/5, you say that this mechanism needs AVX2. In the existing > AESNI glue code I find > > #ifdef CONFIG_X86_64 > #ifdef CONFIG_AS_AVX2 > if (boot_cpu_has(X86_FEATURE_AVX2)) { > > ... > > Why would that CPU check not be needed here? Good catch. Will add check for avx2. Tim > > Ciao > Stephan -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] crypto: x86: Remove duplicate define of SHA1_DIGEST_SIZE
On Thu, 2015-10-29 at 08:51 +0100, LABBE Corentin wrote: > On Fri, Oct 16, 2015 at 09:04:58AM -0700, Tim Chen wrote: > > On Wed, 2015-10-14 at 21:15 +0200, LABBE Corentin wrote: > > > The sha x86 crypto code use two define for the same thing: > > > NUM_SHA1_DIGEST_WORDS and SHA1_DIGEST_LENGTH > > > Replace them by SHA1_DIGEST_SIZE/4 > > > > Thanks. Acked-by: Tim Chen > > > > > > Signed-off-by: LABBE Corentin > > > > Hello > > Thanks for your ack, but Thomas Gleixner in the same time NACK this patch. > Just in case I attach the new patch, for permit you to decide which one you > prefer. > > Regards Looks fine. But wonder if you should have #define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / sizeof(u32)) moved to sha1.h Tim > > diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h > b/arch/x86/crypto/sha-mb/sha_mb_ctx.h > index e36069d..9fd36eb5 100644 > --- a/arch/x86/crypto/sha-mb/sha_mb_ctx.h > +++ b/arch/x86/crypto/sha-mb/sha_mb_ctx.h > @@ -94,7 +94,6 @@ enum hash_ctx_error { > > > /* Hash Constants and Typedefs */ > -#define SHA1_DIGEST_LENGTH 5 > #define SHA1_LOG2_BLOCK_SIZE6 > > #define SHA1_PADLENGTHFIELD_SIZE8 > diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h > b/arch/x86/crypto/sha-mb/sha_mb_mgr.h > index 08ad1a9..b295e15 100644 > --- a/arch/x86/crypto/sha-mb/sha_mb_mgr.h > +++ b/arch/x86/crypto/sha-mb/sha_mb_mgr.h > @@ -54,10 +54,10 @@ > #ifndef __SHA_MB_MGR_H > #define __SHA_MB_MGR_H > > - > +#include > #include > > -#define NUM_SHA1_DIGEST_WORDS 5 > +#define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / sizeof(u32)) Suggest to move SHA1_DIGEST_WORDS to sha1.h Tim -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 0/4] TPM2: select hash algorithm for a trusted key
Jarkko Sakkinen (4): crypto: add entry for sm3-256 tpm: choose hash algorithm for sealing when using TPM 2.0 keys, trusted: select the hash algorithm keys, trusted: update documentation for 'hash=' option Documentation/security/keys-trusted-encrypted.txt | 3 ++ crypto/hash_info.c| 2 ++ drivers/char/tpm/tpm.h| 10 -- drivers/char/tpm/tpm2-cmd.c | 42 +-- include/crypto/hash_info.h| 3 ++ include/keys/trusted-type.h | 1 + include/uapi/linux/hash_info.h| 1 + security/keys/trusted.c | 20 ++- 8 files changed, 75 insertions(+), 7 deletions(-) -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 1/4] crypto: add entry for sm3-256
Added entry for sm3-256 to the following tables: * hash_algo_name * hash_digest_size Needed for TPM 2.0 trusted key sealing. Signed-off-by: Jarkko Sakkinen --- crypto/hash_info.c | 2 ++ include/crypto/hash_info.h | 3 +++ include/uapi/linux/hash_info.h | 1 + 3 files changed, 6 insertions(+) diff --git a/crypto/hash_info.c b/crypto/hash_info.c index 3e7ff46..6f3a113 100644 --- a/crypto/hash_info.c +++ b/crypto/hash_info.c @@ -31,6 +31,7 @@ const char *const hash_algo_name[HASH_ALGO__LAST] = { [HASH_ALGO_TGR_128] = "tgr128", [HASH_ALGO_TGR_160] = "tgr160", [HASH_ALGO_TGR_192] = "tgr192", + [HASH_ALGO_SM3_256] = "sm3-256", }; EXPORT_SYMBOL_GPL(hash_algo_name); @@ -52,5 +53,6 @@ const int hash_digest_size[HASH_ALGO__LAST] = { [HASH_ALGO_TGR_128] = TGR128_DIGEST_SIZE, [HASH_ALGO_TGR_160] = TGR160_DIGEST_SIZE, [HASH_ALGO_TGR_192] = TGR192_DIGEST_SIZE, + [HASH_ALGO_SM3_256] = SM3_256_DIGEST_SIZE, }; EXPORT_SYMBOL_GPL(hash_digest_size); diff --git a/include/crypto/hash_info.h b/include/crypto/hash_info.h index e1e5a3e..d86e050 100644 --- a/include/crypto/hash_info.h +++ b/include/crypto/hash_info.h @@ -34,6 +34,9 @@ #define TGR160_DIGEST_SIZE 20 #define TGR192_DIGEST_SIZE 24 +/* not defined in include/crypto/ */ +#define SM3_256_DIGEST_SIZE 32 + extern const char *const hash_algo_name[HASH_ALGO__LAST]; extern const int hash_digest_size[HASH_ALGO__LAST]; diff --git a/include/uapi/linux/hash_info.h b/include/uapi/linux/hash_info.h index ca18c45..ebf8fd8 100644 --- a/include/uapi/linux/hash_info.h +++ b/include/uapi/linux/hash_info.h @@ -31,6 +31,7 @@ enum hash_algo { HASH_ALGO_TGR_128, HASH_ALGO_TGR_160, HASH_ALGO_TGR_192, + HASH_ALGO_SM3_256, HASH_ALGO__LAST }; -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] crypto: x86: Remove duplicate define of SHA1_DIGEST_SIZE
On Fri, Oct 16, 2015 at 09:04:58AM -0700, Tim Chen wrote: > On Wed, 2015-10-14 at 21:15 +0200, LABBE Corentin wrote: > > The sha x86 crypto code uses two defines for the same thing: > > NUM_SHA1_DIGEST_WORDS and SHA1_DIGEST_LENGTH > > Replace them by SHA1_DIGEST_SIZE/4 > > Thanks. Acked-by: Tim Chen > > > > Signed-off-by: LABBE Corentin > Hello, thanks for your ack, but Thomas Gleixner NACKed this patch at the same time. Just in case, I attach the new patch so that you can decide which one you prefer. Regards --8<-- >From 7439bc57d95de49a510c0eb5d328dec10a3c6689 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 14 Oct 2015 12:48:04 +0200 Subject: [PATCH] crypto: x86: Remove duplicate define of SHA1_DIGEST_SIZE The sha x86 crypto code uses two defines for the same thing: NUM_SHA1_DIGEST_WORDS and SHA1_DIGEST_LENGTH Replace them by SHA1_DIGEST_SIZE/4 Signed-off-by: LABBE Corentin --- arch/x86/crypto/sha-mb/sha1_mb.c | 2 +- arch/x86/crypto/sha-mb/sha_mb_ctx.h | 1 - arch/x86/crypto/sha-mb/sha_mb_mgr.h | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index a841e97..6544ea7 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -104,7 +104,7 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_m inline void sha1_init_digest(uint32_t *digest) { - static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0, + static const uint32_t initial_digest[SHA1_DIGEST_WORDS] = {SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; memcpy(digest, initial_digest, sizeof(initial_digest)); } diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h b/arch/x86/crypto/sha-mb/sha_mb_ctx.h index e36069d..9fd36eb5 100644 --- a/arch/x86/crypto/sha-mb/sha_mb_ctx.h +++ b/arch/x86/crypto/sha-mb/sha_mb_ctx.h @@ -94,7 +94,6 @@ enum hash_ctx_error { /* Hash Constants and Typedefs */ -#define SHA1_DIGEST_LENGTH 5 #define SHA1_LOG2_BLOCK_SIZE 6 #define
SHA1_PADLENGTHFIELD_SIZE 8 diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h b/arch/x86/crypto/sha-mb/sha_mb_mgr.h index 08ad1a9..b295e15 100644 --- a/arch/x86/crypto/sha-mb/sha_mb_mgr.h +++ b/arch/x86/crypto/sha-mb/sha_mb_mgr.h @@ -54,10 +54,10 @@ #ifndef __SHA_MB_MGR_H #define __SHA_MB_MGR_H - +#include <crypto/sha.h> #include <linux/types.h> -#define NUM_SHA1_DIGEST_WORDS 5 +#define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / sizeof(u32)) enum job_sts { STS_UNKNOWN = 0, STS_BEING_PROCESSED = 1, @@ -69,7 +69,7 @@ enum job_sts {STS_UNKNOWN = 0, struct job_sha1 { u8 *buffer; u32 len; - u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32); + u32 result_digest[SHA1_DIGEST_WORDS] __aligned(32); enum job_sts status; void *user_data; }; -- 2.4.10 -- To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html