[PATCH V7 6/7] crypto: AES vectors for AES CBC multibuffer testing

2017-07-25 Thread Megha Dey
For more robust testing of AES CBC multibuffer support, additional
test vectors have been added to the AES CBC encrypt/decrypt
test case.

Originally-by: Chandramouli Narayanan 
Signed-off-by: Megha Dey 
Acked-by: Tim Chen 
---
 crypto/testmgr.h | 1456 ++
 1 file changed, 1456 insertions(+)

diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 4293573..c1e3a30 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -34488,4 +34488,1460 @@ struct comp_testvec {
},
 };
 
+#ifdef CONFIG_CRYPTO_AES_CBC_MB
+static struct cipher_testvec aes_cbc_enc_tv_template_rnddata_klen16[] = {
+{
+   .key =
+"\xd7\x0c\x4c\x6d\x11\x02\xb0\x31\x63\x9b\x82\x76\x9e\x03\x26\xdf",
+   .klen = 16,
+   .iv =
+"\xc1\x62\x66\x62\xb8\x65\x28\xfa\x5f\x36\xd3\x09\xb1\x2c\xa1\xa3",
+   .input =
+"\x4f\x6c\x63\xa5\xd0\x19\x08\x4e\xd4\x58\x33\xf6\x2b\xeb\x26\xb9",
+   .ilen = 16,
+   .result =
+"\xa0\x35\xb0\x33\xc0\x2e\xe5\xbb\xbc\xe6\x01\x9e\xf4\x67\x11\x14",
+   .rlen = 16,
+},
+{
+   .key =
+"\xd7\x0c\x4c\x6d\x11\x02\xb0\x31\x63\x9b\x82\x76\x9e\x03\x26\xdf",
+   .klen = 16,
+   .iv =
+"\x78\x6c\x27\xd6\xb2\xdc\xbe\x7b\xab\xc2\x43\xd7\x81\x0c\xe5\x20",
+   .input =
+"\x9a\x00\x4e\x5a\xb3\x51\x68\xaa\xdb\x6e\xe5\xa4\x7f\x23\x6e\x4d"
+"\x1e\x72\x5e\xad\x64\xc9\x96\x23\xf8\xae\xef\xf6\x7b\x7d\xd6\xf0",
+   .ilen = 32,
+   .result =
+"\x5a\xc0\x04\xc6\x53\xef\x3b\x69\xb1\x41\xc7\x85\xeb\x69\x82\xd0"
+"\xed\x09\xbb\xec\xb2\x8d\x5c\xc9\x61\x81\x5c\xf6\x99\x49\xa0\x4d",
+   .rlen = 32,
+},
+{
+   .key =
+"\xd7\x0c\x4c\x6d\x11\x02\xb0\x31\x63\x9b\x82\x76\x9e\x03\x26\xdf",
+   .klen = 16,
+   .iv =
+"\xc9\x05\x4c\x35\x96\x77\xd3\x3c\x3d\x97\x7c\x82\xf5\x58\x71\xf1",
+   .input =
+"\xa9\x5b\x03\xec\xec\x73\xed\xcb\x5c\x4c\xd2\x40\xb6\x9b\x49\x31"
+"\x5d\xf2\x23\xb3\x11\x98\xeb\x89\xab\x3e\x3a\xdd\xaa\xfd\xd1\xde"
+"\xab\x73\x59\x86\x1a\x59\x32\xb2\x55\x46\x4a\x80\xa4\xcc\xa8\xd9",
+   .ilen = 48,
+   .result =
+"\xdb\x05\x69\xe1\x33\x8b\x0b\x3d\x33\x12\x0d\xef\x94\x0f\xa3\xb3"
+"\xd7\x0a\x53\x7b\x98\x53\xc6\xc2\xa3\xd4\x7a\x30\x1a\xed\x45\xcc"
+"\x47\x38\xc1\x75\x0b\x3c\xd4\x8d\xa8\xf9\xd3\x71\xb8\x22\xa6\xae",
+   .rlen = 48,
+},
+{
+   .key =
+"\xd7\x0c\x4c\x6d\x11\x02\xb0\x31\x63\x9b\x82\x76\x9e\x03\x26\xdf",
+   .klen = 16,
+   .iv =
+"\x6c\xb4\x84\x61\x1e\x39\x4b\x22\x37\xaa\x7b\x78\xc0\x71\x20\x60",
+   .input =
+"\x05\x43\x76\x1e\xc6\x68\x43\x52\x5f\x43\x39\xbf\x93\x38\x38\x83"
+"\x38\x1d\x3c\xb5\xc8\xab\xe4\xd0\x7f\x1a\xac\xca\xcc\x16\xea\x75"
+"\x30\x75\x40\xe8\x61\x07\xc6\x04\x55\x2b\xf3\x29\xc3\x37\x83\x42"
+"\xe0\x21\xfb\xb4\x5d\x93\xbb\x87\x01\x3e\xa6\x9d\x3b\x0a\x5a\x37",
+   .ilen = 64,
+   .result =
+"\x83\x9f\xa0\xac\x14\x14\x88\x68\x7f\x9a\x5f\x98\x91\x71\xa8\xce"
+"\x28\xfb\x5e\xb1\x49\xe7\x63\x39\x12\x62\x00\x3e\x5c\x63\x2b\x12"
+"\x3d\xff\xd5\x0a\x43\x28\x52\x68\x78\x62\xc7\xa4\xbb\xca\x5d\x5e"
+"\xe3\xd5\x23\xb3\xe7\x22\xae\xf3\xd0\xd9\x00\x14\x0c\x46\x67\x17",
+   .rlen = 64,
+},
+{
+   .key =
+"\xd7\x0c\x4c\x6d\x11\x02\xb0\x31\x63\x9b\x82\x76\x9e\x03\x26\xdf",
+   .klen = 16,
+   .iv =
+"\xf9\xe8\xab\xe2\xf9\x28\xeb\x05\x10\xc4\x97\x37\x76\xe4\xe0\xd9",
+   .input =
+"\xab\x99\xe8\x2a\x18\x50\xdc\x80\x1f\x38\xb9\x01\x34\xd4\x59\x60"
+"\x4e\x1c\x21\x71\x22\x06\xbe\x5f\x71\x07\x3b\x13\xe7\x05\xca\xa5"
+"\x7b\x23\xb5\xaa\xc6\xdb\xe3\x17\xa9\x9f\xe1\xbc\xd5\x1b\xe6\xf5"
+"\xfa\x43\xdd\x80\x50\xc8\x8a\x32\x2f\x65\x25\xa4\xeb\xd1\x74\x02"
+"\x07\xc1\x04\x94\x6b\x34\xa1\x74\x62\xb2\x8d\x60\xf5\x7e\xda\x1a"
+"\x0f\xf5\x21\xe1\xd7\x88\xc8\x26\xd7\x49\xb2\x4a\x84\x2c\x00\x3b"
+"\x96\xde\x4e\xa7\x57\x27\xa0\xa4\x3a\xff\x69\x19\xf7\xec\xeb\x62"
+"\xff\x5a\x82\x0d\x25\x5e\x3c\x63\xb3\x6d\xc4\xb9\xe3\xc9\x3a\xc2",
+   .ilen = 128,
+   .result =
+"\xec\xd5\x2f\x6a\xfd\x61\xf2\x37\x19\x6f\x55\x31\xd7\x2c\x14\x4d"
+"\xc1\xb4\xbb\x7d\xa9\x1a\xe6\x85\x8c\x2f\xbf\x7e\x66\x21\xf8\x17"
+"\x9e\x09\x1b\x2a\x11\xbf\xdf\x7d\xdf\xf5\xfb\x0a\x16\x79\xe2\x43"
+"\x5c\x3b\x3e\x84\x35\xfd\x92\x9e\xe0\x31\x50\x1d\x62\xd6\x22\x99"
+"\x5f\x25\xb3\xe8\xdf\xb0\xc0\xab\xd9\xdb\xac\x4b\x9c\xe2\x89\xc6"
+"\x49\x7f\x5f\xee\xcb\xf6\x25\x10\x9f\x32\x58\x85\x45\x50\x74\x8a"
+"\x55\xce\x86\x44\xda\xe4\x93\x58\x4d\xd3\x73\x76\x40\xf6\x92\x8b"
+"\x99\xc1\x2b\xf9\x18\xd0\xfa\xd0\xa6\x84\x03\xf5\xd4\xcb\xfa\xe7",
+   .rlen = 128,
+},
+{
+   .key =
+"\xd7\x0c\x4c\x6d\x11\x02\xb0\x31\x63\x9b\x82\x76\x9e\x03\x26\xdf",
+   .klen = 16,
+   .iv =
+"\x58\x1e\x1a\x65\x16\x25\xaa\x55\x97\xcd\xeb\x4c\xd6\xb3\x9c\x2b",
+   .input =
+"\xef\x85\x0b\xe5\x02\xd5\xce\xcc\xad\x2d\x5e\xec\x1e\x01\x8c\x28"
+"\xf0\x2c\x23\x10\xaa\x84\xf0\x61\xe2\x56\x29\x21\x9f\x09\xaf\x9d"
+"\x7d\xfc\x60\x16\x4c\x67\xdd\xdf\x74\x35\x49\x81\xca\x68\xb6\xc7"

Re: FW: [PATCH V6 5/7] crypto: AES CBC multi-buffer glue code

2017-07-25 Thread Megha Dey
On Tue, 2017-07-25 at 10:17 +0800, Herbert Xu wrote:
> On Mon, Jul 24, 2017 at 06:09:56PM -0700, Megha Dey wrote:
> >
> > Under the skcipher interface, if both the outer and inner alg are async,
> > there should not be any problem right? Currently I do not see any
> > existing algorithms have both algorithms async.
> 
> That's because the purpose of cryptd is to turn a sync algorithm
> into an async one.
> 
> Your mcryptd is completely different.  We already went through
> this discussion for sha1-mb.  This is no different.  You should
> choose the type that fits your circumstances.

I have updated the inner algorithm to also be async in v7 patch series.
> 
> Cheers,




[PATCH V7 7/7] crypto: AES CBC multi-buffer tcrypt

2017-07-25 Thread Megha Dey
The tcrypt test framework for CBC multi-buffer testing is
laid out in this patch. Tcrypt has been extended to validate
the functionality and performance of AES CBC multi-buffer support.

A new test(mode=600) has been added to test the speed of the multibuffer
case, as multi-buffer encrypt will wait for additional encrypt requests
that never arrive to kick off computation. So we always incur the extra
delay before flush timer expires to trigger the computation in the
(mode=500) test. We create the new tests that will send out these
additional requests that can be aggregated and computed in parallel for
true throughput speed test of the multi-buffer encrypt test case.

The enhanced CBC tests create multiple transforms and exercise
the multi-buffer implementation. Crafted requests are sent at once
to the multiple transforms created and the returned responses
are compared with expected results. The test vectors are so chosen
as to exercise the scatter-gather list to the maximum allowable limit
within the framework.

Originally-by: Chandramouli Narayanan 
Signed-off-by: Megha Dey 
Acked-by: Tim Chen 
---
 crypto/tcrypt.c  | 259 +++-
 crypto/testmgr.c | 707 +++
 crypto/testmgr.h |  64 -
 3 files changed, 1016 insertions(+), 14 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 50fde18..dcbe04c 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "tcrypt.h"
 
 /*
@@ -84,7 +85,7 @@ struct tcrypt_result {
int err;
 };
 
-static void tcrypt_complete(struct crypto_async_request *req, int err)
+void tcrypt_complete(struct crypto_async_request *req, int err)
 {
struct tcrypt_result *res = req->data;
 
@@ -183,6 +184,11 @@ static int test_aead_cycles(struct aead_request *req, int 
enc, int blen)
 
 #define XBUFSIZE 8
 #define MAX_IVLEN 32
+#define MB_WIDTH 8
+struct scatterlist mb_sg[MB_WIDTH][XBUFSIZE];
+struct skcipher_request *mb_req[MB_WIDTH];
+struct tcrypt_result mb_tresult[MB_WIDTH];
+char *mb_xbuf[MB_WIDTH][XBUFSIZE];
 
 static int testmgr_alloc_buf(char *buf[XBUFSIZE])
 {
@@ -780,6 +786,46 @@ static inline int do_one_acipher_op(struct 
skcipher_request *req, int ret)
return ret;
 }
 
+
+/*
+ * Perform a maximum of MB_WIDTH operations.
+ * Await the results and measure performance.
+ */
+cycles_t mb_start, mb_end;
+static int mb_err[MB_WIDTH];
+
+static inline int do_multi_acipher_op(
+   struct skcipher_request *req[MB_WIDTH], int enc)
+{
+   int i, ret, comp_ret = 0;
+   bool is_async;
+
+   /* Kick off all MB_WIDTH requests; async ones finish later. */
+   for (i = 0; i < MB_WIDTH; ++i) {
+   ret = enc == ENCRYPT ? crypto_skcipher_encrypt(req[i])
+   : crypto_skcipher_decrypt(req[i]);
+   mb_err[i] = ret;
+   if (ret == -EINPROGRESS || ret == -EBUSY)
+   continue; /* on with next req */
+   /* any other error, bail out */
+   if (ret)
+   return ret;
+   }
+   /* Wait for the async submissions and collect their final status. */
+   for (i = 0; i < MB_WIDTH; ++i) {
+   struct tcrypt_result *tr = req[i]->base.data;
+
+   is_async = mb_err[i] == -EINPROGRESS || mb_err[i] == -EBUSY;
+   if (is_async) {
+   wait_for_completion(&tr->completion);
+   reinit_completion(&tr->completion);
+   }
+   comp_ret = tr->err;
+   if (comp_ret)
+   pr_info("multi_acipher_op error\n");
+   }
+
+   return comp_ret;
+}
+
 static int test_acipher_jiffies(struct skcipher_request *req, int enc,
int blen, int secs)
 {
@@ -846,7 +892,7 @@ static int test_acipher_cycles(struct skcipher_request 
*req, int enc,
 
 out:
if (ret == 0)
-   pr_cont("1 operation in %lu cycles (%d bytes)\n",
+   pr_cont("1 operation in %4lu cycles (%d bytes)\n",
(cycles + 4) / 8, blen);
 
return ret;
@@ -927,6 +973,7 @@ static void test_skcipher_speed(const char *algo, int enc, 
unsigned int secs,
if (ret) {
pr_err("setkey() failed flags=%x\n",
crypto_skcipher_get_flags(tfm));
+
goto out_free_req;
}
 
@@ -980,6 +1027,203 @@ static void test_skcipher_speed(const char *algo, int 
enc, unsigned int secs,
crypto_free_skcipher(tfm);
 }
 
+static int test_mb_acipher_jiffies(
+   struct skcipher_request *req[MB_WIDTH], int enc, int blen, int secs)
+{
+   unsigned long start, end;
+   int bcount;
+   int ret;
+
+   /* initiate a maximum of MB_WIDTH operations and measure performance */
+   for (start = jiffies, end = start + secs * HZ, bcount = 0;
+   time_before(jiffies, 

[PATCH V7 4/7] crypto: AES CBC by8 encryption

2017-07-25 Thread Megha Dey
This patch introduces the assembly routine to do a by8 AES CBC
encryption in support of the AES CBC multi-buffer implementation.

It encrypts 8 data streams of the same key size simultaneously.

Originally-by: Chandramouli Narayanan 
Signed-off-by: Megha Dey 
Acked-by: Tim Chen 
---
 arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S | 775 
 1 file changed, 775 insertions(+)
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S

diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S 
b/arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S
new file mode 100644
index 000..2130574
--- /dev/null
+++ b/arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S
@@ -0,0 +1,775 @@
+/*
+ * AES CBC by8 multibuffer optimization (x86_64)
+ * This file implements 128/192/256 bit AES CBC encryption
+ *
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford 
+ * Sean Gulley 
+ * Tim Chen 
+ * Megha Dey 
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include 
+
+/* stack size needs to be an odd multiple of 8 for alignment */
+
+#define AES_KEYSIZE_128	16
+#define AES_KEYSIZE_192	24
+#define AES_KEYSIZE_256	32
+
+#define XMM_SAVE_SIZE  16*10
+#define GPR_SAVE_SIZE  8*9
+#define STACK_SIZE (XMM_SAVE_SIZE + GPR_SAVE_SIZE)
+
+#define GPR_SAVE_REG   %rsp
+#define GPR_SAVE_AREA  %rsp + XMM_SAVE_SIZE
+#define LEN_AREA_OFFSETXMM_SAVE_SIZE + 8*8
+#define LEN_AREA_REG   %rsp
+#define LEN_AREA   %rsp + XMM_SAVE_SIZE + 8*8
+
+#define IN_OFFSET  0
+#define OUT_OFFSET 8*8
+#define KEYS_OFFSET16*8
+#define IV_OFFSET  24*8
+
+
+#define IDX%rax
+#define TMP%rbx
+#define ARG%rdi
+#define LEN%rsi
+
+#define KEYS0  %r14
+#define KEYS1  %r15
+#define KEYS2  %rbp
+#define KEYS3  %rdx
+#define KEYS4  %rcx
+#define KEYS5  %r8
+#define KEYS6  %r9
+#define KEYS7  %r10
+
+#define IN0%r11
+#define IN2%r12
+#define IN4%r13
+#define IN6LEN
+
+#define XDATA0 %xmm0
+#define XDATA1 %xmm1
+#define XDATA2 %xmm2
+#define XDATA3 %xmm3
+#define XDATA4 %xmm4
+#define XDATA5 %xmm5
+#define XDATA6 %xmm6
+#define XDATA7 %xmm7
+
+#define XKEY0_3%xmm8
+#define XKEY1_4%xmm9
+#define XKEY2_5%xmm10
+#define XKEY3_6%xmm11
+#define XKEY4_7%xmm12
+#define XKEY5_8%xmm13
+#define XKEY6_9%xmm14
+#define XTMP   %xmm15
+
+#define MOVDQ movdqu /* assume buffers not aligned */
+#define CONCAT(a, b)   a##b
+#define INPUT_REG_SUFX 1   /* IN */
+#define XDATA_REG_SUFX 2   /* XDAT */
+#define KEY_REG_SUFX   3   /* KEY */
+#define XMM_REG_SUFX   4   /* XMM */
+

[PATCH V7 5/7] crypto: AES CBC multi-buffer glue code

2017-07-25 Thread Megha Dey
This patch introduces the multi-buffer job manager which is responsible
for submitting scatter-gather buffers from several AES CBC jobs
to the multi-buffer algorithm. The glue code interfaces with the
underlying algorithm that handles 8 data streams of AES CBC encryption
in parallel. AES key expansion and CBC decryption requests are performed
in a manner similar to the existing AESNI Intel glue driver.

The outline of the algorithm for AES CBC encryption requests is
sketched below:

Any driver requesting the crypto service will place an async crypto
request on the workqueue.  The multi-buffer crypto daemon will pull an
AES CBC encryption request from work queue and put each request in an
empty data lane for multi-buffer crypto computation.  When all the empty
lanes are filled, computation will commence on the jobs in parallel and
the job with the shortest remaining buffer will get completed and be
returned. To prevent prolonged stall, when no new jobs arrive, we will
flush workqueue of jobs after a maximum allowable delay has elapsed.

To accommodate the fragmented nature of scatter-gather, we will keep
submitting the next scatter-buffer fragment for a job for multi-buffer
computation until a job is completed and no more buffer fragments
remain. At that time we will pull a new job to fill the now empty data slot.
We check with the multibuffer scheduler to see if there are other
completed jobs to prevent extraneous delay in returning any completed
jobs.

This multi-buffer algorithm should be used for cases where we get at
least 8 streams of crypto jobs submitted at a reasonably high rate.
For low crypto job submission rate and low number of data streams, this
algorithm will not be beneficial. The reason is at low rate, we do not
fill out the data lanes before flushing the jobs instead of processing
them with all the data lanes full. We will miss the benefit of parallel
computation, and adding delay to the processing of the crypto job at the
same time.  Some tuning of the maximum latency parameter may be needed
to get the best performance.

Originally-by: Chandramouli Narayanan 
Signed-off-by: Megha Dey 
Acked-by: Tim Chen 
---
 arch/x86/crypto/Makefile|   1 +
 arch/x86/crypto/aes-cbc-mb/Makefile |  22 +
 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c | 720 
 3 files changed, 743 insertions(+)
 create mode 100644 arch/x86/crypto/aes-cbc-mb/Makefile
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 34b3fa2..cc556a7 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
+obj-$(CONFIG_CRYPTO_AES_CBC_MB) += aes-cbc-mb/
 obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 
 # These modules require assembler to support AVX.
diff --git a/arch/x86/crypto/aes-cbc-mb/Makefile 
b/arch/x86/crypto/aes-cbc-mb/Makefile
new file mode 100644
index 000..b642bd8
--- /dev/null
+++ b/arch/x86/crypto/aes-cbc-mb/Makefile
@@ -0,0 +1,22 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
+
+# we need decryption and key expansion routine symbols
+# if either AESNI_NI_INTEL or AES_CBC_MB is a module
+
+ifeq ($(CONFIG_CRYPTO_AES_NI_INTEL),m)
+   dec_support := ../aesni-intel_asm.o
+endif
+ifeq ($(CONFIG_CRYPTO_AES_CBC_MB),m)
+   dec_support := ../aesni-intel_asm.o
+endif
+
+ifeq ($(avx_supported),yes)
+   obj-$(CONFIG_CRYPTO_AES_CBC_MB) += aes-cbc-mb.o
+   aes-cbc-mb-y := $(dec_support) aes_cbc_mb.o aes_mb_mgr_init.o \
+   mb_mgr_inorder_x8_asm.o mb_mgr_ooo_x8_asm.o \
+   aes_cbc_enc_x8.o
+endif
diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c 
b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c
new file mode 100644
index 000..341cbcb
--- /dev/null
+++ b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c
@@ -0,0 +1,720 @@
+/*
+ * Multi buffer AES CBC algorithm glue code
+ *
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ 

[PATCH V7 3/7] crypto: AES CBC multi-buffer scheduler

2017-07-25 Thread Megha Dey
This patch implements in-order scheduler for encrypting multiple buffers
in parallel supporting AES CBC encryption with key sizes of
128, 192 and 256 bits. It uses 8 data lanes by taking advantage of the
SIMD instructions with XMM registers.

The multibuffer manager and scheduler is mostly written in assembly and
the initialization support is written C. The AES CBC multibuffer crypto
driver support interfaces with the multibuffer manager and scheduler
to support AES CBC encryption in parallel. The scheduler supports
job submissions, job flushing and job retrievals after completion.

The basic flow of usage of the CBC multibuffer scheduler is as follows:

- The caller allocates an aes_cbc_mb_mgr_inorder_x8 object
and initializes it once by calling aes_cbc_init_mb_mgr_inorder_x8().

- The aes_cbc_mb_mgr_inorder_x8 structure has an array of JOB_AES
objects. Allocation and scheduling of JOB_AES objects are managed
by the multibuffer scheduler support routines. The caller allocates
a JOB_AES using aes_cbc_get_next_job_inorder_x8().

- The returned JOB_AES must be filled in with parameters for CBC
encryption (eg: plaintext buffer, ciphertext buffer, key, iv, etc) and
submitted to the manager object using aes_cbc_submit_job_inorder_xx().

- If the oldest JOB_AES is completed during a call to
aes_cbc_submit_job_inorder_x8(), it is returned. Otherwise,
NULL is returned.

- A call to aes_cbc_flush_job_inorder_x8() always returns the
oldest job, unless the multibuffer manager is empty of jobs.

- A call to aes_cbc_get_completed_job_inorder_x8() returns
a completed job. This routine is useful to process completed
jobs instead of waiting for the flusher to engage.

- When a job is returned from submit or flush, the caller extracts
the useful data and returns it to the multibuffer manager implicitly
by the next call to aes_cbc_get_next_job_xx().

Jobs are always returned from submit or flush routines in the order they
were submitted (hence "inorder"). A job allocated using
aes_cbc_get_next_job_inorder_x8() must be filled in and submitted before
another call. A job returned by aes_cbc_submit_job_inorder_x8() or
aes_cbc_flush_job_inorder_x8() is 'deallocated' upon the next call to
get a job structure. Calls to get_next_job() cannot fail. If all jobs
are allocated after a call to get_next_job(), the subsequent call to submit
always returns the oldest job in a completed state.

Originally-by: Chandramouli Narayanan 
Signed-off-by: Megha Dey 
Acked-by: Tim Chen 
---
 arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c   | 146 
 arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S | 223 +++
 arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S | 417 +
 3 files changed, 786 insertions(+)
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c
 create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S
 create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S

diff --git a/arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c 
b/arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c
new file mode 100644
index 000..2a2ce6c
--- /dev/null
+++ b/arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c
@@ -0,0 +1,146 @@
+/*
+ * Initialization code for multi buffer AES CBC algorithm
+ *
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford 
+ * Sean Gulley 
+ * Tim Chen 
+ * Megha Dey 
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS 

[PATCH V7 2/7] crypto: AES CBC multi-buffer data structures

2017-07-25 Thread Megha Dey
This patch introduces the data structures and prototypes of functions
needed for doing AES CBC encryption using multi-buffer. Included are
the structures of the multi-buffer AES CBC job, job scheduler in C and
data structure defines in x86 assembly code.

Originally-by: Chandramouli Narayanan 
Signed-off-by: Megha Dey 
Acked-by: Tim Chen 
---
 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h|  97 +
 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h| 132 
 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S | 271 +
 arch/x86/crypto/aes-cbc-mb/reg_sizes.S | 126 
 4 files changed, 626 insertions(+)
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h
 create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S
 create mode 100644 arch/x86/crypto/aes-cbc-mb/reg_sizes.S

diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h 
b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h
new file mode 100644
index 000..024586b
--- /dev/null
+++ b/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h
@@ -0,0 +1,97 @@
+/*
+ * Header file for multi buffer AES CBC algorithm manager
+ * that deals with 8 buffers at a time
+ *
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford 
+ * Sean Gulley 
+ * Tim Chen 
+ * Megha Dey 
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef __AES_CBC_MB_CTX_H
+#define __AES_CBC_MB_CTX_H
+
+
+#include 
+
+#include "aes_cbc_mb_mgr.h"
+
+#define CBC_ENCRYPT0x01
+#define CBC_DECRYPT0x02
+#define CBC_START  0x04
+#define CBC_DONE   0x08
+
+#define CBC_CTX_STS_IDLE   0x00
+#define CBC_CTX_STS_PROCESSING 0x01
+#define CBC_CTX_STS_LAST   0x02
+#define CBC_CTX_STS_COMPLETE   0x04
+
+enum cbc_ctx_error {
+   CBC_CTX_ERROR_NONE   =  0,
+   CBC_CTX_ERROR_INVALID_FLAGS  = -1,
+   CBC_CTX_ERROR_ALREADY_PROCESSING = -2,
+   CBC_CTX_ERROR_ALREADY_COMPLETED  = -3,
+};
+
+#define cbc_ctx_init(ctx, n_bytes, op) \
+   do { \
+   (ctx)->flag = (op) | CBC_START; \
+   (ctx)->nbytes = (n_bytes); \
+   } while (0)
+
+/* AESNI routines to perform cbc decrypt and key expansion */
+
+asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+unsigned int key_len);
+
+#endif /* __AES_CBC_MB_CTX_H */
diff --git a/arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h 

[PATCH V7 1/7] crypto: Multi-buffer encryption infrastructure support

2017-07-25 Thread Megha Dey
In this patch, the infrastructure needed to support multibuffer
encryption implementation is added:

a) Enhance mcryptd daemon to support skcipher requests.

b) Add multi-buffer mcryptd skcipher helper which presents the
   top-level algorithm as an skcipher.

b) Update configuration to include multi-buffer encryption build
support.

For an introduction to the multi-buffer implementation, please see
http://www.intel.com/content/www/us/en/communications/communications-ia-multi-buffer-paper.html

Originally-by: Chandramouli Narayanan 
Signed-off-by: Megha Dey 
Acked-by: Tim Chen 
---
 crypto/Kconfig   |  15 ++
 crypto/mcryptd.c | 475 +++
 include/crypto/mcryptd.h |  56 ++
 3 files changed, 546 insertions(+)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index aac4bc9..d172459 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1008,6 +1008,21 @@ config CRYPTO_AES_NI_INTEL
  ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
  acceleration for CTR.
 
+config CRYPTO_AES_CBC_MB
+tristate "AES CBC algorithm (x86_64 Multi-Buffer, Experimental)"
+depends on X86 && 64BIT
+select CRYPTO_SIMD
+select CRYPTO_MCRYPTD
+help
+  AES CBC encryption implemented using multi-buffer technique.
+  This algorithm computes on multiple data lanes concurrently with
+  SIMD instructions for better throughput. It should only be used
+  when we expect many concurrent crypto requests to keep all the
+  data lanes filled to realize the performance benefit. If the data
+  lanes are unfilled, a flush operation will be initiated after some
+  delay to process the existing crypto jobs, adding some extra
+  latency to low load case.
+
 config CRYPTO_AES_SPARC64
tristate "AES cipher algorithms (SPARC64)"
depends on SPARC64
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index 4e64726..a28b67f 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -273,6 +273,443 @@ static inline bool mcryptd_check_internal(struct rtattr 
**tb, u32 *type,
return false;
 }
 
+static int mcryptd_enqueue_skcipher_request(struct mcryptd_queue *queue,
+   struct crypto_async_request *request,
+   struct mcryptd_skcipher_request_ctx *rctx)
+{
+   int cpu, err;
+   struct mcryptd_cpu_queue *cpu_queue;
+
+   cpu = get_cpu();
+   cpu_queue = this_cpu_ptr(queue->cpu_queue);
+   rctx->tag.cpu = cpu;
+
+   err = crypto_enqueue_request(&cpu_queue->queue, request);
+   pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
+   cpu, cpu_queue, request);
+   queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
+   put_cpu();
+
+   return err;
+}
+
+static int mcryptd_skcipher_setkey(struct crypto_skcipher *parent,
+   const u8 *key, unsigned int keylen)
+{
+   struct mcryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
+   struct crypto_skcipher *child = ctx->child;
+   int err;
+
+   crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+   crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+   CRYPTO_TFM_REQ_MASK);
+   err = crypto_skcipher_setkey(child, key, keylen);
+   crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+   CRYPTO_TFM_RES_MASK);
+   return err;
+}
+
+static void mcryptd_skcipher_complete(struct skcipher_request *req, int err)
+{
+   struct mcryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+
+   local_bh_disable();
+   rctx->complete(&req->base, err);
+   local_bh_enable();
+}
+
+static void mcryptd_skcipher_encrypt(struct crypto_async_request *base,
+   int err)
+{
+   struct skcipher_request *req = skcipher_request_cast(base);
+   struct mcryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+   struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+   struct mcryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+   struct crypto_skcipher *child = ctx->child;
+   struct skcipher_request subreq;
+
+   if (unlikely(err == -EINPROGRESS))
+   goto out;
+
+   /* set up the skcipher request to work on */
+   skcipher_request_set_tfm(&subreq, child);
+   skcipher_request_set_callback(&subreq,
+   CRYPTO_TFM_REQ_MAY_SLEEP, 0, 0);
+   skcipher_request_set_crypt(&subreq, req->src, req->dst,
+   req->cryptlen, req->iv);
+
+   /*
+* pass addr of descriptor stored in the request context
+* so that the callee can get to the request context
+*/
+ 

[PATCH V7 0/7] crypto: AES CBC multibuffer implementation

2017-07-25 Thread Megha Dey
In this patch series, we introduce AES CBC encryption that is parallelized on
x86_64 cpu with XMM registers. The multi-buffer technique encrypts 8 data
streams in parallel with SIMD instructions. Decryption is handled as in the
existing AESNI Intel CBC implementation which can already parallelize decryption
even for a single data stream.

Please see the multi-buffer whitepaper for details of the technique:
http://www.intel.com/content/www/us/en/communications/communications-ia-multi-buffer-paper.html

It is important that any driver uses this algorithm properly for scenarios
where we have many data streams that can fill up the data lanes most of the
time. It shouldn't be used when mostly only a single data stream is expected.
Otherwise we may incur extra delays when we have frequent gaps in data lanes,
causing us to wait till data come in to fill the data lanes before initiating
encryption.  We may have to wait for flush operations to commence when no new
data come in after some wait time. However we keep this extra delay to a
minimum by opportunistically flushing the unfinished jobs if crypto daemon is
the only active task running on a cpu.

By using this technique, we saw a throughput increase of up to 5.7x under
optimal conditions when we have fully loaded encryption jobs filling up all
the data lanes.

Change Log:

v7
1. Add the CRYPTO_ALG_ASYNC flag to the internal algorithm
2. Remove the irq_disabled check

v6
1. Move away from the compat naming scheme and update the names of the inner
   and outer algorithm
2. Move wrapper code around synchronous internal algorithm from simd.c
   to mcryptd.c

v5
1. Use an async implementation of the inner algorithm instead of sync and use
   the latest skcipher interface instead of the older blkcipher interface.
   (we have picked up this work after a while)

v4
1. Make the decrypt path also use ablkcipher walk.
http://lkml.iu.edu/hypermail/linux/kernel/1512.0/01807.html

v3
1. Use ablkcipher_walk helpers to walk the scatter gather list
and eliminated needs to modify blkcipher_walk for multibuffer cipher

v2
1. Update cpu feature check to make sure SSE is supported
2. Fix up unloading of aes-cbc-mb module to properly free memory

Megha Dey (7):
  crypto: Multi-buffer encryption infrastructure support
  crypto: AES CBC multi-buffer data structures
  crypto: AES CBC multi-buffer scheduler
  crypto: AES CBC by8 encryption
  crypto: AES CBC multi-buffer glue code
  crypto: AES vectors for AES CBC multibuffer testing
  crypto: AES CBC multi-buffer tcrypt

 arch/x86/crypto/Makefile   |1 +
 arch/x86/crypto/aes-cbc-mb/Makefile|   22 +
 arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S|  775 ++
 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c|  720 ++
 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h|   97 ++
 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h|  132 ++
 arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c   |  146 ++
 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S |  271 
 arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S |  223 +++
 arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S |  417 ++
 arch/x86/crypto/aes-cbc-mb/reg_sizes.S |  126 ++
 crypto/Kconfig |   15 +
 crypto/mcryptd.c   |  475 +++
 crypto/tcrypt.c|  259 +++-
 crypto/testmgr.c   |  707 +
 crypto/testmgr.h   | 1496 
 include/crypto/mcryptd.h   |   56 +
 17 files changed, 5936 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/crypto/aes-cbc-mb/Makefile
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_enc_x8.S
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb.c
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_ctx.h
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_cbc_mb_mgr.h
 create mode 100644 arch/x86/crypto/aes-cbc-mb/aes_mb_mgr_init.c
 create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_datastruct.S
 create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_inorder_x8_asm.S
 create mode 100644 arch/x86/crypto/aes-cbc-mb/mb_mgr_ooo_x8_asm.S
 create mode 100644 arch/x86/crypto/aes-cbc-mb/reg_sizes.S

-- 
1.9.1



Re: [PATCH v2 3/3] crypto: scompress - defer allocation of scratch buffer to first use

2017-07-25 Thread Ard Biesheuvel

> On 26 Jul 2017, at 00:36, Giovanni Cabiddu  wrote:
> 
> Hi Ard,
> 
>> On Fri, Jul 21, 2017 at 04:42:38PM +0100, Ard Biesheuvel wrote:
>> +static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
>> +{
>> +int ret;
>> +
>> +mutex_lock(&scomp_lock);
>> +ret = crypto_scomp_alloc_all_scratches();
>> +mutex_unlock(&scomp_lock);
>> +
>> +return ret;
>> +}
> If you allocate the scratch buffers at init_tfm, don't you end
> up with a situation where if you allocate two tfms of the same algo
> then you get twice the number of scratches?
> If that is the case, we should implement a reference count
> mechanism.
> Am I missing something?
> 

Isn't the mutex supposed to take care of that?

Re: [PATCH v2 3/3] crypto: scompress - defer allocation of scratch buffer to first use

2017-07-25 Thread Giovanni Cabiddu
Hi Ard,

On Fri, Jul 21, 2017 at 04:42:38PM +0100, Ard Biesheuvel wrote:
> +static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
> +{
> + int ret;
> +
> + mutex_lock(&scomp_lock);
> + ret = crypto_scomp_alloc_all_scratches();
> + mutex_unlock(&scomp_lock);
> +
> + return ret;
> +}
If you allocate the scratch buffers at init_tfm, don't you end
up with a situation where if you allocate two tfms of the same algo
then you get twice the number of scratches?
If that is the case, we should implement a reference count
mechanism.
Am I missing something?

Regards,

-- 
Giovanni


Re: [PATCH v3 3/4] btrfs: Add zstd support

2017-07-25 Thread Giovanni Cabiddu
Hi Nick,

On Thu, Jul 20, 2017 at 10:27:42PM +0100, Nick Terrell wrote:
> Add zstd compression and decompression support to BtrFS. zstd at its
> fastest level compresses almost as well as zlib, while offering much
> faster compression and decompression, approaching lzo speeds.
Can we look at integrating the zstd implementation below the acomp API
available in the crypto subsystem?
(https://github.com/torvalds/linux/blob/master/crypto/acompress.c)
Acomp was designed to provide a generic and uniform API for compression
in the kernel which hides algorithm specific details to frameworks.
In future it would be nice to see btrfs using exclusively acomp
for compression. This way when a new compression algorithm is exposed
through acomp, it will be available immediately in btrfs.
Furthermore, any framework in the kernel that will use acomp will be
automatically enabled to use zstd.
What do you think?

Here is a prototype that shows how btrfs can be integrated with
acomp: https://patchwork.kernel.org/patch/9201741/

Regards,

-- 
Giovanni


Re: [PATCH v3 0/3] Update support for XTS-AES on AMD CCPs

2017-07-25 Thread Gary R Hook

On 07/25/2017 02:21 PM, Hook, Gary wrote:

The following series adds support for XTS-AES on version 5 CCPs, both
128- and 256-bit, and enhances/clarifies/simplifies some crypto layer
code.


Herbert:

Oops. The last patch in this series depends upon a fix that was sent 
just prior

to this. This series won't fully apply to cryptodev-2.6 without it, and thus
will have to wait until "Fix XTS-AES-128 support on v5 CCPs" is processed.

Sorry about that.



Changes since v2:
 - Move a CCP v5 fix out of this patch series and submit independently
 - In the unit-size check patch:
- Edit comments
- Remove unnecessary variable
- Delay a change (that belongs in the CCP v5 patch)

Changes since v1:
 - rework the validation of the unit-size; move to a separate patch
 - expand the key buffer to accommodate 256-bit keys
 - use xts_check_key() in the crypto layer


---

Gary R Hook (3):
  crypto: ccp - Add a call to xts_check_key()
  crypto: ccp - Rework the unit-size check for XTS-AES
  crypto: ccp - Add XTS-AES-256 support for CCP version 5


 drivers/crypto/ccp/ccp-crypto-aes-xts.c |   92
+--
 drivers/crypto/ccp/ccp-crypto.h |2 -
 drivers/crypto/ccp/ccp-ops.c|2 +
 3 files changed, 53 insertions(+), 43 deletions(-)

--


[PATCH v3 0/3] Update support for XTS-AES on AMD CCPs

2017-07-25 Thread Gary R Hook
The following series adds support for XTS-AES on version 5 CCPs, both
128- and 256-bit, and enhances/clarifies/simplifies some crypto layer
code.

Changes since v2:
 - Move a CCP v5 fix out of this patch series and submit independently
 - In the unit-size check patch:
- Edit comments
- Remove unnecessary variable
- Delay a change (that belongs in the CCP v5 patch)

Changes since v1:
 - rework the validation of the unit-size; move to a separate patch
 - expand the key buffer to accommodate 256-bit keys
 - use xts_check_key() in the crypto layer


---

Gary R Hook (3):
  crypto: ccp - Add a call to xts_check_key()
  crypto: ccp - Rework the unit-size check for XTS-AES
  crypto: ccp - Add XTS-AES-256 support for CCP version 5


 drivers/crypto/ccp/ccp-crypto-aes-xts.c |   92 +--
 drivers/crypto/ccp/ccp-crypto.h |2 -
 drivers/crypto/ccp/ccp-ops.c|2 +
 3 files changed, 53 insertions(+), 43 deletions(-)

--


[PATCH v3 1/3] crypto: ccp - Add a call to xts_check_key()

2017-07-25 Thread Gary R Hook
Vet the key using the available standard function

Signed-off-by: Gary R Hook 
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c |9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c 
b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 3f26a415ef44..2b5d3a62fad9 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -97,7 +98,13 @@ static int ccp_aes_xts_complete(struct crypto_async_request 
*async_req, int ret)
 static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
  unsigned int key_len)
 {
-   struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+   struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm);
+   struct ccp_ctx *ctx = crypto_tfm_ctx(xfm);
+   int ret;
+
+   ret = xts_check_key(xfm, key, key_len);
+   if (ret)
+   return ret;
 
/* Only support 128-bit AES key with a 128-bit Tweak key,
 * otherwise use the fallback



[PATCH v3 2/3] crypto: ccp - Rework the unit-size check for XTS-AES

2017-07-25 Thread Gary R Hook
The CCP supports a limited set of unit-size values. Change the check
for this parameter such that acceptable values match the enumeration.
Then clarify the conditions under which we must use the fallback
implementation.

Signed-off-by: Gary R Hook 
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c |   57 +++
 1 file changed, 20 insertions(+), 37 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c 
b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 2b5d3a62fad9..5c2df880ab48 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -39,46 +39,26 @@ struct ccp_unit_size_map {
u32 value;
 };
 
-static struct ccp_unit_size_map unit_size_map[] = {
+static struct ccp_unit_size_map xts_unit_sizes[] = {
{
-   .size   = 4096,
-   .value  = CCP_XTS_AES_UNIT_SIZE_4096,
-   },
-   {
-   .size   = 2048,
-   .value  = CCP_XTS_AES_UNIT_SIZE_2048,
-   },
-   {
-   .size   = 1024,
-   .value  = CCP_XTS_AES_UNIT_SIZE_1024,
+   .size   = 16,
+   .value  = CCP_XTS_AES_UNIT_SIZE_16,
},
{
-   .size   = 512,
+   .size   = 512,
.value  = CCP_XTS_AES_UNIT_SIZE_512,
},
{
-   .size   = 256,
-   .value  = CCP_XTS_AES_UNIT_SIZE__LAST,
-   },
-   {
-   .size   = 128,
-   .value  = CCP_XTS_AES_UNIT_SIZE__LAST,
-   },
-   {
-   .size   = 64,
-   .value  = CCP_XTS_AES_UNIT_SIZE__LAST,
-   },
-   {
-   .size   = 32,
-   .value  = CCP_XTS_AES_UNIT_SIZE__LAST,
+   .size   = 1024,
+   .value  = CCP_XTS_AES_UNIT_SIZE_1024,
},
{
-   .size   = 16,
-   .value  = CCP_XTS_AES_UNIT_SIZE_16,
+   .size   = 2048,
+   .value  = CCP_XTS_AES_UNIT_SIZE_2048,
},
{
-   .size   = 1,
-   .value  = CCP_XTS_AES_UNIT_SIZE__LAST,
+   .size   = 4096,
+   .value  = CCP_XTS_AES_UNIT_SIZE_4096,
},
 };
 
@@ -138,16 +118,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request 
*req,
if (!req->info)
return -EINVAL;
 
+   /* Check conditions under which the CCP can fulfill a request. The
+* device can handle input plaintext of a length that is a multiple
+* of the unit_size, but the crypto implementation only supports
+* the unit_size being equal to the input length. This limits the
+* number of scenarios we can handle.
+*/
unit_size = CCP_XTS_AES_UNIT_SIZE__LAST;
-   if (req->nbytes <= unit_size_map[0].size) {
-   for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) {
-   if (!(req->nbytes & (unit_size_map[unit].size - 1))) {
-   unit_size = unit_size_map[unit].value;
-   break;
-   }
+   for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) {
+   if (req->nbytes == xts_unit_sizes[unit].size) {
+   unit_size = unit;
+   break;
}
}
-
if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) ||
(ctx->u.aes.key_len != AES_KEYSIZE_128)) {
SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);



[PATCH v3 3/3] crypto: ccp - Add XTS-AES-256 support for CCP version 5

2017-07-25 Thread Gary R Hook
Signed-off-by: Gary R Hook 
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c |   26 ++
 drivers/crypto/ccp/ccp-crypto.h |2 +-
 drivers/crypto/ccp/ccp-ops.c|2 ++
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c 
b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 5c2df880ab48..94b5bcf5b628 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -80,19 +80,24 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher 
*tfm, const u8 *key,
 {
struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm);
struct ccp_ctx *ctx = crypto_tfm_ctx(xfm);
+   unsigned int ccpversion = ccp_version();
int ret;
 
ret = xts_check_key(xfm, key, key_len);
if (ret)
return ret;
 
-   /* Only support 128-bit AES key with a 128-bit Tweak key,
-* otherwise use the fallback
+   /* Version 3 devices support 128-bit keys; version 5 devices can
+* accommodate 128- and 256-bit keys.
 */
switch (key_len) {
case AES_KEYSIZE_128 * 2:
memcpy(ctx->u.aes.key, key, key_len);
break;
+   case AES_KEYSIZE_256 * 2:
+   if (ccpversion > CCP_VERSION(3, 0))
+   memcpy(ctx->u.aes.key, key, key_len);
+   break;
}
ctx->u.aes.key_len = key_len / 2;
sg_init_one(>u.aes.key_sg, ctx->u.aes.key, key_len);
@@ -105,6 +110,8 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 {
struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+   unsigned int ccpversion = ccp_version();
+   unsigned int fallback = 0;
unsigned int unit;
u32 unit_size;
int ret;
@@ -131,8 +138,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request 
*req,
break;
}
}
-   if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) ||
-   (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
+   /* The CCP has restrictions on block sizes. Also, a version 3 device
+* only supports AES-128 operations; version 5 CCPs support both
+* AES-128 and -256 operations.
+*/
+   if (unit_size == CCP_XTS_AES_UNIT_SIZE__LAST)
+   fallback = 1;
+   if ((ccpversion < CCP_VERSION(5, 0)) &&
+   (ctx->u.aes.key_len != AES_KEYSIZE_128))
+   fallback = 1;
+   if ((ctx->u.aes.key_len != AES_KEYSIZE_128) &&
+   (ctx->u.aes.key_len != AES_KEYSIZE_256))
+   fallback = 1;
+   if (fallback) {
SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);
 
/* Use the fallback to process the request for any
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 156b8233853f..880f8acdd0cd 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -91,7 +91,7 @@ struct ccp_aes_ctx {
 
struct scatterlist key_sg;
unsigned int key_len;
-   u8 key[AES_MAX_KEY_SIZE];
+   u8 key[AES_MAX_KEY_SIZE * 2];
 
u8 nonce[CTR_RFC3686_NONCE_SIZE];
 
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 6a2857274f61..6045e8c1d025 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1065,6 +1065,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue 
*cmd_q,
 
if (xts->key_len == AES_KEYSIZE_128)
aestype = CCP_AES_TYPE_128;
+   else if (xts->key_len == AES_KEYSIZE_256)
+   aestype = CCP_AES_TYPE_256;
else
return -EINVAL;
 



[PATCH] crypto: ccp - Fix XTS-AES-128 support on v5 CCPs

2017-07-25 Thread Gary R Hook
Version 5 CCPs have some new requirements for XTS-AES: the type field
must be specified, and the key requires 512 bits, with each part
occupying 256 bits and padded with zeroes.

cc:  # 4.9.x+

Signed-off-by: Gary R Hook 
---
 drivers/crypto/ccp/ccp-crypto-aes-xts.c |4 ++-
 drivers/crypto/ccp/ccp-dev-v5.c |2 +
 drivers/crypto/ccp/ccp-dev.h|2 +
 drivers/crypto/ccp/ccp-ops.c|   43 +--
 include/linux/ccp.h |3 +-
 5 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c 
b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 58a4244b4752..3f26a415ef44 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -1,8 +1,9 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
+ * Author: Gary R Hook 
  * Author: Tom Lendacky 
  *
  * This program is free software; you can redistribute it and/or modify
@@ -164,6 +165,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
memset(>cmd, 0, sizeof(rctx->cmd));
INIT_LIST_HEAD(>cmd.entry);
rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
+   rctx->cmd.u.xts.type = CCP_AES_TYPE_128;
rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
   : CCP_AES_ACTION_DECRYPT;
rctx->cmd.u.xts.unit_size = unit_size;
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index b3526336d608..9221db10d5ed 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -145,6 +145,7 @@ union ccp_function {
 #defineCCP_AES_MODE(p) ((p)->aes.mode)
 #defineCCP_AES_TYPE(p) ((p)->aes.type)
 #defineCCP_XTS_SIZE(p) ((p)->aes_xts.size)
+#defineCCP_XTS_TYPE(p) ((p)->aes_xts.type)
 #defineCCP_XTS_ENCRYPT(p)  ((p)->aes_xts.encrypt)
 #defineCCP_DES3_SIZE(p)((p)->des3.size)
 #defineCCP_DES3_ENCRYPT(p) ((p)->des3.encrypt)
@@ -344,6 +345,7 @@ static int ccp5_perform_xts_aes(struct ccp_op *op)
	CCP5_CMD_PROT(&desc) = 0;
 
	function.raw = 0;
+   CCP_XTS_TYPE(&function) = op->u.xts.type;
	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
	CCP5_CMD_FUNCTION(&desc) = function.raw;
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 9320931d89da..3d51180199ac 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -194,6 +194,7 @@
 #define CCP_AES_CTX_SB_COUNT   1
 
 #define CCP_XTS_AES_KEY_SB_COUNT   1
+#define CCP5_XTS_AES_KEY_SB_COUNT  2
 #define CCP_XTS_AES_CTX_SB_COUNT   1
 
 #define CCP_DES3_KEY_SB_COUNT  1
@@ -497,6 +498,7 @@ struct ccp_aes_op {
 };
 
 struct ccp_xts_aes_op {
+   enum ccp_aes_type type;
enum ccp_aes_action action;
enum ccp_xts_aes_unit_size unit_size;
 };
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index e23d138fc1ce..6a2857274f61 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1038,6 +1038,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue 
*cmd_q,
struct ccp_op op;
unsigned int unit_size, dm_offset;
bool in_place = false;
+   unsigned int sb_count;
+   enum ccp_aes_type aestype;
int ret;
 
switch (xts->unit_size) {
@@ -1061,7 +1063,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue 
*cmd_q,
return -EINVAL;
}
 
-   if (xts->key_len != AES_KEYSIZE_128)
+   if (xts->key_len == AES_KEYSIZE_128)
+   aestype = CCP_AES_TYPE_128;
+   else
return -EINVAL;
 
if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
@@ -1083,23 +1087,44 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue 
*cmd_q,
op.sb_key = cmd_q->sb_key;
op.sb_ctx = cmd_q->sb_ctx;
op.init = 1;
+   op.u.xts.type = aestype;
op.u.xts.action = xts->action;
op.u.xts.unit_size = xts->unit_size;
 
-   /* All supported key sizes fit in a single (32-byte) SB entry
-* and must be in little endian format. Use the 256-bit byte
-* swap passthru option to convert from big endian to little
-* endian.
+   /* A version 3 device only supports 128-bit keys, which fits into a
+* single SB entry. A version 5 device uses a 512-bit vector, so two
+* SB entries.
 */
+   if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
+   sb_count = CCP_XTS_AES_KEY_SB_COUNT;
+   else
+   sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
ret = ccp_init_dm_workarea(, 

Re: Fix dma unmap direction in iMX sahara aes calculation

2017-07-25 Thread Fabio Estevam
Hi Mogens,

On Sun, Jul 16, 2017 at 6:21 PM, Mogens Lauridsen
 wrote:
> Hi,
>
> The direction used in dma_unmap_sg in aes calc in sahara.c is wrong.
> This result in the cache not being invalidated correct when aes
> calculation is done and result is dma'ed to memory.
> This is seen as sporadic wrong result from aes calc.
>
> Thanks,
> Mogens
>
> Signed-off-by: Mogens Lauridsen 

Your two fixes are good, but the patch format is not correct.

You could try to use git send-email for submitting the two patches.

Subject could be improved. If you run 'git log
drivers/crypto/sahara.c' you will see the common standard, so you
could do:

crypto: sahara - Fix dma unmap direction as the Subject.

Then you need to run './scripts/checkpatch.pl 0001-your.patch' to see
what people and lists to send it to.

Please resend them.


Re: [RFC Part2 PATCH v3 02/26] crypto: ccp: Add Platform Security Processor (PSP) device support

2017-07-25 Thread Brijesh Singh


On 07/25/2017 03:29 AM, Kamil Konieczny wrote:

Hi,

minor misspelling,

On 24.07.2017 22:02, Brijesh Singh wrote:

Platform Security Processor (PSP) is part of AMD Secure Processor (AMD-SP),
PSP is a dedicated processor that provides the support for key management
commands in a Secure Encrypted Virtualiztion (SEV) mode, along with
software-based Tursted Executation Environment (TEE) to enable the

- ^ Trusted

third-party tursted applications.

-- ^ trusted
[...]



Noted. thanks

-Brijesh


Re: [PATCH 0/4] crypto: caam - add Job Ring support for DPAA2 parts

2017-07-25 Thread Shawn Guo
On Tue, Jul 25, 2017 at 01:31:52PM +, Horia Geantă wrote:
> On 7/25/2017 4:22 PM, Shawn Guo wrote:
> > On Tue, Jul 18, 2017 at 06:30:46PM +0300, Horia Geantă wrote:
> >> This patch set adds support for CAAM's legacy Job Ring backend / interface
> >> on platforms having DPAA2 (Datapath Acceleration Architecture v2), like
> >> LS1088A or LS2088A.
> >>
> >> I would like to get the DT patches through the crypto list (to make sure
> >> they don't end up merged before driver support).
> > 
> > Unless it's really urgent (usually critical bug fix), the DTS patches
> > should go through arm-soc tree.  We usually take DTS patches after the
> > driver counterpart has been accepted by subsystem maintainers, or in the
> > best case, has landed on mainline.
> 
> Thanks for the clarification.
> 
> Do I have to re-post the DTS patches once the driver is accepted or a
> ping should suffice or...?

A ping is sufficient.

Shawn


Re: [PATCH 0/4] crypto: caam - add Job Ring support for DPAA2 parts

2017-07-25 Thread Horia Geantă
On 7/25/2017 4:22 PM, Shawn Guo wrote:
> On Tue, Jul 18, 2017 at 06:30:46PM +0300, Horia Geantă wrote:
>> This patch set adds support for CAAM's legacy Job Ring backend / interface
>> on platforms having DPAA2 (Datapath Acceleration Architecture v2), like
>> LS1088A or LS2088A.
>>
>> I would like to get the DT patches through the crypto list (to make sure
>> they don't end up merged before driver support).
> 
> Unless it's really urgent (usually critical bug fix), the DTS patches
> should go through arm-soc tree.  We usually take DTS patches after the
> driver counterpart has been accepted by subsystem maintainers, or in the
> best case, has landed on mainline.

Thanks for the clarification.

Do I have to re-post the DTS patches once the driver is accepted or a
ping should suffice or...?

Horia


Re: [PATCH 0/4] crypto: caam - add Job Ring support for DPAA2 parts

2017-07-25 Thread Shawn Guo
On Tue, Jul 18, 2017 at 06:30:46PM +0300, Horia Geantă wrote:
> This patch set adds support for CAAM's legacy Job Ring backend / interface
> on platforms having DPAA2 (Datapath Acceleration Architecture v2), like
> LS1088A or LS2088A.
> 
> I would like to get the DT patches through the crypto list (to make sure
> they don't end up merged before driver support).

Unless it's really urgent (usually critical bug fix), the DTS patches
should go through arm-soc tree.  We usually take DTS patches after the
driver counterpart has been accepted by subsystem maintainers, or in the
best case, has landed on mainline.

Shawn


Re: [RFC Part2 PATCH v3 02/26] crypto: ccp: Add Platform Security Processor (PSP) device support

2017-07-25 Thread Kamil Konieczny
Hi,

minor misspelling,

On 24.07.2017 22:02, Brijesh Singh wrote:
> Platform Security Processor (PSP) is part of AMD Secure Processor (AMD-SP),
> PSP is a dedicated processor that provides the support for key management
> commands in a Secure Encrypted Virtualiztion (SEV) mode, along with
> software-based Tursted Executation Environment (TEE) to enable the
- ^ Trusted
> third-party tursted applications.
-- ^ trusted
[...]

-- 
Best regards,
Kamil Konieczny
Samsung R Institute Poland



Re: Poor RNG performance on Ryzen

2017-07-25 Thread Jan Glauber
On Sat, Jul 22, 2017 at 02:16:41PM -0400, Theodore Ts'o wrote:
> On Fri, Jul 21, 2017 at 04:55:12PM +0200, Oliver Mangold wrote:
> > On 21.07.2017 16:47, Theodore Ts'o wrote:
> > > On Fri, Jul 21, 2017 at 01:39:13PM +0200, Oliver Mangold wrote:
> > > > Better, but obviously there is still much room for improvement by 
> > > > reducing
> > > > the number of calls to RDRAND.
> > > Hmm, is there some way we can easily tell we are running on Ryzen?  Or
> > > do we believe this is going to be true for all AMD devices?
> > I would like to note that my first measurement on Broadwell suggest that the
> > current frequency of RDRAND calls seems to slow things down on Intel also
> > (but not as much as on Ryzen).
> 
> On my T470 laptop (with an Intel mobile core i7 processor), using your
> benchmark, I am getting 136 MB/s, versus your 75 MB/s.  But so what?
> 
> More realistically, if we are generating 256 bit keys (so we're
> reading from /dev/urandom 32 bytes at a time), it takes 2.24
> microseconds per key generation.  What do you get when you run:
> 
> dd if=/dev/urandom of=/dev/zero bs=256 count=100
> 
> Even if on Ryzen it's slower by a factor of two, 5 microseconds per
> key generation is pretty fast!  The time to do the Diffie-Hellman
> exchange and the RSA operations will probably completely swamp the
> time to generate the session key.
> 
> And if you think 2.24 or 5 microseconds is to slow for the IV
> generation --- then use a userspace ChaCha20 CRNG for that purpose.
> 
> I'm not really sure I see a real-life operational problem here.
> 
>   - Ted

While I agree that it is not an issue if the hardware is just slow I
still wonder why we read 8 bytes with arch_get_random_long() and
only use half of them as Oliver pointed out.

If arch_get_random_int() is not slower on Intel we could use that.
Or am I missing something?

--Jan