[PATCH 4/5] crypto:chelsio: Fix iv passed in fallback path for rfc3686

2018-02-24 Thread Harsh Jain
We use ctr(aes) as the fallback for rfc3686(ctr) requests. Send the updated IV
to the fallback path.
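
For context: rfc3686 builds the 16-byte counter block for ctr(aes) from a
4-byte nonce (kept with the key), the caller's 8-byte IV, and a 4-byte
big-endian block counter starting at 1. A minimal sketch of that layout
(constants from include/crypto/ctr.h; nonce is the key-derived nonce). The
point of the patch is that after partial processing the counter in reqctx->iv
has advanced, so the fallback must see that updated block, not the original
req->info:

/* Sketch: the counter block handed to the ctr(aes) fallback. */
u8 iv[CTR_RFC3686_BLOCK_SIZE];                     /* 16 bytes */

memcpy(iv, nonce, CTR_RFC3686_NONCE_SIZE);         /* 4-byte nonce */
memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->info,
       CTR_RFC3686_IV_SIZE);                       /* 8-byte IV */
/* 4-byte big-endian counter, initial value 1 */
*(__be32 *)(iv + CTR_RFC3686_NONCE_SIZE +
	    CTR_RFC3686_IV_SIZE) = cpu_to_be32(1);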

Signed-off-by: Harsh Jain 
---
 drivers/crypto/chelsio/chcr_algo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index f9c1970..3c3ca34 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -1292,7 +1292,7 @@ static int process_cipher(struct ablkcipher_request *req,
   req->src,
   req->dst,
   req->nbytes,
-  req->info,
+  reqctx->iv,
   op_type);
goto error;
}
-- 
2.1.4



[PATCH 3/5] crypto:chelsio: Update IV before sending request to HW

2018-02-24 Thread Harsh Jain
CBC decryption requires the last ciphertext block as the IV for the next
request. When the src and dst buffers are the same, that block gets overwritten
with plaintext. This patch copies the last block before sending the request to HW.
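
The failure mode: with in-place decryption the hardware writes plaintext over
the very block that must serve as the next IV. A minimal sketch of the save
(processed/this_wr_bytes are illustrative names; sg_pcopy_to_buffer() is from
lib/scatterlist.c):

/* Snapshot the last ciphertext block of this chunk before the HW
 * overwrites it with plaintext (src == dst). */
u8 saved_iv[AES_BLOCK_SIZE];

sg_pcopy_to_buffer(req->src, sg_nents(req->src), saved_iv,
		   AES_BLOCK_SIZE,
		   processed + this_wr_bytes - AES_BLOCK_SIZE);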

Signed-off-by: Harsh Jain 
---
 drivers/crypto/chelsio/chcr_algo.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 33f7b90..f9c1970 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -854,6 +854,13 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
transhdr_len, temp,
ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC);
reqctx->skb = skb;
+
+   if (reqctx->op && (ablkctx->ciph_mode ==
+  CHCR_SCMD_CIPHER_MODE_AES_CBC))
+   sg_pcopy_to_buffer(wrparam->req->src,
+   sg_nents(wrparam->req->src), wrparam->req->info, 16,
+   reqctx->processed + wrparam->bytes - AES_BLOCK_SIZE);
+
return skb;
 err:
return ERR_PTR(error);
@@ -1077,9 +1084,8 @@ static int chcr_update_cipher_iv(struct ablkcipher_request *req,
ret = chcr_update_tweak(req, iv, 0);
else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
if (reqctx->op)
-   sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
-  16,
-  reqctx->processed - AES_BLOCK_SIZE);
+   /*Updated before sending last WR*/
+   memcpy(iv, req->info, AES_BLOCK_SIZE);
else
memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
}
@@ -1107,11 +1113,8 @@ static int chcr_final_cipher_iv(struct ablkcipher_request *req,
else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
ret = chcr_update_tweak(req, iv, 1);
else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
-   if (reqctx->op)
-   sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
-  16,
-  reqctx->processed - AES_BLOCK_SIZE);
-   else
+   /*Already updated for Decrypt*/
+   if (!reqctx->op)
memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
 
}
-- 
2.1.4



[PATCH 5/5] crypto:chelsio:Split Hash requests for large scatter gather list

2018-02-24 Thread Harsh Jain
Send multiple WRs to the H/W when the number of entries received in the
scatter list cannot be sent in a single request.
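
A rough sketch of the approach (bytes_that_fit and issue_hash_wr are
hypothetical helpers, not the driver's actual functions): consume the
scatterlist in chunks sized to what one WR can carry, carrying the partial
digest across WRs via the per-WR context:

/* Hypothetical sketch of splitting one ahash request across WRs. */
unsigned int done = 0;

while (done < req->nbytes) {
	/* bytes of src that fit in one WR, honouring the per-entry
	 * CHCR_SRC_SG_SIZE cap and the available SGL space */
	unsigned int chunk = bytes_that_fit(req->src, done, space);

	issue_hash_wr(req, done, chunk);	/* hypothetical */
	/* the completion handler saves the partial digest and
	 * re-enters (chcr_ahash_continue) until done == nbytes */
	done += chunk;
}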

Signed-off-by: Harsh Jain 
---
 drivers/crypto/chelsio/chcr_algo.c   | 358 ++-
 drivers/crypto/chelsio/chcr_algo.h   |  10 +-
 drivers/crypto/chelsio/chcr_core.h   |   6 +-
 drivers/crypto/chelsio/chcr_crypto.h |  32 +++-
 4 files changed, 298 insertions(+), 108 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 3c3ca34..9db1cca 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -131,6 +131,11 @@ static inline int is_ofld_imm(const struct sk_buff *skb)
return (skb->len <= SGE_MAX_WR_LEN);
 }
 
+static inline void chcr_init_hctx_per_wr(struct chcr_ahash_req_ctx *reqctx)
+{
+   memset(&reqctx->hctx_wr, 0, sizeof(struct chcr_hctx_per_wr));
+}
+
 static int sg_nents_xlen(struct scatterlist *sg, unsigned int reqlen,
 unsigned int entlen,
 unsigned int skip)
@@ -165,6 +170,7 @@ static inline void chcr_handle_ahash_resp(struct ahash_request *req,
  int err)
 {
struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
+   struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr;
int digestsize, updated_digestsize;
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm));
@@ -172,25 +178,43 @@ static inline void chcr_handle_ahash_resp(struct ahash_request *req,
if (input == NULL)
goto out;
digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
-   if (reqctx->is_sg_map)
-   chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
-   if (reqctx->dma_addr)
-   dma_unmap_single(&u_ctx->lldi.pdev->dev, reqctx->dma_addr,
-reqctx->dma_len, DMA_TO_DEVICE);
-   reqctx->dma_addr = 0;
updated_digestsize = digestsize;
if (digestsize == SHA224_DIGEST_SIZE)
updated_digestsize = SHA256_DIGEST_SIZE;
else if (digestsize == SHA384_DIGEST_SIZE)
updated_digestsize = SHA512_DIGEST_SIZE;
-   if (reqctx->result == 1) {
-   reqctx->result = 0;
-   memcpy(req->result, input + sizeof(struct cpl_fw6_pld),
-  digestsize);
-   } else {
-   memcpy(reqctx->partial_hash, input + sizeof(struct cpl_fw6_pld),
-  updated_digestsize);
+
+   if (hctx_wr->dma_addr) {
+   dma_unmap_single(&u_ctx->lldi.pdev->dev, hctx_wr->dma_addr,
+hctx_wr->dma_len, DMA_TO_DEVICE);
+   hctx_wr->dma_addr = 0;
+   }
+   if (hctx_wr->isfinal || ((hctx_wr->processed + reqctx->reqlen) ==
+req->nbytes)) {
+   if (hctx_wr->result == 1) {
+   hctx_wr->result = 0;
+   memcpy(req->result, input + sizeof(struct cpl_fw6_pld),
+  digestsize);
+   } else {
+   memcpy(reqctx->partial_hash,
+  input + sizeof(struct cpl_fw6_pld),
+  updated_digestsize);
+
+   }
+   goto unmap;
}
+   memcpy(reqctx->partial_hash, input + sizeof(struct cpl_fw6_pld),
+  updated_digestsize);
+
+   err = chcr_ahash_continue(req);
+   if (err)
+   goto unmap;
+   return;
+unmap:
+   if (hctx_wr->is_sg_map)
+   chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
+
+
 out:
req->base.complete(&req->base, err);
 }
@@ -563,7 +587,6 @@ static void  ulptx_walk_add_sg(struct ulptx_walk *walk,
 
if (!len)
return;
-
while (sg && skip) {
if (sg_dma_len(sg) <= skip) {
skip -= sg_dma_len(sg);
@@ -653,6 +676,35 @@ static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
}
return 0;
 }
+
+static int chcr_hash_ent_in_wr(struct scatterlist *src,
+unsigned int minsg,
+unsigned int space,
+unsigned int srcskip)
+{
+   int srclen = 0;
+   int srcsg = minsg;
+   int soffset = 0, sless;
+
+   if (sg_dma_len(src) == srcskip) {
+   src = sg_next(src);
+   srcskip = 0;
+   }
+   while (src && space > (sgl_ent_len[srcsg + 1])) {
+   sless = min_t(unsigned int, sg_dma_len(src) - soffset - srcskip,
+   CHCR_SRC_SG_SIZE);
+   srclen += sless;
+   soffset += sless;
+   srcsg++;
+   if (sg_dma_len(src) == (soffset + srcskip)) {
+   src = sg_next(src);
+   soffset = 0;
+   

[PATCH 2/5] crypto:chelsio: Fix src buffer dma length

2018-02-24 Thread Harsh Jain
The ulptx header cannot have a length greater than 64KB. Adjust the length accordingly.
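
The essence of the fix: count src bytes in slices of at most CHCR_SRC_SG_SIZE
(the driver's per-entry cap) so no single ULPTX entry exceeds the hardware
limit, and only advance to the next scatterlist entry once it is fully
consumed. A DMA entry larger than the cap now contributes several slices
instead of one oversized entry:

/* Condensed sketch of the corrected accounting from the diff below. */
sless = min_t(unsigned int,
	      sg_dma_len(src) - srcskip - soffset, CHCR_SRC_SG_SIZE);
srclen += sless;
soffset += sless;
if ((soffset + srcskip) == sg_dma_len(src)) {
	src = sg_next(src);	/* entry fully consumed */
	srcskip = 0;
	soffset = 0;
}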

Signed-off-by: Harsh Jain 
---
 drivers/crypto/chelsio/chcr_algo.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 2bef618..33f7b90 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -662,7 +662,7 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 {
int srclen = 0, dstlen = 0;
int srcsg = minsg, dstsg = minsg;
-   int offset = 0, less;
+   int offset = 0, soffset = 0, less, sless = 0;
 
if (sg_dma_len(src) == srcskip) {
src = sg_next(src);
@@ -673,10 +673,12 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
dst = sg_next(dst);
dstskip = 0;
}
-
+   soffset = 0;
while (src && dst &&
   space > (sgl_ent_len[srcsg + 1] + dsgl_ent_len[dstsg])) {
-   srclen += (sg_dma_len(src) - srcskip);
+   sless = min_t(unsigned int, sg_dma_len(src) - srcskip - soffset,
+   CHCR_SRC_SG_SIZE);
+   srclen += sless;
srcsg++;
offset = 0;
while (dst && ((dstsg + 1) <= MAX_DSGL_ENT) &&
@@ -687,15 +689,20 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 dstskip, CHCR_DST_SG_SIZE);
dstlen += less;
offset += less;
-   if (offset == sg_dma_len(dst)) {
+   if ((offset + dstskip) == sg_dma_len(dst)) {
dst = sg_next(dst);
offset = 0;
}
dstsg++;
dstskip = 0;
}
-   src = sg_next(src);
-   srcskip = 0;
+   soffset += sless;
+   if ((soffset + srcskip) == sg_dma_len(src)) {
+   src = sg_next(src);
+   srcskip = 0;
+   soffset = 0;
+   }
+
}
return min(srclen, dstlen);
 }
-- 
2.1.4



[PATCH 1/5] crypto:chelsio: Use kernel round function to align lengths

2018-02-24 Thread Harsh Jain
Replace DIV_ROUND_UP with roundup() or rounddown().
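
The transformations are value-preserving; the helpers from
include/linux/kernel.h just make the intent explicit:

/* Equivalences this patch relies on (align = 16):
 *
 *   roundup(x, 16)   == DIV_ROUND_UP(x, 16) * 16
 *   rounddown(x, 16) == (x / 16) * 16          (replaces ROUND_16())
 *
 * e.g. x = 40:  roundup(40, 16) = 48,  rounddown(40, 16) = 32
 */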

Signed-off-by: Harsh Jain 
---
 drivers/crypto/chelsio/chcr_algo.c | 73 ++
 drivers/crypto/chelsio/chcr_algo.h |  1 -
 2 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 8a67884..2bef618 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -784,14 +784,14 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
nents = sg_nents_xlen(reqctx->dstsg,  wrparam->bytes, CHCR_DST_SG_SIZE,
  reqctx->dst_ofst);
dst_size = get_space_for_phys_dsgl(nents + 1);
-   kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
+   kctx_len = roundup(ablkctx->enckey_len, 16);
transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
nents = sg_nents_xlen(reqctx->srcsg, wrparam->bytes,
  CHCR_SRC_SG_SIZE, reqctx->src_ofst);
-   temp = reqctx->imm ? (DIV_ROUND_UP((IV + wrparam->req->nbytes), 16)
- * 16) : (sgl_len(nents + MIN_CIPHER_SG) * 8);
+   temp = reqctx->imm ? roundup(IV + wrparam->req->nbytes, 16) :
+(sgl_len(nents + MIN_CIPHER_SG) * 8);
transhdr_len += temp;
-   transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+   transhdr_len = roundup(transhdr_len, 16);
skb = alloc_skb(SGE_MAX_WR_LEN, flags);
if (!skb) {
error = -ENOMEM;
@@ -1148,7 +1148,7 @@ static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
if ((bytes + reqctx->processed) >= req->nbytes)
bytes  = req->nbytes - reqctx->processed;
else
-   bytes = ROUND_16(bytes);
+   bytes = rounddown(bytes, 16);
} else {
/*CTR mode counter overflow*/
bytes  = req->nbytes - reqctx->processed;
@@ -1234,7 +1234,7 @@ static int process_cipher(struct ablkcipher_request *req,
   CHCR_DST_SG_SIZE, 0);
dnents += 1; // IV
phys_dsgl = get_space_for_phys_dsgl(dnents);
-   kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
+   kctx_len = roundup(ablkctx->enckey_len, 16);
transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
reqctx->imm = (transhdr_len + IV + req->nbytes) <=
SGE_MAX_WR_LEN;
@@ -1252,7 +1252,7 @@ static int process_cipher(struct ablkcipher_request *req,
if ((bytes + reqctx->processed) >= req->nbytes)
bytes  = req->nbytes - reqctx->processed;
else
-   bytes = ROUND_16(bytes);
+   bytes = rounddown(bytes, 16);
} else {
bytes = req->nbytes;
}
@@ -1526,10 +1526,10 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
SGE_MAX_WR_LEN;
nents = sg_nents_xlen(req->src, param->sg_len, CHCR_SRC_SG_SIZE, 0);
nents += param->bfr_len ? 1 : 0;
-   transhdr_len += req_ctx->imm ? (DIV_ROUND_UP((param->bfr_len +
-   param->sg_len), 16) * 16) :
+   transhdr_len += req_ctx->imm ? roundup((param->bfr_len +
+   param->sg_len), 16) :
(sgl_len(nents) * 8);
-   transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+   transhdr_len = roundup(transhdr_len, 16);
 
skb = alloc_skb(SGE_MAX_WR_LEN, flags);
if (!skb)
@@ -2124,11 +2124,11 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
reqctx->imm = (transhdr_len + assoclen + IV + req->cryptlen) <
SGE_MAX_WR_LEN;
-   temp = reqctx->imm ? (DIV_ROUND_UP((assoclen + IV + req->cryptlen), 16)
-   * 16) : (sgl_len(reqctx->src_nents + reqctx->aad_nents
+   temp = reqctx->imm ? roundup(assoclen + IV + req->cryptlen, 16)
+   : (sgl_len(reqctx->src_nents + reqctx->aad_nents
+ MIN_GCM_SG) * 8);
transhdr_len += temp;
-   transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+   transhdr_len = roundup(transhdr_len, 16);
 
if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE,
transhdr_len, op_type)) {
@@ -2187,9 +2187,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
memcpy(chcr_req->key_ctx.key, actx->dec_rrkey,
   aeadctx->enckey_len);
 
-   memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) <<
-   4), actx->h_iopad, kctx_len -
-   

[PATCH 0/5] crypto:chelsio: Bug fixes and cleanup

2018-02-24 Thread Harsh Jain
It includes bug fixes and code cleanup.

Harsh Jain (5):
  crypto:chelsio: Use kernel round function to align lengths
  crypto:chelsio: Fix src buffer dma length
  crypto:chelsio: Update IV before sending request to HW
  crypto:chelsio: Fix iv passed in fallback path for rfc3686
  crypto:chelsio:Split Hash requests for large scatter gather list

 drivers/crypto/chelsio/chcr_algo.c   | 467 ---
 drivers/crypto/chelsio/chcr_algo.h   |  11 +-
 drivers/crypto/chelsio/chcr_core.h   |   6 +-
 drivers/crypto/chelsio/chcr_crypto.h |  32 ++-
 4 files changed, 355 insertions(+), 161 deletions(-)

-- 
2.1.4



error in libkcapi 1.0.3 for aead aio

2018-02-24 Thread Harsh Jain
Hi Stephan,

One of the tests mentioned in test.sh is failing for the AEAD AIO operation,
even though the driver is returning EBADMSG (as expected) to af_alg with the
latest cryptodev tree.

Debug log and strace attached.

Command :

strace -o strace.log ../bin/kcapi -x 10 -c "gcm(aes)" -i 7815d4b06ae50c9c56e87bd7 -k ea38ac0c9b9998c80e28fb496a2b88d9 -a "853f98a750098bec1aa7497e979e78098155c877879556bb51ddeb6374cbaefc" -t "c4ce58985b7203094be1d134c1b8ab0b" -q "b03692f86d1b8b39baf2abb255197c98"

Thanks & Regards

Harsh Jain

execve("../bin/kcapi", ["../bin/kcapi", "-x", "10", "-c", "gcm(aes)", "-i", 
"7815d4b06ae50c9c56e87bd7", "-k", "ea38ac0c9b9998c80e28fb496a2b88d9", "-a", 
"853f98a750098bec1aa7497e979e7809"..., "-t", 
"c4ce58985b7203094be1d134c1b8ab0b", "-q", "b03692f86d1b8b39baf2abb255197c98"], 
[/* 22 vars */]) = 0
brk(0)  = 0x12a1000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f1f6471f000
access("/etc/ld.so.preload", R_OK)  = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=86613, ...}) = 0
mmap(NULL, 86613, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f1f64709000
close(3)= 0
open("/lib64/libtinfo.so.5", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\316\0\0\0\0\0\0"..., 
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=174528, ...}) = 0
mmap(NULL, 2268928, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f1f642d7000
mprotect(0x7f1f642fc000, 2097152, PROT_NONE) = 0
mmap(0x7f1f644fc000, 20480, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x25000) = 0x7f1f644fc000
close(3)= 0
open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`\16\0\0\0\0\0\0"..., 
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=19776, ...}) = 0
mmap(NULL, 2109744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f1f640d3000
mprotect(0x7f1f640d5000, 2097152, PROT_NONE) = 0
mmap(0x7f1f642d5000, 8192, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f1f642d5000
close(3)= 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\34\2\0\0\0\0\0"..., 
832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2118128, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f1f64708000
mmap(NULL, 3932672, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7f1f63d12000
mprotect(0x7f1f63ec8000, 2097152, PROT_NONE) = 0
mmap(0x7f1f640c8000, 24576, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7f1f640c8000
mmap(0x7f1f640ce000, 16896, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f1f640ce000
close(3)= 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f1f64706000
arch_prctl(ARCH_SET_FS, 0x7f1f64706740) = 0
mprotect(0x7f1f640c8000, 16384, PROT_READ) = 0
mprotect(0x7f1f642d5000, 4096, PROT_READ) = 0
mprotect(0x7f1f644fc000, 16384, PROT_READ) = 0
mprotect(0x6dc000, 4096, PROT_READ) = 0
mprotect(0x7f1f6472, 4096, PROT_READ) = 0
munmap(0x7f1f64709000, 86613)   = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8)  = 0
open("/dev/tty", O_RDWR|O_NONBLOCK) = 3
close(3)= 0
brk(0)  = 0x12a1000
brk(0x12c2000)  = 0x12c2000
brk(0)  = 0x12c2000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106070960, ...}) = 0
mmap(NULL, 106070960, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f1f5d7e9000
close(3)= 0
brk(0)  = 0x12c2000
getuid()= 0
getgid()= 0
geteuid()   = 0
getegid()   = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8)  = 0
open("/proc/meminfo", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7f1f6471e000
read(3, "MemTotal:   16415288 kB\nMemF"..., 1024) = 1024
close(3)= 0
munmap(0x7f1f6471e000, 4096)= 0
rt_sigaction(SIGCHLD, {SIG_DFL, [], SA_RESTORER|SA_RESTART, 0x7f1f63d47250}, 
{SIG_DFL, [], 0}, 8) = 0
rt_sigaction(SIGCHLD, {SIG_DFL, [], SA_RESTORER|SA_RESTART, 0x7f1f63d47250}, 
{SIG_DFL, [], SA_RESTORER|SA_RESTART, 0x7f1f63d47250}, 8) = 0
rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x7f1f63d47250}, {SIG_DFL, [], 
0}, 8) = 0
rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x7f1f63d47250}, {SIG_DFL, [], 
SA_RESTORER, 0x7f1f63d47250}, 8) = 0
rt_sigaction(SIGQUIT, {SIG_DFL, [], 

[PATCH 2/3] crypto: ccp - return an actual key size from RSA max_size callback

2018-02-24 Thread Maciej S. Szmigiero
rsa-pkcs1pad uses the value returned from an RSA implementation's max_size
callback as the size of the input buffer passed to the RSA implementation for
encrypt and sign operations.

The CCP RSA implementation uses a hardware input buffer whose size depends only
on the current RSA key length, so it should return this key length from the
max_size callback, too.
This also matches what the kernel software RSA implementation does.

Previously, the value returned from this callback was always the maximum
RSA key size the CCP hardware supports.
This resulted in this huge buffer being passed by rsa-pkcs1pad to CCP even
for smaller key sizes, and then in a buffer overflow when ccp_run_rsa_cmd()
tried to copy this large input buffer into a hardware input buffer sized for
the actual RSA key length.
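
For reference, a simplified sketch of the contract involved (assumption:
condensed from crypto/rsa-pkcs1pad.c), showing why max_size must describe the
current key rather than the hardware maximum:

/* At set_key time, rsa-pkcs1pad records the child's max_size as the
 * working key size and builds every padded encrypt/sign input
 * exactly this many bytes long:
 *
 *     ctx->key_size = crypto_akcipher_maxsize(child);
 *
 * Returning the hardware maximum instead of n_len therefore made
 * pkcs1pad hand CCP an input far larger than the key-sized buffer
 * that ccp_run_rsa_cmd() copies it into. */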

Signed-off-by: Maciej S. Szmigiero 
Fixes: ceeec0afd684 ("crypto: ccp - Add support for RSA on the CCP")
Cc: stable@vger.kernel.org
---
 drivers/crypto/ccp/ccp-crypto-rsa.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c
index e6db8672d89c..05850dfd7940 100644
--- a/drivers/crypto/ccp/ccp-crypto-rsa.c
+++ b/drivers/crypto/ccp/ccp-crypto-rsa.c
@@ -60,10 +60,9 @@ static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
 
 static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
 {
-   if (ccp_version() > CCP_VERSION(3, 0))
-   return CCP5_RSA_MAXMOD;
-   else
-   return CCP_RSA_MAXMOD;
+   struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+   return ctx->u.rsa.n_len;
 }
 
 static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)


[PATCH 3/3] crypto: ccp - protect RSA implementation from too large input data

2018-02-24 Thread Maciej S. Szmigiero
The CCP RSA implementation uses a hardware input buffer whose size depends only
on the current RSA key length. The key modulus and the message to be processed
are then copied to this buffer based on their own lengths.

Since the price for providing too-long input data is a buffer overflow, and
there has already been a case where this happened, let's reject such oversized
input data and log an error message in that case, so we know what is going on.
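
A worked example of the bounds being enforced (using the o_len computation
visible in the diff below; key_size is in bits):

/*   o_len = 32 * ((key_size + 255) / 256);    bytes per operand
 *   i_len = o_len * 2;                        modulus || message
 *
 *   key_size = 2048  ->  o_len = 256,  i_len = 512
 *   key_size = 4096  ->  o_len = 512,  i_len = 1024
 *
 * mod_len > o_len or src_len > o_len now fails with -EINVAL
 * instead of overflowing the DMA area. */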

Signed-off-by: Maciej S. Szmigiero 
---
 drivers/crypto/ccp/ccp-ops.c | 24 
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 406b95329b3d..517aeee30abf 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -1770,10 +1770,6 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
return -EINVAL;
 
-   memset(&op, 0, sizeof(op));
-   op.cmd_q = cmd_q;
-   op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
-
/* The RSA modulus must precede the message being acted upon, so
 * it must be copied to a DMA area where the message and the
 * modulus can be concatenated.  Therefore the input buffer
@@ -1785,6 +1781,26 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
o_len = 32 * ((rsa->key_size + 255) / 256);
i_len = o_len * 2;
 
+   if (rsa->mod_len > o_len) {
+   dev_err(cmd_q->ccp->dev,
+   "RSA modulus of %u bytes too large for key size of %u 
bits\n",
+   (unsigned int)rsa->mod_len,
+   (unsigned int)rsa->key_size);
+   return -EINVAL;
+   }
+
+   if (rsa->src_len > o_len) {
+   dev_err(cmd_q->ccp->dev,
+   "RSA data of %u bytes too large for key size of %u 
bits\n",
+   (unsigned int)rsa->src_len,
+   (unsigned int)rsa->key_size);
+   return -EINVAL;
+   }
+
+   memset(&op, 0, sizeof(op));
+   op.cmd_q = cmd_q;
+   op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
sb_count = 0;
if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
/* sb_count is the number of storage block slots required


[PATCH 1/3] X.509: unpack RSA signatureValue field from BIT STRING

2018-02-24 Thread Maciej S. Szmigiero
The signatureValue field of a X.509 certificate is encoded as a BIT STRING.
For RSA signatures this BIT STRING is of so-called primitive subtype, which
contains a u8 prefix indicating a count of unused bits in the encoding.

We have to strip this prefix from signature data, just as we already do for
key data in x509_extract_key_data() function.

This wasn't noticed earlier because this prefix byte is zero for RSA key sizes
divisible by 8. Since BIT STRING is a big-endian encoding, adding zero prefixes
has no bearing on its value.

The signature length, however, was incorrect, which is a problem for RSA
implementations that need it to be exactly correct (like AMD CCP).
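
For illustration, the DER bytes around a 2048-bit (256-byte) RSA signature;
only the final 256 bytes should reach the RSA implementation:

/* signatureValue BIT STRING, 2048-bit RSA:
 *
 *   03 82 01 01    tag = BIT STRING, length = 0x101 (257) bytes
 *   00             prefix: count of unused bits in the last octet
 *   xx xx .. xx    256 bytes of actual signature
 *
 * After this patch raw_sig skips the prefix byte, so raw_sig_size
 * is 256 rather than 257. */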

Signed-off-by: Maciej S. Szmigiero 
Fixes: c26fd69fa009 ("X.509: Add a crypto key parser for binary (DER) X.509 
certificates")
Cc: stable@vger.kernel.org
---
 crypto/asymmetric_keys/x509_cert_parser.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index ce2df8c9c583..88c26a4538ae 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -249,6 +249,15 @@ int x509_note_signature(void *context, size_t hdrlen,
return -EINVAL;
}
 
+   if (!strcmp(ctx->cert->sig->pkey_algo, "rsa")) {
+   /* Discard the BIT STRING metadata */
+   if (vlen < 1 || *(const u8 *)value != 0)
+   return -EBADMSG;
+
+   value++;
+   vlen--;
+   }
+
ctx->cert->raw_sig = value;
ctx->cert->raw_sig_size = vlen;
return 0;


RE: [Crypto v7 03/12] tls: support for inline tls

2018-02-24 Thread Atul Gupta


-Original Message-
From: Dave Watson [mailto:davejwat...@fb.com] 
Sent: Friday, February 23, 2018 11:03 PM
To: Atul Gupta 
Cc: da...@davemloft.net; herb...@gondor.apana.org.au; s...@queasysnail.net; 
linux-crypto@vger.kernel.org; net...@vger.kernel.org; Ganesh GR 

Subject: Re: [Crypto v7 03/12] tls: support for inline tls

On 02/23/18 04:58 PM, Atul Gupta wrote:
> > On 02/22/18 11:21 PM, Atul Gupta wrote:
> > > @@ -403,6 +431,15 @@ static int do_tls_setsockopt_tx(struct sock *sk, 
> > > char __user *optval,
> > >   goto err_crypto_info;
> > >   }
> > >  
> > > + rc = tls_offload_dev_absent(sk);
> > > + if (rc == -EINVAL) {
> > > + goto out;
> > > + } else if (rc == -EEXIST) {
> > > + /* Retain HW unhash for cleanup and move to SW Tx */
> > > + sk->sk_prot[TLS_BASE_TX].unhash =
> > > + sk->sk_prot[TLS_FULL_HW].unhash;
> > 
> > I'm still confused by this, it lookes like it is modifying the global 
> > tls_prots without taking a lock?  And modifying it for all sockets, not 
> > just this one?  One way to fix might be to always set an unhash in 
> > TLS_BASE_TX, and then have a function pointer unhash in ctx.
> 
> The code enters do_tls_setsockopt_tx only for offload-capable devices that do
> not define a FULL_HW setsockopt as chtls does; the unhash prot update is
> required for cleanup/revert of the setup done in tls_hw_hash. This update
> does not impact the SW or other inline HW paths.

I still don't follow.  If it doesn't impact SW, then what is it doing?
According to the comment, we're moving to SW tx, where sk_prot will be
&tls_prot[TLS_SW_TX], and the unhash function you set here in TLS_BASE_TX
won't be called.
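
A minimal sketch of the suggested approach (ctx->unhash is a hypothetical
field; tls_get_ctx() is from include/net/tls.h):

/* One shared unhash in TLS_BASE_TX that dispatches through the
 * per-socket tls_context instead of patching the global prot. */
static void tls_base_unhash(struct sock *sk)
{
	struct tls_context *ctx = tls_get_ctx(sk);

	if (ctx && ctx->unhash)		/* hypothetical per-ctx hook */
		ctx->unhash(sk);	/* HW-specific cleanup */
	else
		tcp_prot.unhash(sk);	/* default behaviour */
}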

Some of the scenarios I originally thought of:
- tls_init finds the inline offload dev and sets TLS_FULL_HW, but setsockopt
remains do_tls_setsockopt_tx. In the above path we continue in TLS_SW_TX mode
with the updated unhash. Since the sw_tx prot is borrowed from base_tx, we
modified the base_tx prot unhash for cleanup.
- tls_offload_dev_absent finds no device, i.e. rc = 0. Continue in TLS_SW_TX
mode; no change required.
- An inline tls device is added after tls_init is called.
do_tls_setsockopt_tx will see tls_offload_dev_absent return EEXIST and will
modify unhash, but only if tx_conf == TLS_FULL_HW [missing now], and you
rightly pointed out that it ends up modifying the base prot for all sk, which
is not what we want. My worry was losing the hw-specific unhash.
I see calling tls_offload_dev_absent in do_tls_setsockopt_tx as overkill; the
sk here perhaps requires no update to continue in SW_TX. The HW unhash is
still assigned to the tls_init 'sk' for cleanup in the close path, so it is
better to remove tls_offload_dev_absent altogether and simplify.



Re: [dm-devel] Integrity checking fails with Atmel SHA hw accelerator enabled

2018-02-24 Thread Gilad Ben-Yossef
Hi,

I'm adding the linux crypto mailing list because it seems relevant.


On Fri, Feb 23, 2018 at 2:25 PM, Gigi W  wrote:
> Thanks for the input!
>
> See below
>
>
> On Fri, Feb 23, 2018 at 10:53 AM Gilad Ben-Yossef 
> wrote:
>>
>> On Fri, Feb 23, 2018 at 10:30 AM, Gigi W  wrote:
>> > Hi
>> >
>> > I'm having some trouble using dm-verity for a squashfs root file system
>> > that
>> > seems to be related to the
>> > Atmel SHA hw accelerator in the kernel, CONFIG_CRYPTO_DEV_ATMEL_SHA
>> >
>> > Some info about my setup:
>> > * I'm using a board with a SAMA5D4 CPU.
>> > * I'm using Yocto rocko for building an image for that device.
>> >
>> > The idea is that, using the 4.14.14 kernel, integrity checking using
>> > kernel crypto fails with the Atmel SHA hw accelerator enabled in the
>> > kernel.
>> > By disabling it, `CONFIG_CRYPTO_DEV_ATMEL_SHA=n`, and using the software
>> > sha256 algo, integrity checking works as expected.
>> > This is my kernel config [3]
>> >
>> > Using the 4.8.4 Kernel and Atmel SHA hw accelerator enabled, everything
>> > was
>> > ok.
>> >
>> > This is what triggers the error during verified boot:
>> >
>> > status=`veritysetup create vroot $root_dev $verity_dev --hash-offset $hashoffset $root_hash`
>> >
>> > mount /dev/mapper/vroot /mnt/
>> > mount_ok=`cat /proc/mounts | grep mnt`
>> > if [ -z "$mount_ok" ] ; then
>> > echo "Failed to mount $root_dev on mnt/"
>> > else
>> > echo "Switch rootfs"
>> > exec switch_root -c /dev/console /mnt /sbin/init
>> > fi
>> >
>> > The mount operation fails:
>> >
>> > device-mapper: verity: 179:4: metadata block 2 is corrupted
>> > EXT4-fs (dm-0): unable to read superblock
>> > device-mapper: verity: 179:4: metadata block 2 is corrupted
>> > EXT4-fs (dm-0): unable to read superblock
>> > device-mapper: verity: 179:4: metadata block 2 is corrupted
>> > EXT4-fs (dm-0): unable to read superblock
>> > device-mapper: verity: 179:4: metadata block 2 is corrupted
>> > SQUASHFS error: squashfs_read_data failed to read block 0x0
>> > squashfs: SQUASHFS error: unable to read squashfs_super_block
>> > device-mapper: verity: 179:4: metadata block 2 is corrupted
>> > FAT-fs (dm-0): unable to read boot sector
>> > mount: mounting /dev/mapper/vroot on /mnt/ failed: Input/output error
>> > Failed to mount /dev/mmcblk0p4 on mnt/
>> > reboot: Restarting system
>> > Reboot failed -- System halted
>> >
>> > Using veritysetup to verify the integrity against the hashes is
>> > successful,
>> > as it's not using the kernel for that ...
>> >
>> >
>> > So it looks like it something changed from 4.8.4 to 4.14.14.
>>
>> If I am not mistaken the Atmel SHA hw accelerator is an async (read:
>> off CPU) crypto accelerator.
>>  Up until 4.12 (I think...) DM-Verity did not use async crypto
>> accelerators (even if present and have high
>> priority). I've changed this in commit d1ac3ff008fb ("dm verity:
>> switch to using asynchronous hash crypto API").
>
>
> This would explain some things, like the same speeds while reading from a
> verity device, having the CONFIG_CRYPTO_DEV_ATMEL_SHA enabled and then
> disabled, on the 4.8.4 kernel -> it was always using the sync API.
>
>>
>>
>> Is it possible that whatever issue you are seeing has always been
>> there and when DM-Verity started using
>> async. accelerators it was only exposed?
>
>
> It looks like it.
>
> From my understandings + tests described above, Atmel SHA hw accelerator
> works correctly - output hashes are ok, dm-verity with other async crypto
> accelerators is working ok,
> but dm-verity + Atmel SHA hw accelerator don't play nice together.
>
> I couldn't find anyone else complaining about this.
>

I agree that the possibility that there is something wrong in the
Atmel SHA accelerator is one possible direction.
The other is that there is something wrong in DM-Verity that only
manifests under certain conditions when working with async crypto HW
providers.
I don't think that is the case, because I tested DM-Verity after my
changes with the CryptoCell async HW provider and did not get any
other bug reports, but I'd like to be sure.

Can you do a little experiment? Add debug printks to show the data
being hashed, the hash produced by Atmel, and the expected
pre-calculated hash.

If the theory that there is something wrong with the Atmel accelerator
holds, calculating the hash of the same data by other means (software)
should give a different result.

If you are having trouble adding the printk's in the right place let
me know and I'll create a patch for you to test.
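
Something along these lines should do it (a sketch against
drivers/md/dm-verity-target.c; the variable names and exact placement
at the digest-comparison site may differ):

/* Dump computed vs. expected digest where the mismatch is found. */
print_hex_dump(KERN_DEBUG, "verity real: ", DUMP_PREFIX_OFFSET,
	       16, 1, real_digest, v->digest_size, false);
print_hex_dump(KERN_DEBUG, "verity want: ", DUMP_PREFIX_OFFSET,
	       16, 1, want_digest, v->digest_size, false);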

Cheers,
Gilad


-- 
Gilad Ben-Yossef
Chief Coffee Drinker

"If you take a class in large-scale robotics, can you end up in a
situation where the homework eats your dog?"
 -- Jean-Baptiste Queru