Re: [PATCH] staging: ccree: use size_t consistently

2017-12-19 Thread Greg Kroah-Hartman
On Wed, Dec 20, 2017 at 07:23:31AM +, Gilad Ben-Yossef wrote:
> Fix declaration, implementation and wrapper function to use
> the same size_t type we actually define the parameter to be.
> 
> Fixes: 3f268f5d6669 ("staging: ccree: turn compile time debug log to params")
> Signed-off-by: Gilad Ben-Yossef 

You forgot the reported-by: tag :(

I'll go add it...


[PATCH] staging: ccree: use size_t consistently

2017-12-19 Thread Gilad Ben-Yossef
Fix declaration, implementation and wrapper function to use
the same size_t type we actually define the parameter to be.
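For context, a minimal user-space sketch of the mismatch being removed (names here are
illustrative, not the driver's): "%lu" only matches size_t on platforms where size_t
happens to be unsigned long, whereas "%zu" is correct everywhere.

#include <stdio.h>
#include <stddef.h>

/* Illustrative only: mirrors the shape of the ccree prefix formatting. */
static void dump_prefix(const char *name, size_t len)
{
	char prefix[32];

	/* len is size_t, so "%zu" is the portable specifier; "%lu" would be
	 * a -Wformat mismatch on e.g. 32-bit or LLP64 targets. */
	snprintf(prefix, sizeof(prefix), "%s[%zu]: ", name, len);
	puts(prefix);
}

int main(void)
{
	dump_prefix("buf", (size_t)16);
	return 0;
}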

Fixes: 3f268f5d6669 ("staging: ccree: turn compile time debug log to params")
Signed-off-by: Gilad Ben-Yossef 
---
 drivers/staging/ccree/ssi_driver.c | 2 +-
 drivers/staging/ccree/ssi_driver.h | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/ccree/ssi_driver.c 
b/drivers/staging/ccree/ssi_driver.c
index 56b5d45..1254c69 100644
--- a/drivers/staging/ccree/ssi_driver.c
+++ b/drivers/staging/ccree/ssi_driver.c
@@ -86,7 +86,7 @@ void __dump_byte_array(const char *name, const u8 *buf, 
size_t len)
if (!buf)
return;
 
-   snprintf(prefix, sizeof(prefix), "%s[%lu]: ", name, len);
+   snprintf(prefix, sizeof(prefix), "%s[%zu]: ", name, len);
 
print_hex_dump(KERN_DEBUG, prefix, DUMP_PREFIX_ADDRESS, 16, 1, buf,
   len, false);
diff --git a/drivers/staging/ccree/ssi_driver.h 
b/drivers/staging/ccree/ssi_driver.h
index 5a56f7a..bf83f3e 100644
--- a/drivers/staging/ccree/ssi_driver.h
+++ b/drivers/staging/ccree/ssi_driver.h
@@ -174,10 +174,9 @@ static inline struct device *drvdata_to_dev(struct 
cc_drvdata *drvdata)
return &drvdata->plat_dev->dev;
 }
 
-void __dump_byte_array(const char *name, const u8 *the_array,
-  unsigned long size);
+void __dump_byte_array(const char *name, const u8 *buf, size_t len);
 static inline void dump_byte_array(const char *name, const u8 *the_array,
-  unsigned long size)
+  size_t size)
 {
if (cc_dump_bytes)
__dump_byte_array(name, the_array, size);
-- 
2.7.4



[PATCH] crypto: scomp - delete unused comments

2017-12-19 Thread Zhou Wang
There are no init and exit callbacks, so delete their comments.

Signed-off-by: Zhou Wang 
---
 include/crypto/internal/scompress.h | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/include/crypto/internal/scompress.h 
b/include/crypto/internal/scompress.h
index ccad9b2..0f6ddac 100644
--- a/include/crypto/internal/scompress.h
+++ b/include/crypto/internal/scompress.h
@@ -28,17 +28,6 @@ struct crypto_scomp {
  * @free_ctx:  Function frees context allocated with alloc_ctx
  * @compress:  Function performs a compress operation
  * @decompress:Function performs a de-compress operation
- * @init:  Initialize the cryptographic transformation object.
- * This function is used to initialize the cryptographic
- * transformation object. This function is called only once at
- * the instantiation time, right after the transformation context
- * was allocated. In case the cryptographic hardware has some
- * special requirements which need to be handled by software, this
- * function shall check for the precise requirement of the
- * transformation and put any software fallbacks in place.
- * @exit:  Deinitialize the cryptographic transformation object. This is a
- * counterpart to @init, used to remove various changes set in
- * @init.
  * @base:  Common crypto API algorithm data structure
  */
 struct scomp_alg {
-- 
1.9.1



[PATCH v2 1/2] crypto: Fix out-of-bounds access of the data buffer in generic-gcm-aesni

2017-12-19 Thread Junaid Shahid
The aesni_gcm_enc/dec functions can access memory before the start of
the data buffer if the length of the data buffer is less than 16 bytes.
This is because they perform the read via a single 16-byte load. This
can potentially result in accessing a page that is not mapped and thus
causing the machine to crash. This patch fixes that by reading the
partial block byte-by-byte and optionally via an 8-byte load if the block
was at least 8 bytes.
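Roughly, the new READ_PARTIAL_BLOCK strategy in C terms (an analogue with assumed
names, not the assembly macro added below): do one 8-byte load only when at least
8 bytes remain, then pick up the tail byte-by-byte, so nothing outside
[buf, buf + len) is ever touched.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Sketch: gather a partial block (len < 16) into a zero-padded 16-byte
 * buffer without reading past either end of src. */
static void read_partial_block(uint8_t dst[16], const uint8_t *src, size_t len)
{
	size_t off = 0;

	memset(dst, 0, 16);
	if (len >= 8) {			/* single 8-byte chunk */
		memcpy(dst, src, 8);
		off = 8;
	}
	while (off < len) {		/* remaining 1..7 bytes, one at a time */
		dst[off] = src[off];
		off++;
	}
}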

Fixes: 0487ccac ("crypto: aesni - make non-AVX AES-GCM work with any aadlen")
Signed-off-by: Junaid Shahid 
---
 arch/x86/crypto/aesni-intel_asm.S | 86 ---
 1 file changed, 44 insertions(+), 42 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_asm.S 
b/arch/x86/crypto/aesni-intel_asm.S
index 16627fec80b2..4b16f31cb359 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -85,6 +85,7 @@ enc:.octa 0x2
 # and zero should follow ALL_F
 .section   .rodata, "a", @progbits
 .align 16
+.octa 0x
 SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F:  .octa 0x
 .octa 0x
@@ -256,6 +257,36 @@ aad_shift_arr:
pxor  \TMP1, \GH# result is in TMP1
 .endm
 
+# read the last <16 byte block
+# Clobbers %rax, DPTR, TMP1 and XMM1-2
+.macro READ_PARTIAL_BLOCK DPTR DLEN TMP1 XMM1 XMM2 XMMDst
+pxor \XMMDst, \XMMDst
+mov \DLEN, \TMP1
+cmp $8, \DLEN
+jl _read_last_lt8_\@
+mov (\DPTR), %rax
+MOVQ_R64_XMM %rax, \XMMDst
+   add $8, \DPTR
+sub $8, \TMP1
+jz _done_read_last_partial_block_\@
+_read_last_lt8_\@:
+   shl $8, %rax
+mov -1(\DPTR, \TMP1, 1), %al
+dec \TMP1
+jnz _read_last_lt8_\@
+MOVQ_R64_XMM %rax, \XMM1
+   # adjust the shuffle mask pointer to be able to shift either 0 or 8
+   # bytes depending on whether the last block is <8 bytes or not
+mov \DLEN, \TMP1
+and $8, \TMP1
+   lea SHIFT_MASK(%rip), %rax
+   sub \TMP1, %rax
+   movdqu (%rax), \XMM2# get the appropriate shuffle mask
+   PSHUFB_XMM \XMM2, \XMM1 # shift left either 0 or 8 bytes
+   por \XMM1, \XMMDst
+_done_read_last_partial_block_\@:
+.endm
+
 /*
 * if a = number of total plaintext bytes
 * b = floor(a/16)
@@ -1310,6 +1341,7 @@ _esb_loop_\@:
MOVADQ  (%r10),\TMP1
AESENCLAST  \TMP1,\XMM0
 .endm
+
 /*
 * void aesni_gcm_dec(void *aes_ctx,// AES Key schedule. Starts on a 16 
byte boundary.
 *   u8 *out,   // Plaintext output. Encrypt in-place 
is allowed.
@@ -1385,14 +1417,6 @@ _esb_loop_\@:
 *
 *AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-*   from the definition of the spec, aadLen can only be 8 or 12 bytes.
-*   The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-*   from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-*   For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 */
@@ -1486,19 +1510,15 @@ _zero_cipher_left_decrypt:
PSHUFB_XMM %xmm10, %xmm0
 
ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1# E(K, Yn)
-   sub $16, %r11
-   add %r13, %r11
-   movdqu (%arg3,%r11,1), %xmm1   # receive the last <16 byte block
-   lea SHIFT_MASK+16(%rip), %r12
-   sub %r13, %r12
-# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
-# (%r13 is the number of bytes in plaintext mod 16)
-   movdqu (%r12), %xmm2   # get the appropriate shuffle mask
-   PSHUFB_XMM %xmm2, %xmm1# right shift 16-%r13 butes
 
+   lea (%arg3,%r11,1), %r10
+   READ_PARTIAL_BLOCK %r10 %r13 %r12 %xmm2 %xmm3 %xmm1
+
+   lea ALL_F+16(%rip), %r12
+   sub %r13, %r12
movdqa  %xmm1, %xmm2
pxor %xmm1, %xmm0# Ciphertext XOR E(K, Yn)
-   movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+   movdqu (%r12), %xmm1
# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
pand %xmm1, %xmm0# mask out top 16-%r13 bytes of %xmm0
pand%xmm1, %xmm2
@@ -1507,9 +1527,6 @@ _zero_cipher_left_decrypt:
 
pxor %xmm2, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # GHASH computation for the last <16 byte block
-   sub %r13, %r11
-   add $16, %r11
 
 # output %r13 bytes
MOVQ_R64_XMM%xmm0, %rax
@@ -1663,14 +1680,6 @@ ENDPROC(aesni_gcm_dec)
 *
 * AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-*   from the definition of the spec, aadLen can only be 8 or 12 

[PATCH v2 2/2] crypto: Fix out-of-bounds access of the AAD buffer in generic-gcm-aesni

2017-12-19 Thread Junaid Shahid
The aesni_gcm_enc/dec functions can access memory after the end of
the AAD buffer if the AAD length is not a multiple of 4 bytes.
It didn't matter with rfc4106-gcm-aesni as in that case the AAD was
always followed by the 8 byte IV, but that is no longer the case with
generic-gcm-aesni. This can potentially result in accessing a page that
is not mapped and thus causing the machine to crash. This patch fixes
that by reading the last <16 byte block of the AAD byte-by-byte and
optionally via an 8-byte load if the block was at least 8 bytes.

Fixes: 0487ccac ("crypto: aesni - make non-AVX AES-GCM work with any aadlen")
Signed-off-by: Junaid Shahid 
---
 arch/x86/crypto/aesni-intel_asm.S | 80 +--
 1 file changed, 10 insertions(+), 70 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_asm.S 
b/arch/x86/crypto/aesni-intel_asm.S
index 4b16f31cb359..03846f2d32c9 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -309,7 +309,7 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
pxor   \XMM2, \XMM2
 
cmp$16, %r11
-   jl _get_AAD_rest8\num_initial_blocks\operation
+   jl _get_AAD_rest\num_initial_blocks\operation
 _get_AAD_blocks\num_initial_blocks\operation:
movdqu (%r10), %xmm\i
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
@@ -322,43 +322,13 @@ _get_AAD_blocks\num_initial_blocks\operation:
jge_get_AAD_blocks\num_initial_blocks\operation
 
movdqu \XMM2, %xmm\i
+
+   /* read the last <16B of AAD */
+_get_AAD_rest\num_initial_blocks\operation:
cmp$0, %r11
je _get_AAD_done\num_initial_blocks\operation
 
-   pxor   %xmm\i,%xmm\i
-
-   /* read the last <16B of AAD. since we have at least 4B of
-   data right after the AAD (the ICV, and maybe some CT), we can
-   read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\num_initial_blocks\operation:
-   cmp$4, %r11
-   jle_get_AAD_rest4\num_initial_blocks\operation
-   movq   (%r10), \TMP1
-   add$8, %r10
-   sub$8, %r11
-   pslldq $8, \TMP1
-   psrldq $8, %xmm\i
-   pxor   \TMP1, %xmm\i
-   jmp_get_AAD_rest8\num_initial_blocks\operation
-_get_AAD_rest4\num_initial_blocks\operation:
-   cmp$0, %r11
-   jle_get_AAD_rest0\num_initial_blocks\operation
-   mov(%r10), %eax
-   movq   %rax, \TMP1
-   add$4, %r10
-   sub$4, %r10
-   pslldq $12, \TMP1
-   psrldq $4, %xmm\i
-   pxor   \TMP1, %xmm\i
-_get_AAD_rest0\num_initial_blocks\operation:
-   /* finalize: shift out the extra bytes we read, and align
-   left. since pslldq can only shift by an immediate, we use
-   vpshufb and an array of shuffle masks */
-   movq   %r12, %r11
-   salq   $4, %r11
-   movdqu aad_shift_arr(%r11), \TMP1
-   PSHUFB_XMM \TMP1, %xmm\i
-_get_AAD_rest_final\num_initial_blocks\operation:
+   READ_PARTIAL_BLOCK %r10, %r12, %r11, \TMP1, \TMP2, %xmm\i
PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
pxor   \XMM2, %xmm\i
GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
@@ -568,7 +538,7 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
pxor   \XMM2, \XMM2
 
cmp$16, %r11
-   jl _get_AAD_rest8\num_initial_blocks\operation
+   jl _get_AAD_rest\num_initial_blocks\operation
 _get_AAD_blocks\num_initial_blocks\operation:
movdqu (%r10), %xmm\i
PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
@@ -581,43 +551,13 @@ _get_AAD_blocks\num_initial_blocks\operation:
jge_get_AAD_blocks\num_initial_blocks\operation
 
movdqu \XMM2, %xmm\i
+
+   /* read the last <16B of AAD */
+_get_AAD_rest\num_initial_blocks\operation:
cmp$0, %r11
je _get_AAD_done\num_initial_blocks\operation
 
-   pxor   %xmm\i,%xmm\i
-
-   /* read the last <16B of AAD. since we have at least 4B of
-   data right after the AAD (the ICV, and maybe some PT), we can
-   read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\num_initial_blocks\operation:
-   cmp$4, %r11
-   jle_get_AAD_rest4\num_initial_blocks\operation
-   movq   (%r10), \TMP1
-   add$8, %r10
-   sub$8, %r11
-   pslldq $8, \TMP1
-   psrldq $8, %xmm\i
-   pxor   \TMP1, %xmm\i
-   jmp_get_AAD_rest8\num_initial_blocks\operation
-_get_AAD_rest4\num_initial_blocks\operation:
-   cmp$0, %r11
-   jle_get_AAD_rest0\num_initial_blocks\operation
-   mov(%r10), %eax
-   movq   %rax, \TMP1
-   add$4, %r10
-   sub 

[PATCH v2 0/2] Fix out-of-bounds memory accesses in generic-gcm-aesni

2017-12-19 Thread Junaid Shahid
Changes in v2:
- Also fixed issue 2 described below in addition to issue 1 in v1

The aesni_gcm_enc/dec functions can access memory before the start or end of
the supplied src buffer. This can happen if either:

1. The data length is less than 16 bytes and there is no AAD or the AAD
   length is not enough to cover the underrun. In this case, memory before
   the start of the buffer would be accessed.
2. The AAD length is not a multiple of 4 bytes and the data length is too
   small to cover the overrun. In this case, memory after the end of the
   buffer would be accessed.

This was not a problem when rfc4106-gcm-aesni was the only mode supported by
the aesni module, as in that case there is always enough AAD and IV bytes to
cover the out-of-bounds accesses. However, that is no longer the case with
the generic-gcm-aesni mode. This could potentially result in accessing pages
that are not mapped, thus causing a crash.
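To make issue 1 concrete, a small stand-alone sketch (not from the patches) of when
the old single 16-byte load could fault: with fewer than 16 data bytes and nothing
preceding the buffer, the load has to start before the buffer, and if that start
falls on the previous page the access may hit unmapped memory.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Old behaviour: the last partial block was fetched with one 16-byte load
 * ending at buf + len, i.e. starting at buf + len - 16.  With len < 16 that
 * start lies before the buffer; return true when it also lies on the
 * preceding page, where nothing may be mapped. */
static bool underrun_may_fault(uintptr_t buf, size_t len)
{
	if (len >= 16)
		return false;
	uintptr_t start = buf + len - 16;
	return (start / PAGE_SIZE) != (buf / PAGE_SIZE);
}

int main(void)
{
	/* 4-byte buffer at the very start of a page: the 16-byte load would
	 * begin 12 bytes into the previous page. */
	printf("%d\n", underrun_may_fault(0x7f0000002000UL, 4));	/* 1 */
	printf("%d\n", underrun_may_fault(0x7f0000002800UL, 4));	/* 0 */
	return 0;
}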


Junaid Shahid (2):
  crypto: Fix out-of-bounds access of the data buffer in
generic-gcm-aesni
  crypto: Fix out-of-bounds access of the AAD buffer in
generic-gcm-aesni

 arch/x86/crypto/aesni-intel_asm.S | 166 +-
 1 file changed, 54 insertions(+), 112 deletions(-)

-- 
2.15.1.620.gb9897f4670-goog



Re: [PATCH] crypto: x86/twofish-3way - Fix %rbp usage

2017-12-19 Thread Josh Poimboeuf
On Mon, Dec 18, 2017 at 04:40:26PM -0800, Eric Biggers wrote:
> From: Eric Biggers 
> 
> Using %rbp as a temporary register breaks frame pointer convention and
> breaks stack traces when unwinding from an interrupt in the crypto code.
> 
> In twofish-3way, we can't simply replace %rbp with another register
> because there are none available.  Instead, we use the stack to hold the
> values that %rbp, %r11, and %r12 were holding previously.  Each of these
> values represents the half of the output from the previous Feistel round
> that is being passed on unchanged to the following round.  They are only
> used once per round, when they are exchanged with %rax, %rbx, and %rcx.
> 
> As a result, we free up 3 registers (one per block) and can reassign
> them so that %rbp is not used, and additionally %r14 and %r15 are not
> used so they do not need to be saved/restored.
> 
> There may be a small overhead caused by replacing 'xchg REG, REG' with
> the needed sequence 'mov MEM, REG; mov REG, MEM; mov REG, REG' once per
> round.  But, counterintuitively, when I tested "ctr-twofish-3way" on a
> Haswell processor, the new version was actually about 2% faster.
> (Perhaps 'xchg' is not as well optimized as plain moves.)
> 
> Reported-by: syzbot 
> Signed-off-by: Eric Biggers 

Thanks a lot for fixing this!

Reviewed-by: Josh Poimboeuf 

-- 
Josh


[PATCH] crypto: Fix out-of-bounds memory access in generic-gcm-aesni

2017-12-19 Thread Junaid Shahid
The aesni_gcm_enc/dec functions can access memory before the start of
the data buffer if the length of the data buffer is less than 16 bytes.
This is because they perform the read via a single 16-byte load. It
didn't matter with rfc4106-gcm-aesni as in that case there was always at
least 16 bytes preceding the data buffer in the form of AAD+IV, but that
is no longer the case with generic-gcm-aesni. This can potentially result
in accessing a page that is not mapped and thus causing the machine to
crash. This patch fixes that by reading the partial block byte-by-byte and
optionally via an 8-byte load if the block was at least 8 bytes.

Fixes: 0487ccac ("crypto: aesni - make non-AVX AES-GCM work with any aadlen")
Signed-off-by: Junaid Shahid 
---
 arch/x86/crypto/aesni-intel_asm.S | 85 ---
 1 file changed, 43 insertions(+), 42 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_asm.S 
b/arch/x86/crypto/aesni-intel_asm.S
index 16627fec80b2..a442d4645e91 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -85,6 +85,7 @@ enc:.octa 0x2
 # and zero should follow ALL_F
 .section   .rodata, "a", @progbits
 .align 16
+.octa 0x
 SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F:  .octa 0x
 .octa 0x
@@ -1310,6 +1311,40 @@ _esb_loop_\@:
MOVADQ  (%r10),\TMP1
AESENCLAST  \TMP1,\XMM0
 .endm
+
+# read the last <16 byte block
+# %r11 is the data offset value
+# %r13 is the length of the partial block
+# Clobbers %rax, TMP1-2 and XMM1-2
+.macro READ_PARTIAL_BLOCK TMP1, TMP2, XMM1, XMM2, XMMDst
+pxor \XMMDst, \XMMDst
+   lea -1(%arg3,%r11,1), \TMP1
+mov %r13, \TMP2
+cmp $8, %r13
+jl _read_last_lt8_encrypt_\@
+mov 1(\TMP1), %rax
+MOVQ_R64_XMM %rax, \XMMDst
+add $8, \TMP1
+sub $8, \TMP2
+jz _done_read_last_partial_block_encrypt_\@
+_read_last_lt8_encrypt_\@:
+   shl $8, %rax
+mov (\TMP1, \TMP2, 1), %al
+dec \TMP2
+jnz _read_last_lt8_encrypt_\@
+MOVQ_R64_XMM %rax, \XMM1
+   # adjust the shuffle mask pointer to be able to shift either 0 or 8
+   # bytes depending on whether the last block is <8 bytes or not
+mov %r13, \TMP1
+and $8, \TMP1
+   lea SHIFT_MASK(%rip), %rax
+   sub \TMP1, %rax
+   movdqu (%rax), \XMM2# get the appropriate shuffle mask
+   PSHUFB_XMM \XMM2, \XMM1 # shift left either 0 or 8 bytes
+   por \XMM1, \XMMDst
+_done_read_last_partial_block_encrypt_\@:
+.endm
+
 /*
 * void aesni_gcm_dec(void *aes_ctx,// AES Key schedule. Starts on a 16 
byte boundary.
 *   u8 *out,   // Plaintext output. Encrypt in-place 
is allowed.
@@ -1385,14 +1420,6 @@ _esb_loop_\@:
 *
 *AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-*   from the definition of the spec, aadLen can only be 8 or 12 bytes.
-*   The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-*   from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-*   For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 */
@@ -1486,19 +1513,13 @@ _zero_cipher_left_decrypt:
PSHUFB_XMM %xmm10, %xmm0
 
ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1# E(K, Yn)
-   sub $16, %r11
-   add %r13, %r11
-   movdqu (%arg3,%r11,1), %xmm1   # receive the last <16 byte block
-   lea SHIFT_MASK+16(%rip), %r12
-   sub %r13, %r12
-# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
-# (%r13 is the number of bytes in plaintext mod 16)
-   movdqu (%r12), %xmm2   # get the appropriate shuffle mask
-   PSHUFB_XMM %xmm2, %xmm1# right shift 16-%r13 butes
+   READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm3 %xmm1
 
+   lea ALL_F+16(%rip), %r12
+   sub %r13, %r12
movdqa  %xmm1, %xmm2
pxor %xmm1, %xmm0# Ciphertext XOR E(K, Yn)
-   movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+   movdqu (%r12), %xmm1
# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
pand %xmm1, %xmm0# mask out top 16-%r13 bytes of %xmm0
pand%xmm1, %xmm2
@@ -1507,9 +1528,6 @@ _zero_cipher_left_decrypt:
 
pxor %xmm2, %xmm8
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # GHASH computation for the last <16 byte block
-   sub %r13, %r11
-   add $16, %r11
 
 # output %r13 bytes
MOVQ_R64_XMM%xmm0, %rax
@@ -1663,14 +1681,6 @@ ENDPROC(aesni_gcm_dec)
 *
 * 

Re: [PATCH] crypto: n2 - cure use after free

2017-12-19 Thread David Miller
From: Jan Engelhardt 
Date: Tue, 19 Dec 2017 19:09:07 +0100

> queue_cache_init is first called for the Control Word Queue
> (n2_crypto_probe). At that time, queue_cache[0] is NULL and a new
> kmem_cache will be allocated. If the subsequent n2_register_algs call
> fails, the kmem_cache will be released in queue_cache_destroy, but
> queue_cache[0] is not set back to NULL.
> 
> So when the Module Arithmetic Unit gets probed next (n2_mau_probe),
> queue_cache_init will not allocate a kmem_cache again, but leave it
> as its bogus value, causing a BUG() to trigger when queue_cache[0] is
> eventually passed to kmem_cache_zalloc:
 ...
> Signed-off-by: Jan Engelhardt 

Acked-by: David S. Miller 


[PATCH] crypto: n2 - cure use after free

2017-12-19 Thread Jan Engelhardt
queue_cache_init is first called for the Control Word Queue
(n2_crypto_probe). At that time, queue_cache[0] is NULL and a new
kmem_cache will be allocated. If the subsequent n2_register_algs call
fails, the kmem_cache will be released in queue_cache_destroy, but
queue_cache[0] is not set back to NULL.

So when the Module Arithmetic Unit gets probed next (n2_mau_probe),
queue_cache_init will not allocate a kmem_cache again, but leave it
as its bogus value, causing a BUG() to trigger when queue_cache[0] is
eventually passed to kmem_cache_zalloc:

n2_crypto: Found N2CP at /virtual-devices@100/n2cp@7
n2_crypto: Registered NCS HVAPI version 2.0
called queue_cache_init
n2_crypto: md5 alg registration failed
n2cp f028687c: /virtual-devices@100/n2cp@7: Unable to register 
algorithms.
called queue_cache_destroy
n2cp: probe of f028687c failed with error -22
n2_crypto: Found NCP at /virtual-devices@100/ncp@6
n2_crypto: Registered NCS HVAPI version 2.0
called queue_cache_init
kernel BUG at mm/slab.c:2993!
Call Trace:
 [00604488] kmem_cache_alloc+0x1a8/0x1e0
  (inlined) kmem_cache_zalloc
  (inlined) new_queue
  (inlined) spu_queue_setup
  (inlined) handle_exec_unit
 [10c61eb4] spu_mdesc_scan+0x1f4/0x460 [n2_crypto]
 [10c62b80] n2_mau_probe+0x100/0x220 [n2_crypto]
 [0084b174] platform_drv_probe+0x34/0xc0
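Reduced to a user-space analogue (hypothetical names, not the driver code), the bug is
a freed cache pointer that is never reset, and the fix is exactly the NULL assignments
added below:

#include <stdlib.h>
#include <stdio.h>

static void *queue_cache[2];	/* analogue of the driver's kmem_cache slots */

static int cache_init(int idx)
{
	if (!queue_cache[idx])		/* skipped on the second probe ... */
		queue_cache[idx] = malloc(64);
	return queue_cache[idx] ? 0 : -1;
}

static void cache_destroy(int idx)
{
	free(queue_cache[idx]);
	queue_cache[idx] = NULL;	/* the fix: drop the stale pointer */
}

int main(void)
{
	cache_init(0);
	cache_destroy(0);	/* error path of the first probe */
	cache_init(0);		/* second probe: without the NULL reset above,
				 * this would hand back the freed pointer */
	printf("cache at %p\n", queue_cache[0]);
	cache_destroy(0);
	return 0;
}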

Signed-off-by: Jan Engelhardt 
---
 drivers/crypto/n2_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 48de52cf2ecc..662e709812cc 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1625,6 +1625,7 @@ static int queue_cache_init(void)
  CWQ_ENTRY_SIZE, 0, NULL);
if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+   queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
return -ENOMEM;
}
return 0;
@@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void)
 {
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+   queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+   queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
 }
 
 static long spu_queue_register_workfn(void *arg)
-- 
2.12.3



Re: [PATCH v2] hwrng: Clean up RNG list when last hwrng is unregistered

2017-12-19 Thread Gary R Hook

On 12/17/2017 03:49 AM, PrasannaKumar Muralidharan wrote:

On 17 December 2017 at 14:53, PrasannaKumar Muralidharan
 wrote:

Hi Gary,

Some minor comments below.

On 16 December 2017 at 01:25, Gary R Hook  wrote:


Commit 142a27f0a731 added support for a "best" RNG, and in doing so
introduced a hang from rmmod/modprobe -r when the last RNG on the list
was unloaded.


Nice catch. Thanks for fixing this.


When the hwrng list is depleted, return the global variables to their
original state and decrement all references to the object.

Fixes: 142a27f0a731 ("hwrng: core - Reset user selected rng by writing "" to 
rng_current")


Please cc the commit author (in this case its me) so that this patch
gets noticed easily.


D'oh! I did not do so on this version. My apologies.




Signed-off-by: Gary R Hook 
---

Changes since v1: fix misspelled word in subject

  drivers/char/hw_random/core.c |4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 657b8770b6b9..91bb98c42a1c 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -306,6 +306,10 @@ static int enable_best_rng(void)
 ret = ((new_rng == current_rng) ? 0 : 
set_current_rng(new_rng));
 if (!ret)
 cur_rng_set_by_user = 0;
+   } else {
+   drop_current_rng();


When the hwrng list is empty just set current_rng = NULL instead of
calling drop_current_rng().


+   cur_rng_set_by_user = 0;
+   ret = 0;
 }

 return ret;



Regards,
PrasannaKumar


I am fine with the code as is.

Reviewed-by: PrasannaKumar Muralidharan 

Regards,
PrasannaKumar





Re: [PATCH] crypto: x86/twofish-3way - Fix %rbp usage

2017-12-19 Thread Ingo Molnar

* Ingo Molnar  wrote:

> 
> * Eric Biggers  wrote:
> 
> > There may be a small overhead caused by replacing 'xchg REG, REG' with
> > the needed sequence 'mov MEM, REG; mov REG, MEM; mov REG, REG' once per
> > round.  But, counterintuitively, when I tested "ctr-twofish-3way" on a
> > Haswell processor, the new version was actually about 2% faster.
> > (Perhaps 'xchg' is not as well optimized as plain moves.)
> 
> XCHG has implicit LOCK semantics on all x86 CPUs, so that's not a surprising 
> result I think.

Correction: I think XCHG only implies LOCK if there's a memory operand involved -
register-register XCHG should not imply any barriers.

So the result is indeed unintuitive.

Thanks,

Ingo


Re: [PATCH] crypto: n2 - cure use after free

2017-12-19 Thread David Miller
From: Jan Engelhardt 
Date: Tue, 19 Dec 2017 16:42:39 +0100 (CET)

> Nevertheless, I think that the error pathing in n2_core.c should be made 
> robust as well.

I completely agree.

> Should I resubmit with a new commit message?

Yes.


Re: [PATCH] crypto: n2 - cure use after free

2017-12-19 Thread Jan Engelhardt

On Tuesday 2017-12-19 16:31, David Miller wrote:
>
>Instead, what fails is the algorithm registry which you should look
>more deeply into the cause of.

You are right. The registration failure is because the crypto layer 
expects halg->statesize to be non-zero, and drivers/crypto/n2_core.c 
does not set it, causing breakage since possibly 
v4.2-rc1-182-g8996eafdcbad (commit by rmk):

(1471)  halg = &ahash->halg;
halg->digestsize = tmpl->digest_size;

Nevertheless, I think that the error pathing in n2_core.c should be made 
robust as well. Should I resubmit with a new commit message?
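For reference, a minimal sketch of the kind of change the registration failure points
at (the ahash API field names are real, the state struct is a placeholder, and this is
not the submitted fix): since 8996eafdcbad the ahash registration path rejects a zero
halg->statesize, so the driver has to report the size of its export()/import() state.

	halg = &ahash->halg;
	halg->digestsize = tmpl->digest_size;
	/* placeholder type: whatever blob the driver's export() would produce */
	halg->statesize = sizeof(struct my_export_state);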


RE: [PATCH] crypto: x86/twofish-3way - Fix %rbp usage

2017-12-19 Thread David Laight
From: Juergen Gross
> Sent: 19 December 2017 08:05
..
> 
> Exchanging 2 registers can be done without memory access via:
> 
> xor reg1, reg2
> xor reg2, reg1
> xor reg1, reg2

That'll generate horrid data dependencies.
ISTR that there are some optimisations for the stack,
so even 'push reg1', 'mov reg2,reg1', 'pop reg2' might
be faster than the above.

David



Re: [PATCH] crypto: Fix race around ctx->rcvused by making it atomic_t

2017-12-19 Thread Stephan Mueller
Am Dienstag, 19. Dezember 2017, 12:26:42 CET schrieb Jonathan Cameron:

Hi Jonathan,

> > > + atomic_set(&ctx->rcvused, 0);
> > 
> > I think ATOMIC_INIT(0) is more suitable here.
> 
> It's ugly to use it to assign a dynamic element like this.
> 
> ctx->rcvused = (atomic_t)ATOMIC_INIT(0);

You are right, it is dynamic and not static assignment. Please disregard my 
comment.

Ciao
Stephan


Re: [PATCH] crypto: Fix race around ctx->rcvused by making it atomic_t

2017-12-19 Thread Jonathan Cameron
On Tue, 19 Dec 2017 12:11:19 +0100
Stephan Mueller  wrote:

> Am Dienstag, 19. Dezember 2017, 11:31:22 CET schrieb Jonathan Cameron:
> 
> Hi Jonathan,
> 
> > This variable was increased and decreased without any protection.
> > Result was an occasional miscount and negative wrap around resulting
> > in false resource allocation failures.
> > 
> > Signed-off-by: Jonathan Cameron 
> > Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
> > ---
> > Resending due to incompetence on my part - sorry all!
> > 
> > The fixes tag is probably not ideal as I'm not 100% sure when this actually
> > became a bug.  The code in question was introduced in
> > 
> > Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
> > and related.
> > rcvused moved in
> > Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
> > So I have gone with the latter option as that is where it will cleanly
> > apply. However, it probably doesn't matter as both are present only in the
> > 4.14 final kernel.
> > 
> >  crypto/af_alg.c | 4 ++--
> >  crypto/algif_aead.c | 2 +-
> >  crypto/algif_skcipher.c | 2 +-
> >  include/crypto/if_alg.h | 5 +++--
> >  4 files changed, 7 insertions(+), 6 deletions(-)
> > 
> > diff --git a/crypto/af_alg.c b/crypto/af_alg.c
> > index 8612aa36a3ef..759cfa678c04 100644
> > --- a/crypto/af_alg.c
> > +++ b/crypto/af_alg.c
> > @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req
> > *areq) unsigned int i;
> > 
> > list_for_each_entry_safe(rsgl, tmp, >rsgl_list, list) {
> > -   ctx->rcvused -= rsgl->sg_num_bytes;
> > +   atomic_sub(rsgl->sg_num_bytes, >rcvused);
> > af_alg_free_sg(>sgl);
> > list_del(>list);
> > if (rsgl != >first_rsgl)
> > @@ -1173,7 +1173,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr
> > *msg, int flags,
> > 
> > areq->last_rsgl = rsgl;
> > len += err;
> > -   ctx->rcvused += err;
> > +   atomic_add(err, >rcvused);
> > rsgl->sg_num_bytes = err;
> > iov_iter_advance(>msg_iter, err);
> > }
> > diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
> > index db9378a45296..d3da3b0869f5 100644
> > --- a/crypto/algif_aead.c
> > +++ b/crypto/algif_aead.c
> > @@ -550,7 +550,7 @@ static int aead_accept_parent_nokey(void *private,
> > struct sock *sk) INIT_LIST_HEAD(>tsgl_list);
> > ctx->len = len;
> > ctx->used = 0;
> > -   ctx->rcvused = 0;
> > +   atomic_set(>rcvused, 0);
> 
> I think ATOMIC_INIT(0) is more suitable here.

It's ugly to use it to assign a dynamic element like this.

ctx->rcvused = (atomic_t)ATOMIC_INIT(0);

Need the cast to avoid getting:

    error: expected expression before '{' token
    #define ATOMIC_INIT(i) { (i) }

There are only two drivers in the kernel doing this vs a lot doing
initialization using the atomic_set option.

I'm happy to change it though if you would prefer.

> 
> > ctx->more = 0;
> > ctx->merge = 0;
> > ctx->enc = 0;
> > diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
> > index c7c75ef41952..a5b4898f625a 100644
> > --- a/crypto/algif_skcipher.c
> > +++ b/crypto/algif_skcipher.c
> > @@ -388,7 +388,7 @@ static int skcipher_accept_parent_nokey(void *private,
> > struct sock *sk) INIT_LIST_HEAD(>tsgl_list);
> > ctx->len = len;
> > ctx->used = 0;
> > -   ctx->rcvused = 0;
> > +   atomic_set(>rcvused, 0);
> 
> dto.
> 
> > ctx->more = 0;
> > ctx->merge = 0;
> > ctx->enc = 0;
> > diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
> > index 38d9c5861ed8..f38227a78eae 100644
> > --- a/include/crypto/if_alg.h
> > +++ b/include/crypto/if_alg.h
> > @@ -18,6 +18,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> > 
> >  #include 
> > @@ -150,7 +151,7 @@ struct af_alg_ctx {
> > struct crypto_wait wait;
> > 
> > size_t used;
> > -   size_t rcvused;
> > +   atomic_t rcvused;
> > 
> > bool more;
> > bool merge;
> > @@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk)
> > struct af_alg_ctx *ctx = ask->private;
> > 
> > return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
> > - ctx->rcvused, 0);
> > +atomic_read(>rcvused), 0);
> >  }
> > 
> >  /**
> 
> Other than the comments above:
> 
> Reviewed-by: Stephan Mueller 
> 
> 
> Ciao
> Stephan



Re: [PATCH] crypto: Fix race around ctx->rcvused by making it atomic_t

2017-12-19 Thread Stephan Mueller
Am Dienstag, 19. Dezember 2017, 11:31:22 CET schrieb Jonathan Cameron:

Hi Jonathan,

> This variable was increased and decreased without any protection.
> Result was an occasional miscount and negative wrap around resulting
> in false resource allocation failures.
> 
> Signed-off-by: Jonathan Cameron 
> Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
> ---
> Resending due to incompetence on my part - sorry all!
> 
> The fixes tag is probably not ideal as I'm not 100% sure when this actually
> became a bug.  The code in question was introduced in
> 
> Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
> and related.
> rcvused moved in
> Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
> So I have gone with the latter option as that is where it will cleanly
> apply. However, it probably doesn't matter as both are present only in the
> 4.14 final kernel.
> 
>  crypto/af_alg.c | 4 ++--
>  crypto/algif_aead.c | 2 +-
>  crypto/algif_skcipher.c | 2 +-
>  include/crypto/if_alg.h | 5 +++--
>  4 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
> index 8612aa36a3ef..759cfa678c04 100644
> --- a/crypto/af_alg.c
> +++ b/crypto/af_alg.c
> @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req
> *areq) unsigned int i;
> 
>   list_for_each_entry_safe(rsgl, tmp, >rsgl_list, list) {
> - ctx->rcvused -= rsgl->sg_num_bytes;
> + atomic_sub(rsgl->sg_num_bytes, >rcvused);
>   af_alg_free_sg(>sgl);
>   list_del(>list);
>   if (rsgl != >first_rsgl)
> @@ -1173,7 +1173,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr
> *msg, int flags,
> 
>   areq->last_rsgl = rsgl;
>   len += err;
> - ctx->rcvused += err;
> + atomic_add(err, >rcvused);
>   rsgl->sg_num_bytes = err;
>   iov_iter_advance(>msg_iter, err);
>   }
> diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
> index db9378a45296..d3da3b0869f5 100644
> --- a/crypto/algif_aead.c
> +++ b/crypto/algif_aead.c
> @@ -550,7 +550,7 @@ static int aead_accept_parent_nokey(void *private,
> struct sock *sk) INIT_LIST_HEAD(>tsgl_list);
>   ctx->len = len;
>   ctx->used = 0;
> - ctx->rcvused = 0;
> + atomic_set(>rcvused, 0);

I think ATOMIC_INIT(0) is more suitable here.

>   ctx->more = 0;
>   ctx->merge = 0;
>   ctx->enc = 0;
> diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
> index c7c75ef41952..a5b4898f625a 100644
> --- a/crypto/algif_skcipher.c
> +++ b/crypto/algif_skcipher.c
> @@ -388,7 +388,7 @@ static int skcipher_accept_parent_nokey(void *private,
> struct sock *sk) INIT_LIST_HEAD(>tsgl_list);
>   ctx->len = len;
>   ctx->used = 0;
> - ctx->rcvused = 0;
> + atomic_set(>rcvused, 0);

dto.

>   ctx->more = 0;
>   ctx->merge = 0;
>   ctx->enc = 0;
> diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
> index 38d9c5861ed8..f38227a78eae 100644
> --- a/include/crypto/if_alg.h
> +++ b/include/crypto/if_alg.h
> @@ -18,6 +18,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
> 
>  #include 
> @@ -150,7 +151,7 @@ struct af_alg_ctx {
>   struct crypto_wait wait;
> 
>   size_t used;
> - size_t rcvused;
> + atomic_t rcvused;
> 
>   bool more;
>   bool merge;
> @@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk)
>   struct af_alg_ctx *ctx = ask->private;
> 
>   return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
> -   ctx->rcvused, 0);
> +  atomic_read(>rcvused), 0);
>  }
> 
>  /**

Other than the comments above:

Reviewed-by: Stephan Mueller 


Ciao
Stephan


Re: [Plinth PATCH]{topost} crypto: Fix race around ctx->rcvused by making it atomic_t

2017-12-19 Thread Jonathan Cameron
Sorry idiot moment.  Sent that out with our internal markings.
Resending shortly.

On Tue, 19 Dec 2017 10:27:24 +
Jonathan Cameron  wrote:

> This variable was increased and decreased without any protection.
> Result was an occasional miscount and negative wrap around resulting
> in false resource allocation failures.
> 
> Signed-off-by: Jonathan Cameron 
> Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
> ---
> The fixes tag is probably not ideal as I'm not 100% sure when this actually
> became a bug.  The code in question was introduced in
> 
> Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
> and related.
> rcvused moved in 
> Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
> So I have gone with the latter option as that is where it will cleanly apply.
> However, it probably doesn't matter as both are present only in the 4.14
> final kernel.
> 
>  crypto/af_alg.c | 4 ++--
>  crypto/algif_aead.c | 2 +-
>  crypto/algif_skcipher.c | 2 +-
>  include/crypto/if_alg.h | 5 +++--
>  4 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
> index 8612aa36a3ef..759cfa678c04 100644
> --- a/crypto/af_alg.c
> +++ b/crypto/af_alg.c
> @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
>   unsigned int i;
>  
>   list_for_each_entry_safe(rsgl, tmp, >rsgl_list, list) {
> - ctx->rcvused -= rsgl->sg_num_bytes;
> + atomic_sub(rsgl->sg_num_bytes, >rcvused);
>   af_alg_free_sg(>sgl);
>   list_del(>list);
>   if (rsgl != >first_rsgl)
> @@ -1173,7 +1173,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr 
> *msg, int flags,
>  
>   areq->last_rsgl = rsgl;
>   len += err;
> - ctx->rcvused += err;
> + atomic_add(err, >rcvused);
>   rsgl->sg_num_bytes = err;
>   iov_iter_advance(>msg_iter, err);
>   }
> diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
> index db9378a45296..d3da3b0869f5 100644
> --- a/crypto/algif_aead.c
> +++ b/crypto/algif_aead.c
> @@ -550,7 +550,7 @@ static int aead_accept_parent_nokey(void *private, struct 
> sock *sk)
>   INIT_LIST_HEAD(>tsgl_list);
>   ctx->len = len;
>   ctx->used = 0;
> - ctx->rcvused = 0;
> + atomic_set(>rcvused, 0);
>   ctx->more = 0;
>   ctx->merge = 0;
>   ctx->enc = 0;
> diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
> index c7c75ef41952..a5b4898f625a 100644
> --- a/crypto/algif_skcipher.c
> +++ b/crypto/algif_skcipher.c
> @@ -388,7 +388,7 @@ static int skcipher_accept_parent_nokey(void *private, 
> struct sock *sk)
>   INIT_LIST_HEAD(>tsgl_list);
>   ctx->len = len;
>   ctx->used = 0;
> - ctx->rcvused = 0;
> + atomic_set(>rcvused, 0);
>   ctx->more = 0;
>   ctx->merge = 0;
>   ctx->enc = 0;
> diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
> index 38d9c5861ed8..f38227a78eae 100644
> --- a/include/crypto/if_alg.h
> +++ b/include/crypto/if_alg.h
> @@ -18,6 +18,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -150,7 +151,7 @@ struct af_alg_ctx {
>   struct crypto_wait wait;
>  
>   size_t used;
> - size_t rcvused;
> + atomic_t rcvused;
>  
>   bool more;
>   bool merge;
> @@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk)
>   struct af_alg_ctx *ctx = ask->private;
>  
>   return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
> -   ctx->rcvused, 0);
> +  atomic_read(>rcvused), 0);
>  }
>  
>  /**



[Plinth PATCH]{topost} crypto: Fix race around ctx->rcvused by making it atomic_t

2017-12-19 Thread Jonathan Cameron
This variable was increased and decreased without any protection.
Result was an occasional miscount and negative wrap around resulting
in false resource allocation failures.
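A user-space analogue of the race (hypothetical, not the af_alg code): unlocked
read-modify-write on a shared counter loses updates, and on an unsigned size_t a lost
decrement can also wrap it to a huge value; an atomic counter makes each update
indivisible.

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

static size_t plain;		/* like the old size_t rcvused */
static atomic_int fixed;	/* like the new atomic_t rcvused */

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		plain += 1;			/* racy read-modify-write */
		atomic_fetch_add(&fixed, 1);	/* indivisible update */
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, worker, NULL);
	pthread_create(&b, NULL, worker, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* "plain" typically ends up short of 2000000; "fixed" never does. */
	printf("plain=%zu fixed=%d\n", plain, atomic_load(&fixed));
	return 0;
}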

Signed-off-by: Jonathan Cameron 
Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
---
The fixes tag is probably not ideal as I'm not 100% sure when this actually
became a bug.  The code in question was introduced in

Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management")
and related.
rcvused moved in 
Fixes: 2d97591ef43d ("crypto: af_alg - consolidation of duplicate code")
So I have gone with the latter option as that is where it will cleanly apply.
However, it probably doesn't matter as both are present only in the 4.14
final kernel.

 crypto/af_alg.c | 4 ++--
 crypto/algif_aead.c | 2 +-
 crypto/algif_skcipher.c | 2 +-
 include/crypto/if_alg.h | 5 +++--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 8612aa36a3ef..759cfa678c04 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
unsigned int i;
 
list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
-   ctx->rcvused -= rsgl->sg_num_bytes;
+   atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused);
af_alg_free_sg(&rsgl->sgl);
list_del(&rsgl->list);
if (rsgl != &areq->first_rsgl)
@@ -1173,7 +1173,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, 
int flags,
 
areq->last_rsgl = rsgl;
len += err;
-   ctx->rcvused += err;
+   atomic_add(err, &ctx->rcvused);
rsgl->sg_num_bytes = err;
iov_iter_advance(&msg->msg_iter, err);
}
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index db9378a45296..d3da3b0869f5 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -550,7 +550,7 @@ static int aead_accept_parent_nokey(void *private, struct 
sock *sk)
INIT_LIST_HEAD(&ctx->tsgl_list);
ctx->len = len;
ctx->used = 0;
-   ctx->rcvused = 0;
+   atomic_set(&ctx->rcvused, 0);
ctx->more = 0;
ctx->merge = 0;
ctx->enc = 0;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index c7c75ef41952..a5b4898f625a 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -388,7 +388,7 @@ static int skcipher_accept_parent_nokey(void *private, 
struct sock *sk)
INIT_LIST_HEAD(&ctx->tsgl_list);
ctx->len = len;
ctx->used = 0;
-   ctx->rcvused = 0;
+   atomic_set(&ctx->rcvused, 0);
ctx->more = 0;
ctx->merge = 0;
ctx->enc = 0;
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 38d9c5861ed8..f38227a78eae 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -150,7 +151,7 @@ struct af_alg_ctx {
struct crypto_wait wait;
 
size_t used;
-   size_t rcvused;
+   atomic_t rcvused;
 
bool more;
bool merge;
@@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk)
struct af_alg_ctx *ctx = ask->private;
 
return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
- ctx->rcvused, 0);
+atomic_read(&ctx->rcvused), 0);
 }
 
 /**
-- 
2.11.0



[PATCH v2 3/4] crypto: caam - save Era in driver's private data

2017-12-19 Thread Horia Geantă
Save Era in driver's private data for further usage,
like deciding whether an erratum applies or a feature is available
based on its value.
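A sketch of the intended use (illustrative helper, not part of the patch; patch 4/4
gates DKP on the same comparison, since DKP exists from Era 6 onwards):

/* Illustrative only: with the era cached in the driver's private data,
 * feature/erratum decisions become a simple comparison. */
static inline bool caam_has_dkp(const struct caam_drv_private *ctrlpriv)
{
	return ctrlpriv->era >= 6;
}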

Signed-off-by: Horia Geantă 
---
 drivers/crypto/caam/ctrl.c   | 4 +++-
 drivers/crypto/caam/intern.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 027e121c6f70..75d280cb2dc0 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -611,6 +611,8 @@ static int caam_probe(struct platform_device *pdev)
goto iounmap_ctrl;
}
 
+   ctrlpriv->era = caam_get_era();
+
ret = of_platform_populate(nprop, caam_match, NULL, dev);
if (ret) {
dev_err(dev, "JR platform devices creation error\n");
@@ -742,7 +744,7 @@ static int caam_probe(struct platform_device *pdev)
 
/* Report "alive" for developer to see */
dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
-caam_get_era());
+ctrlpriv->era);
dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
 ctrlpriv->total_jobrs, ctrlpriv->qi_present,
 caam_dpaa2 ? "yes" : "no");
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 91f1107276e5..7696a774a362 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -84,6 +84,7 @@ struct caam_drv_private {
u8 qi_present;  /* Nonzero if QI present in device */
int secvio_irq; /* Security violation interrupt number */
int virt_en;/* Virtualization enabled in CAAM */
+   int era;/* CAAM Era (internal HW revision) */
 
 #defineRNG4_MAX_HANDLES 2
/* RNG4 block */
-- 
2.12.0.264.gd6db3f216544



[PATCH v2 4/4] crypto: caam - add Derived Key Protocol (DKP) support

2017-12-19 Thread Horia Geantă
Offload split key generation in CAAM engine, using DKP.
DKP is supported starting with Era 6.

Note that the way assoclen is transmitted from the job descriptor
to the shared descriptor changes - DPOVRD register is used instead
of MATH3 (where available), since DKP protocol thrashes the MATH
registers.

The replacement of MDHA split key generation with DKP has the side
effect of the crypto engine writing the authentication key, and thus
the DMA mapping direction for the buffer holding the key has to change
from DMA_TO_DEVICE to DMA_BIDIRECTIONAL.
There are two cases:
-key is inlined in descriptor - descriptor buffer mapping changes
-key is referenced - key buffer mapping changes
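As a sketch, assuming the mapping direction is picked once at tfm init (names follow
the diff hunks below), both cases reduce to choosing ctx->dir and using it wherever
DMA_TO_DEVICE was previously hard-coded:

	/* Sketch: the engine writes the split key back when DKP is used,
	 * so buffers holding the key must be mapped bidirectionally. */
	ctx->dir = ctrlpriv->era >= 6 ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* ... and every later sync of the shared descriptor / key buffer
	 * passes ctx->dir instead of DMA_TO_DEVICE: */
	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
				   desc_bytes(desc), ctx->dir);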

Signed-off-by: Horia Geantă 
---
v2: fix DMA mapping direction for buffers holding the split key

 drivers/crypto/caam/caamalg.c  | 112 ---
 drivers/crypto/caam/caamalg_desc.c | 176 ++---
 drivers/crypto/caam/caamalg_desc.h |  10 +--
 drivers/crypto/caam/caamalg_qi.c   |  54 +---
 drivers/crypto/caam/caamhash.c |  73 ++-
 drivers/crypto/caam/desc.h |  29 ++
 drivers/crypto/caam/desc_constr.h  |  41 +
 drivers/crypto/caam/key_gen.c  |  30 ---
 drivers/crypto/caam/key_gen.h  |  30 +++
 9 files changed, 384 insertions(+), 171 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index f5666e50c1e7..2188235be02d 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -108,6 +108,7 @@ struct caam_ctx {
dma_addr_t sh_desc_dec_dma;
dma_addr_t sh_desc_givenc_dma;
dma_addr_t key_dma;
+   enum dma_data_direction dir;
struct device *jrdev;
struct alginfo adata;
struct alginfo cdata;
@@ -118,6 +119,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 {
struct caam_ctx *ctx = crypto_aead_ctx(aead);
struct device *jrdev = ctx->jrdev;
+   struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
u32 *desc;
int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN -
ctx->adata.keylen_pad;
@@ -136,9 +138,10 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
/* aead_encrypt shared descriptor */
desc = ctx->sh_desc_enc;
-   cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize);
+   cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize,
+   ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-  desc_bytes(desc), DMA_TO_DEVICE);
+  desc_bytes(desc), ctx->dir);
 
/*
 * Job Descriptor and Shared Descriptors
@@ -154,9 +157,10 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
/* aead_decrypt shared descriptor */
desc = ctx->sh_desc_dec;
-   cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize);
+   cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize,
+   ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-  desc_bytes(desc), DMA_TO_DEVICE);
+  desc_bytes(desc), ctx->dir);
 
return 0;
 }
@@ -168,6 +172,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
unsigned int ivsize = crypto_aead_ivsize(aead);
struct caam_ctx *ctx = crypto_aead_ctx(aead);
struct device *jrdev = ctx->jrdev;
+   struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
u32 ctx1_iv_off = 0;
u32 *desc, *nonce = NULL;
u32 inl_mask;
@@ -234,9 +239,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
desc = ctx->sh_desc_enc;
cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ivsize,
   ctx->authsize, is_rfc3686, nonce, ctx1_iv_off,
-  false);
+  false, ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
-  desc_bytes(desc), DMA_TO_DEVICE);
+  desc_bytes(desc), ctx->dir);
 
 skip_enc:
/*
@@ -266,9 +271,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
desc = ctx->sh_desc_dec;
cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata, ivsize,
   ctx->authsize, alg->caam.geniv, is_rfc3686,
-  nonce, ctx1_iv_off, false);
+  nonce, ctx1_iv_off, false, ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
-  desc_bytes(desc), DMA_TO_DEVICE);
+  desc_bytes(desc), ctx->dir);
 
if (!alg->caam.geniv)
goto skip_givenc;
@@ -300,9 +305,9 @@ static int 

[PATCH v2 1/4] crypto: caam - constify key data

2017-12-19 Thread Horia Geantă
Key data is not modified; it is copied into the shared descriptor.

Signed-off-by: Horia Geantă 
---
 drivers/crypto/caam/caamalg_desc.c |  6 +++---
 drivers/crypto/caam/desc_constr.h  | 10 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/caam/caamalg_desc.c 
b/drivers/crypto/caam/caamalg_desc.c
index 530c14ee32de..2db9e85bf81c 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -1075,7 +1075,7 @@ void cnstr_shdsc_ablkcipher_encap(u32 * const desc, 
struct alginfo *cdata,
 
/* Load nonce into CONTEXT1 reg */
if (is_rfc3686) {
-   u8 *nonce = cdata->key_virt + cdata->keylen;
+   const u8 *nonce = cdata->key_virt + cdata->keylen;
 
append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
   LDST_CLASS_IND_CCB |
@@ -1140,7 +1140,7 @@ void cnstr_shdsc_ablkcipher_decap(u32 * const desc, 
struct alginfo *cdata,
 
/* Load nonce into CONTEXT1 reg */
if (is_rfc3686) {
-   u8 *nonce = cdata->key_virt + cdata->keylen;
+   const u8 *nonce = cdata->key_virt + cdata->keylen;
 
append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
   LDST_CLASS_IND_CCB |
@@ -1209,7 +1209,7 @@ void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, 
struct alginfo *cdata,
 
/* Load Nonce into CONTEXT1 reg */
if (is_rfc3686) {
-   u8 *nonce = cdata->key_virt + cdata->keylen;
+   const u8 *nonce = cdata->key_virt + cdata->keylen;
 
append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
   LDST_CLASS_IND_CCB |
diff --git a/drivers/crypto/caam/desc_constr.h 
b/drivers/crypto/caam/desc_constr.h
index ba1ca0806f0a..5b39b7d7a47a 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -109,7 +109,7 @@ static inline void init_job_desc_shared(u32 * const desc, 
dma_addr_t ptr,
append_ptr(desc, ptr);
 }
 
-static inline void append_data(u32 * const desc, void *data, int len)
+static inline void append_data(u32 * const desc, const void *data, int len)
 {
u32 *offset = desc_end(desc);
 
@@ -172,7 +172,7 @@ static inline void append_cmd_ptr_extlen(u32 * const desc, 
dma_addr_t ptr,
append_cmd(desc, len);
 }
 
-static inline void append_cmd_data(u32 * const desc, void *data, int len,
+static inline void append_cmd_data(u32 * const desc, const void *data, int len,
   u32 command)
 {
append_cmd(desc, command | IMMEDIATE | len);
@@ -271,7 +271,7 @@ APPEND_SEQ_PTR_INTLEN(in, IN)
 APPEND_SEQ_PTR_INTLEN(out, OUT)
 
 #define APPEND_CMD_PTR_TO_IMM(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, const void *data, \
 unsigned int len, u32 options) \
 { \
PRINT_POS; \
@@ -312,7 +312,7 @@ APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32)
  * from length of immediate data provided, e.g., split keys
  */
 #define APPEND_CMD_PTR_TO_IMM2(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, const void *data, \
 unsigned int data_len, \
 unsigned int len, u32 options) \
 { \
@@ -452,7 +452,7 @@ struct alginfo {
unsigned int keylen_pad;
union {
dma_addr_t key_dma;
-   void *key_virt;
+   const void *key_virt;
};
bool key_inline;
 };
-- 
2.12.0.264.gd6db3f216544



[PATCH v2 2/4] crypto: caam - remove needless ablkcipher key copy

2017-12-19 Thread Horia Geantă
ablkcipher shared descriptors are relatively small, thus there is enough
space for the key to be inlined.
Accordingly, there is no need to copy the key into ctx->key.

Signed-off-by: Horia Geantă 
---
 drivers/crypto/caam/caamalg.c| 8 ++--
 drivers/crypto/caam/caamalg_qi.c | 8 ++--
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index baa8dd52472d..f5666e50c1e7 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -625,7 +625,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher 
*ablkcipher,
const bool is_rfc3686 = (ctr_mode &&
 (strstr(alg_name, "rfc3686") != NULL));
 
-   memcpy(ctx->key, key, keylen);
 #ifdef DEBUG
print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
   DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -648,9 +647,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher 
*ablkcipher,
keylen -= CTR_RFC3686_NONCE_SIZE;
}
 
-   dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
ctx->cdata.keylen = keylen;
-   ctx->cdata.key_virt = ctx->key;
+   ctx->cdata.key_virt = key;
ctx->cdata.key_inline = true;
 
/* ablkcipher_encrypt shared descriptor */
@@ -691,10 +689,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher 
*ablkcipher,
return -EINVAL;
}
 
-   memcpy(ctx->key, key, keylen);
-   dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
ctx->cdata.keylen = keylen;
-   ctx->cdata.key_virt = ctx->key;
+   ctx->cdata.key_virt = key;
ctx->cdata.key_inline = true;
 
/* xts_ablkcipher_encrypt shared descriptor */
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index ad14b69a052e..b45401786530 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -272,7 +272,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher 
*ablkcipher,
const bool is_rfc3686 = (ctr_mode && strstr(alg_name, "rfc3686"));
int ret = 0;
 
-   memcpy(ctx->key, key, keylen);
 #ifdef DEBUG
print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
   DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -295,9 +294,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher 
*ablkcipher,
keylen -= CTR_RFC3686_NONCE_SIZE;
}
 
-   dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
ctx->cdata.keylen = keylen;
-   ctx->cdata.key_virt = ctx->key;
+   ctx->cdata.key_virt = key;
ctx->cdata.key_inline = true;
 
/* ablkcipher encrypt, decrypt, givencrypt shared descriptors */
@@ -356,10 +354,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher 
*ablkcipher,
return -EINVAL;
}
 
-   memcpy(ctx->key, key, keylen);
-   dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
ctx->cdata.keylen = keylen;
-   ctx->cdata.key_virt = ctx->key;
+   ctx->cdata.key_virt = key;
ctx->cdata.key_inline = true;
 
/* xts ablkcipher encrypt, decrypt shared descriptors */
-- 
2.12.0.264.gd6db3f216544



Re: [PATCH] crypto: x86/twofish-3way - Fix %rbp usage

2017-12-19 Thread Juergen Gross
On 19/12/17 08:54, Ingo Molnar wrote:
> 
> * Eric Biggers  wrote:
> 
>> There may be a small overhead caused by replacing 'xchg REG, REG' with
>> the needed sequence 'mov MEM, REG; mov REG, MEM; mov REG, REG' once per
>> round.  But, counterintuitively, when I tested "ctr-twofish-3way" on a
>> Haswell processor, the new version was actually about 2% faster.
>> (Perhaps 'xchg' is not as well optimized as plain moves.)
> 
> XCHG has implicit LOCK semantics on all x86 CPUs, so that's not a surprising 
> result I think.

Exchanging 2 registers can be done without memory access via:

xor reg1, reg2
xor reg2, reg1
xor reg1, reg2


Juergen