[PATCH] crypto: aes_ti - disable interrupts while accessing sbox

2018-10-03 Thread Eric Biggers
From: Eric Biggers 

The generic constant-time AES implementation is supposed to preload the
AES S-box into the CPU's L1 data cache.  But an interrupt handler can
run on the CPU and muck with the cache.  Worse, on preemptible kernels
the process can even be preempted and moved to a different CPU.  So the
implementation may actually still be vulnerable to cache-timing attacks.

Make it more robust by disabling interrupts while the sbox is used.

In some quick tests on x86 and ARM, this doesn't affect performance
significantly.  Responsiveness is also a concern, but interrupts are
only disabled for a single AES block, which even on ARM Cortex-A7 takes
"only" ~1500 cycles to encrypt or ~2600 cycles to decrypt.

Fixes: b5e0b032b6c3 ("crypto: aes - add generic time invariant AES cipher")
Signed-off-by: Eric Biggers 
---
 crypto/aes_ti.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index 03023b2290e8e..81b604419ee0e 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -269,6 +269,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
const u32 *rkp = ctx->key_enc + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
+   unsigned long flags;
int round;
 
st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
@@ -276,6 +277,12 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
 
+   /*
+* Disable interrupts (including preemption) while the sbox is loaded
+* into L1 cache and used for encryption on this CPU.
+*/
+   local_irq_save(flags);
+
st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
@@ -300,6 +307,8 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+
+   local_irq_restore(flags);
 }
 
 static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
@@ -308,6 +317,7 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
const u32 *rkp = ctx->key_dec + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
+   unsigned long flags;
int round;
 
st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
@@ -315,6 +325,12 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
 
+   /*
+* Disable interrupts (including preemption) while the sbox is loaded
+* into L1 cache and used for decryption on this CPU.
+*/
+   local_irq_save(flags);
+
st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
@@ -339,6 +355,8 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+
+   local_irq_restore(flags);
 }
 
 static struct crypto_alg aes_alg = {
-- 
2.19.0
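
As a usage aside (not part of the patch): the fixed-time implementation is
registered as a single-block cipher, so a kernel-side caller would typically
reach it through the crypto_cipher API, roughly as in the sketch below.  The
function example_encrypt_one_block() is invented for illustration;
"aes-fixed-time" is the driver name registered by crypto/aes_ti.c, and error
handling is kept minimal.

  #include <crypto/aes.h>
  #include <linux/crypto.h>
  #include <linux/err.h>

  /*
   * Sketch: encrypt a single 16-byte block with the generic fixed-time AES
   * implementation.  With this patch applied, interrupts are disabled
   * internally for the duration of each block operation.
   */
  static int example_encrypt_one_block(const u8 *key, unsigned int keylen,
                                       const u8 in[AES_BLOCK_SIZE],
                                       u8 out[AES_BLOCK_SIZE])
  {
          struct crypto_cipher *tfm;
          int err;

          tfm = crypto_alloc_cipher("aes-fixed-time", 0, 0);
          if (IS_ERR(tfm))
                  return PTR_ERR(tfm);

          err = crypto_cipher_setkey(tfm, key, keylen);
          if (!err)
                  crypto_cipher_encrypt_one(tfm, out, in);

          crypto_free_cipher(tfm);
          return err;
  }

Requesting the driver name directly, as shown, bypasses priority-based
selection among the other implementations that register cra_name "aes".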



Re: [PATCH] crypto: arm64/aes - fix handling sub-block CTS-CBC inputs

2018-10-03 Thread Ard Biesheuvel
On 3 October 2018 at 07:22, Eric Biggers  wrote:
> From: Eric Biggers 
>
> In the new arm64 CTS-CBC implementation, return an error code rather
> than crashing on inputs shorter than AES_BLOCK_SIZE bytes.  Also set
> cra_blocksize to AES_BLOCK_SIZE (as is done in the cts template) to
> indicate the minimum input size.
>
> Fixes: dd597fb33ff0 ("crypto: arm64/aes-blk - add support for CTS-CBC mode")
> Signed-off-by: Eric Biggers 

Thanks Eric

Reviewed-by: Ard Biesheuvel 

> ---
>  arch/arm64/crypto/aes-glue.c | 13 +
>  1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
> index 26d2b0263ba63..1e676625ef33f 100644
> --- a/arch/arm64/crypto/aes-glue.c
> +++ b/arch/arm64/crypto/aes-glue.c
> @@ -243,8 +243,11 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
>
> skcipher_request_set_tfm(&rctx->subreq, tfm);
>
> -   if (req->cryptlen == AES_BLOCK_SIZE)
> +   if (req->cryptlen <= AES_BLOCK_SIZE) {
> +   if (req->cryptlen < AES_BLOCK_SIZE)
> +   return -EINVAL;
> cbc_blocks = 1;
> +   }
>
> if (cbc_blocks > 0) {
> unsigned int blocks;
> @@ -305,8 +308,11 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
>
> skcipher_request_set_tfm(&rctx->subreq, tfm);
>
> -   if (req->cryptlen == AES_BLOCK_SIZE)
> +   if (req->cryptlen <= AES_BLOCK_SIZE) {
> +   if (req->cryptlen < AES_BLOCK_SIZE)
> +   return -EINVAL;
> cbc_blocks = 1;
> +   }
>
> if (cbc_blocks > 0) {
> unsigned int blocks;
> @@ -486,14 +492,13 @@ static struct skcipher_alg aes_algs[] = { {
> .cra_driver_name= "__cts-cbc-aes-" MODE,
> .cra_priority   = PRIO,
> .cra_flags  = CRYPTO_ALG_INTERNAL,
> -   .cra_blocksize  = 1,
> +   .cra_blocksize  = AES_BLOCK_SIZE,
> .cra_ctxsize= sizeof(struct crypto_aes_ctx),
> .cra_module = THIS_MODULE,
> },
> .min_keysize= AES_MIN_KEY_SIZE,
> .max_keysize= AES_MAX_KEY_SIZE,
> .ivsize = AES_BLOCK_SIZE,
> -   .chunksize  = AES_BLOCK_SIZE,
> .walksize   = 2 * AES_BLOCK_SIZE,
> .setkey = skcipher_aes_setkey,
> .encrypt= cts_cbc_encrypt,
> --
> 2.19.0
>
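
For context on the hunks above: cbc_blocks is derived from req->cryptlen
earlier in these functions (in the dd597fb33ff0 code it starts out as
roughly DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2), so a request of
exactly one block has to be special-cased, and anything shorter than a
block has no valid CTS-CBC interpretation at all.  A standalone sketch of
the resulting length handling follows; it is illustrative only, not the
kernel code, and cts_cbc_blocks() is an invented name.

  #include <crypto/aes.h>         /* AES_BLOCK_SIZE */
  #include <linux/errno.h>
  #include <linux/kernel.h>       /* DIV_ROUND_UP() */

  /*
   * Length policy after this patch: requests shorter than one AES block
   * are rejected with -EINVAL instead of crashing, a request of exactly
   * one block is handled as plain CBC of that block, and longer requests
   * keep the computed cbc_blocks value (assumed here to be
   * DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE) - 2, as in the original code).
   */
  static int cts_cbc_blocks(unsigned int cryptlen, int *cbc_blocks)
  {
          *cbc_blocks = DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE) - 2;

          if (cryptlen <= AES_BLOCK_SIZE) {
                  if (cryptlen < AES_BLOCK_SIZE)
                          return -EINVAL;
                  *cbc_blocks = 1;
          }

          return 0;
  }

Setting cra_blocksize to AES_BLOCK_SIZE expresses the same minimum at
registration time, matching what the generic cts template advertises.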