From: Eric Biggers <ebigg...@google.com>

The generic constant-time AES implementation is supposed to preload the
AES S-box into the CPU's L1 data cache.  But an interrupt handler can
run on the CPU and evict parts of the S-box from the cache.  Worse, on
preemptible kernels the task can even be preempted and migrated to a
different CPU.  So the implementation may actually still be vulnerable
to cache-timing attacks.

Make it more robust by disabling interrupts while the S-box is used.
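
For illustration, the protection boils down to the pattern sketched
below.  This is a rough sketch only, not the exact aes_ti.c code:
aes_sbox and encrypt_one_block() are made-up stand-ins for __aesti_sbox
and the per-round lookup code.

	#include <linux/cache.h>     /* L1_CACHE_BYTES */
	#include <linux/irqflags.h>  /* local_irq_save(), local_irq_restore() */
	#include <linux/types.h>

	extern const u8 aes_sbox[256];            /* stand-in for __aesti_sbox */
	void encrypt_one_block(u8 *out, const u8 *in); /* stand-in for the rounds */

	static void encrypt_block_cache_resident(u8 *out, const u8 *in)
	{
		unsigned long flags;
		volatile u8 sink = 0;  /* keeps the preload loop from being optimized away */
		int i;

		/*
		 * With interrupts off, nothing else can run on this CPU
		 * between the preload and the table lookups, so the S-box
		 * stays resident in the L1 data cache for the whole block.
		 */
		local_irq_save(flags);

		/* Touch one byte per cache line to pull the whole S-box into L1. */
		for (i = 0; i < 256; i += L1_CACHE_BYTES)
			sink ^= aes_sbox[i];

		encrypt_one_block(out, in);

		local_irq_restore(flags);
	}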

In some quick tests on x86 and ARM, this doesn't affect performance
significantly.  Responsiveness is also a concern, but interrupts are
only disabled for a single AES block, which even on an ARM Cortex-A7
takes only ~1500 cycles to encrypt or ~2600 cycles to decrypt.

Fixes: b5e0b032b6c3 ("crypto: aes - add generic time invariant AES cipher")
Signed-off-by: Eric Biggers <ebigg...@google.com>
---
 crypto/aes_ti.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index 03023b2290e8e..81b604419ee0e 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -269,6 +269,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        const u32 *rkp = ctx->key_enc + 4;
        int rounds = 6 + ctx->key_length / 4;
        u32 st0[4], st1[4];
+       unsigned long flags;
        int round;
 
        st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
@@ -276,6 +277,12 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
        st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
 
+       /*
+        * Disable interrupts (including preemption) while the sbox is loaded
+        * into L1 cache and used for encryption on this CPU.
+        */
+       local_irq_save(flags);
+
        st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
        st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
        st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
@@ -300,6 +307,8 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
        put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
        put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+
+       local_irq_restore(flags);
 }
 
 static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
@@ -308,6 +317,7 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        const u32 *rkp = ctx->key_dec + 4;
        int rounds = 6 + ctx->key_length / 4;
        u32 st0[4], st1[4];
+       unsigned long flags;
        int round;
 
        st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
@@ -315,6 +325,12 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
        st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
 
+       /*
+        * Disable interrupts (including preemption) while the sbox is loaded
+        * into L1 cache and used for decryption on this CPU.
+        */
+       local_irq_save(flags);
+
        st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
        st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
        st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
@@ -339,6 +355,8 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
        put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
        put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
        put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+
+       local_irq_restore(flags);
 }
 
 static struct crypto_alg aes_alg = {
-- 
2.19.0
