The arm64 kernel will shortly disallow nested kernel mode NEON.

So honour this in the ARMv8 Crypto Extensions implementation of
CCM-AES, and fall back to a scalar implementation using the generic
crypto helpers for AES, XOR and incrementing the CTR counter.

Signed-off-by: Ard Biesheuvel <ard.biesheu...@linaro.org>
---
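For reviewers: the scalar path added below reduces to the per-block step sketched here. This is only an illustrative sketch; the helper name ccm_scalar_step and its standalone form are made up for this note, while crypto_inc(), crypto_xor(), crypto_xor_cpy() and __aes_arm64_encrypt() are the generic helpers the patch actually uses, and the round count is the same value num_rounds() computes in the glue code.

#include <linux/linkage.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>	/* crypto_inc(), crypto_xor(), crypto_xor_cpy() */

asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);

/*
 * One CTR + CBC-MAC step of the scalar fallback: advance the counter,
 * generate the keystream block with the generic AES core, fold the data
 * into the running CBC-MAC, and XOR-copy the keystream over the data.
 */
static void ccm_scalar_step(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
			    u8 ctr[AES_BLOCK_SIZE], u8 mac[AES_BLOCK_SIZE],
			    u32 bsize, bool enc)
{
	u8 ks[AES_BLOCK_SIZE];
	int rounds = 6 + ctx->key_length / 4;	/* same as num_rounds(ctx) */

	crypto_inc(ctr, AES_BLOCK_SIZE);
	__aes_arm64_encrypt(ctx->key_enc, ks, ctr, rounds);
	__aes_arm64_encrypt(ctx->key_enc, mac, mac, rounds);

	if (enc)
		crypto_xor(mac, src, bsize);	/* encrypt: MAC over the plaintext */
	crypto_xor_cpy(dst, src, ks, bsize);	/* dst = src ^ keystream */
	if (!enc)
		crypto_xor(mac, dst, bsize);	/* decrypt: MAC over the recovered plaintext */
}

The patch itself open-codes this step inside ccm_crypt_fallback() so the skcipher_walk chunking and the final tag computation can share the same buffers, and it is only taken when may_use_simd() says the NEON unit cannot be used.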
 arch/arm64/crypto/Kconfig           |   1 +
 arch/arm64/crypto/aes-ce-ccm-glue.c | 174 ++++++++++++++++----
 2 files changed, 140 insertions(+), 35 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2fd4bb6d0b5a..ba637765c19a 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -59,6 +59,7 @@ config CRYPTO_AES_ARM64_CE_CCM
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_ALGAPI
        select CRYPTO_AES_ARM64_CE
+       select CRYPTO_AES_ARM64
        select CRYPTO_AEAD
 
 config CRYPTO_AES_ARM64_CE_BLK
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 6a7dbc7c83a6..a1254036f2b1 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -1,7 +1,7 @@
 /*
  * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheu...@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheu...@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
@@ -44,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
                                 u32 rounds);
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
                      unsigned int key_len)
 {
@@ -103,7 +106,45 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
        return 0;
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+                          u32 abytes, u32 *macp, bool use_neon)
+{
+       if (likely(use_neon)) {
+               ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+                                    num_rounds(key));
+       } else {
+               if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+                       int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+                       crypto_xor(&mac[*macp], in, added);
+
+                       *macp += added;
+                       in += added;
+                       abytes -= added;
+               }
+
+               while (abytes > AES_BLOCK_SIZE) {
+                       __aes_arm64_encrypt(key->key_enc, mac, mac,
+                                           num_rounds(key));
+                       crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+                       in += AES_BLOCK_SIZE;
+                       abytes -= AES_BLOCK_SIZE;
+               }
+
+               if (abytes > 0) {
+                       __aes_arm64_encrypt(key->key_enc, mac, mac,
+                                           num_rounds(key));
+                       crypto_xor(mac, in, abytes);
+                       *macp = abytes;
+               } else {
+                       *macp = 0;
+               }
+       }
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+                                  bool use_neon)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
                ltag.len = 6;
        }
 
-       ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
-                            num_rounds(ctx));
+       ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
        scatterwalk_start(&walk, req->src);
 
        do {
@@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
                        n = scatterwalk_clamp(&walk, len);
                }
                p = scatterwalk_map(&walk);
-               ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
-                                    num_rounds(ctx));
+               ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
                len -= n;
 
                scatterwalk_unmap(p);
@@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
        } while (len);
 }
 
+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+                             struct crypto_aes_ctx *ctx, bool enc)
+{
+       u8 buf[AES_BLOCK_SIZE];
+       int err = 0;
+
+       while (walk->nbytes) {
+               int blocks = walk->nbytes / AES_BLOCK_SIZE;
+               u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+               u8 *dst = walk->dst.virt.addr;
+               u8 *src = walk->src.virt.addr;
+               u32 nbytes = walk->nbytes;
+
+               if (nbytes == walk->total && tail > 0) {
+                       blocks++;
+                       tail = 0;
+               }
+
+               do {
+                       u32 bsize = AES_BLOCK_SIZE;
+
+                       if (nbytes < AES_BLOCK_SIZE)
+                               bsize = nbytes;
+
+                       crypto_inc(walk->iv, AES_BLOCK_SIZE);
+                       __aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+                                           num_rounds(ctx));
+                       __aes_arm64_encrypt(ctx->key_enc, mac, mac,
+                                           num_rounds(ctx));
+                       if (enc)
+                               crypto_xor(mac, src, bsize);
+                       crypto_xor_cpy(dst, src, buf, bsize);
+                       if (!enc)
+                               crypto_xor(mac, dst, bsize);
+                       dst += bsize;
+                       src += bsize;
+                       nbytes -= bsize;
+               } while (--blocks);
+
+               err = skcipher_walk_done(walk, tail);
+       }
+
+       if (!err) {
+               __aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+               __aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+               crypto_xor(mac, buf, AES_BLOCK_SIZE);
+       }
+       return err;
+}
+
 static int ccm_encrypt(struct aead_request *req)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req)
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen;
+       bool use_neon = may_use_simd();
        int err;
 
        err = ccm_init_mac(req, mac, len);
        if (err)
                return err;
 
-       kernel_neon_begin_partial(6);
+       if (likely(use_neon))
+               kernel_neon_begin();
 
        if (req->assoclen)
-               ccm_calculate_auth_mac(req, mac);
+               ccm_calculate_auth_mac(req, mac, use_neon);
 
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-       while (walk.nbytes) {
-               u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
-               if (walk.nbytes == walk.total)
-                       tail = 0;
+       if (likely(use_neon)) {
+               while (walk.nbytes) {
+                       u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+                       if (walk.nbytes == walk.total)
+                               tail = 0;
 
-               err = skcipher_walk_done(&walk, tail);
-       }
-       if (!err)
-               ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+                       ce_aes_ccm_encrypt(walk.dst.virt.addr,
+                                          walk.src.virt.addr,
+                                          walk.nbytes - tail, ctx->key_enc,
+                                          num_rounds(ctx), mac, walk.iv);
 
-       kernel_neon_end();
+                       err = skcipher_walk_done(&walk, tail);
+               }
+               if (!err)
+                       ce_aes_ccm_final(mac, buf, ctx->key_enc,
+                                        num_rounds(ctx));
 
+               kernel_neon_end();
+       } else {
+               err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+       }
        if (err)
                return err;
 
@@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req)
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen - authsize;
+       bool use_neon = may_use_simd();
        int err;
 
        err = ccm_init_mac(req, mac, len);
        if (err)
                return err;
 
-       kernel_neon_begin_partial(6);
+       if (likely(use_neon))
+               kernel_neon_begin();
 
        if (req->assoclen)
-               ccm_calculate_auth_mac(req, mac);
+               ccm_calculate_auth_mac(req, mac, use_neon);
 
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-       while (walk.nbytes) {
-               u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+       if (likely(use_neon)) {
+               while (walk.nbytes) {
+                       u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               if (walk.nbytes == walk.total)
-                       tail = 0;
+                       if (walk.nbytes == walk.total)
+                               tail = 0;
 
-               ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+                       ce_aes_ccm_decrypt(walk.dst.virt.addr,
+                                          walk.src.virt.addr,
+                                          walk.nbytes - tail, ctx->key_enc,
+                                          num_rounds(ctx), mac, walk.iv);
 
-               err = skcipher_walk_done(&walk, tail);
-       }
-       if (!err)
-               ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+                       err = skcipher_walk_done(&walk, tail);
+               }
+               if (!err)
+                       ce_aes_ccm_final(mac, buf, ctx->key_enc,
+                                        num_rounds(ctx));
 
-       kernel_neon_end();
+               kernel_neon_end();
+       } else {
+               err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+       }
 
        if (err)
                return err;
-- 
2.9.3
