Re: [PATCH 3/3] crypto: x86 - Add optimized AEGIS implementations

2018-05-20 Thread Ondrej Mosnáček
2018-05-20 4:41 GMT+02:00 Eric Biggers :
> Hi Ondrej,
>
> On Fri, May 11, 2018 at 02:12:51PM +0200, Ondrej Mosnáček wrote:
>> From: Ondrej Mosnacek 
>>
>> This patch adds optimized implementations of AEGIS-128, AEGIS-128L,
>> and AEGIS-256, utilizing the AES-NI and SSE2 x86 extensions.
>>
>> Signed-off-by: Ondrej Mosnacek 
> [...]
>> +static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 
>> *key,
>> + unsigned int keylen)
>> +{
>> + struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
>> +
>> + if (keylen != AEGIS256_KEY_SIZE) {
>> + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
>> + return -EINVAL;
>> + }
>> +
>> + memcpy(ctx->key.bytes, key, AEGIS256_KEY_SIZE);
>> +
>> + return 0;
>> +}
>
> This code is copying 32 bytes into a 16-byte buffer.

Indeed, I must have overlooked that while copy-pasting and editing the
boilerplate...

I will send a follow-up patch soon.

Thanks for the report!

>
> ==
> BUG: KASAN: slab-out-of-bounds in memcpy include/linux/string.h:345 [inline]
> BUG: KASAN: slab-out-of-bounds in crypto_aegis256_aesni_setkey+0x23/0x60 
> arch/x86/crypto/aegis256-aesni-glue.c:167
> Write of size 32 at addr 88006c16b650 by task cryptomgr_test/120
> CPU: 2 PID: 120 Comm: cryptomgr_test Not tainted 
> 4.17.0-rc1-00069-g6ecc9d9ff91f #31
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> 1.11.0-20171110_100015-anatol 04/01/2014
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x86/0xca lib/dump_stack.c:113
>  print_address_description+0x65/0x204 mm/kasan/report.c:256
>  kasan_report_error mm/kasan/report.c:354 [inline]
>  kasan_report.cold.6+0x242/0x304 mm/kasan/report.c:412
>  check_memory_region_inline mm/kasan/kasan.c:260 [inline]
>  check_memory_region+0x13c/0x1b0 mm/kasan/kasan.c:267
>  memcpy+0x37/0x50 mm/kasan/kasan.c:303
>  memcpy include/linux/string.h:345 [inline]
>  crypto_aegis256_aesni_setkey+0x23/0x60 
> arch/x86/crypto/aegis256-aesni-glue.c:167
>  crypto_aead_setkey+0xa4/0x1e0 crypto/aead.c:62
>  cryptd_aead_setkey+0x30/0x50 crypto/cryptd.c:938
>  crypto_aead_setkey+0xa4/0x1e0 crypto/aead.c:62
>  cryptd_aegis256_aesni_setkey+0x30/0x50 
> arch/x86/crypto/aegis256-aesni-glue.c:259
>  crypto_aead_setkey+0xa4/0x1e0 crypto/aead.c:62
>  __test_aead+0x8bf/0x3770 crypto/testmgr.c:675
>  test_aead+0x28/0x110 crypto/testmgr.c:957
>  alg_test_aead+0x8b/0x140 crypto/testmgr.c:1690
>  alg_test.part.5+0x1bb/0x4d0 crypto/testmgr.c:3845
>  alg_test+0x23/0x25 crypto/testmgr.c:3865
>  cryptomgr_test+0x56/0x80 crypto/algboss.c:223
>  kthread+0x329/0x3f0 kernel/kthread.c:238
>  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:412
> Allocated by task 120:
>  save_stack mm/kasan/kasan.c:448 [inline]
>  set_track mm/kasan/kasan.c:460 [inline]
>  kasan_kmalloc.part.1+0x5f/0xf0 mm/kasan/kasan.c:553
>  kasan_kmalloc+0xaf/0xc0 mm/kasan/kasan.c:538
>  __do_kmalloc mm/slab.c:3718 [inline]
>  __kmalloc+0x114/0x1d0 mm/slab.c:3727
>  kmalloc include/linux/slab.h:517 [inline]
>  kzalloc include/linux/slab.h:701 [inline]
>  crypto_create_tfm+0x80/0x2c0 crypto/api.c:464
>  crypto_spawn_tfm2+0x57/0x90 crypto/algapi.c:717
>  crypto_spawn_aead include/crypto/internal/aead.h:112 [inline]
>  cryptd_aead_init_tfm+0x3d/0x110 crypto/cryptd.c:1033
>  crypto_aead_init_tfm+0x130/0x190 crypto/aead.c:111
>  crypto_create_tfm+0xda/0x2c0 crypto/api.c:471
>  crypto_alloc_tfm+0xcf/0x1d0 crypto/api.c:543
>  crypto_alloc_aead+0x14/0x20 crypto/aead.c:351
>  cryptd_alloc_aead+0xeb/0x1c0 crypto/cryptd.c:1334
>  cryptd_aegis256_aesni_init_tfm+0x24/0xf0 
> arch/x86/crypto/aegis256-aesni-glue.c:308
>  crypto_aead_init_tfm+0x130/0x190 crypto/aead.c:111
>  crypto_create_tfm+0xda/0x2c0 crypto/api.c:471
>  crypto_alloc_tfm+0xcf/0x1d0 crypto/api.c:543
>  crypto_alloc_aead+0x14/0x20 crypto/aead.c:351
>  alg_test_aead+0x1f/0x140 crypto/testmgr.c:1682
>  alg_test.part.5+0x1bb/0x4d0 crypto/testmgr.c:3845
>  alg_test+0x23/0x25 crypto/testmgr.c:3865
>  cryptomgr_test+0x56/0x80 crypto/algboss.c:223
>  kthread+0x329/0x3f0 kernel/kthread.c:238
>  ret_from_[   16.453502] serial8250: too much work for irq4
> Freed by task 0:
> (stack is not available)
> The buggy address belongs to the object at 88006c16b600
> The buggy address is located 80 bytes inside of
> The buggy address belongs to the page:
> page:ea00017a4f68 count:1 mapcount:0 mapping:88006c16b000 index:0x0
> flags: 0x1000100(slab)
> raw: 01000100 88006c16b000  00010015
> raw: ea00017a2470 88006d401548 88006d400400
> page dumped because: kasan: bad access detected
> Memory state around the buggy address:
>  88006c16b500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>  88006c16b580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc

Re: [PATCH 3/3] crypto: x86 - Add optimized AEGIS implementations

2018-05-19 Thread Eric Biggers
Hi Ondrej,

On Fri, May 11, 2018 at 02:12:51PM +0200, Ondrej Mosnáček wrote:
> From: Ondrej Mosnacek 
> 
> This patch adds optimized implementations of AEGIS-128, AEGIS-128L,
> and AEGIS-256, utilizing the AES-NI and SSE2 x86 extensions.
> 
> Signed-off-by: Ondrej Mosnacek 
[...]
> +static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 
> *key,
> + unsigned int keylen)
> +{
> + struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
> +
> + if (keylen != AEGIS256_KEY_SIZE) {
> + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
> + return -EINVAL;
> + }
> +
> + memcpy(ctx->key.bytes, key, AEGIS256_KEY_SIZE);
> +
> + return 0;
> +}

This code is copying 32 bytes into a 16-byte buffer.

==
BUG: KASAN: slab-out-of-bounds in memcpy include/linux/string.h:345 [inline]
BUG: KASAN: slab-out-of-bounds in crypto_aegis256_aesni_setkey+0x23/0x60 
arch/x86/crypto/aegis256-aesni-glue.c:167
Write of size 32 at addr 88006c16b650 by task cryptomgr_test/120
CPU: 2 PID: 120 Comm: cryptomgr_test Not tainted 4.17.0-rc1-00069-g6ecc9d9ff91f 
#31
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.11.0-20171110_100015-anatol 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x86/0xca lib/dump_stack.c:113
 print_address_description+0x65/0x204 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.6+0x242/0x304 mm/kasan/report.c:412
 check_memory_region_inline mm/kasan/kasan.c:260 [inline]
 check_memory_region+0x13c/0x1b0 mm/kasan/kasan.c:267
 memcpy+0x37/0x50 mm/kasan/kasan.c:303
 memcpy include/linux/string.h:345 [inline]
 crypto_aegis256_aesni_setkey+0x23/0x60 
arch/x86/crypto/aegis256-aesni-glue.c:167
 crypto_aead_setkey+0xa4/0x1e0 crypto/aead.c:62
 cryptd_aead_setkey+0x30/0x50 crypto/cryptd.c:938
 crypto_aead_setkey+0xa4/0x1e0 crypto/aead.c:62
 cryptd_aegis256_aesni_setkey+0x30/0x50 
arch/x86/crypto/aegis256-aesni-glue.c:259
 crypto_aead_setkey+0xa4/0x1e0 crypto/aead.c:62
 __test_aead+0x8bf/0x3770 crypto/testmgr.c:675
 test_aead+0x28/0x110 crypto/testmgr.c:957
 alg_test_aead+0x8b/0x140 crypto/testmgr.c:1690
 alg_test.part.5+0x1bb/0x4d0 crypto/testmgr.c:3845
 alg_test+0x23/0x25 crypto/testmgr.c:3865
 cryptomgr_test+0x56/0x80 crypto/algboss.c:223
 kthread+0x329/0x3f0 kernel/kthread.c:238
 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:412
Allocated by task 120:
 save_stack mm/kasan/kasan.c:448 [inline]
 set_track mm/kasan/kasan.c:460 [inline]
 kasan_kmalloc.part.1+0x5f/0xf0 mm/kasan/kasan.c:553
 kasan_kmalloc+0xaf/0xc0 mm/kasan/kasan.c:538
 __do_kmalloc mm/slab.c:3718 [inline]
 __kmalloc+0x114/0x1d0 mm/slab.c:3727
 kmalloc include/linux/slab.h:517 [inline]
 kzalloc include/linux/slab.h:701 [inline]
 crypto_create_tfm+0x80/0x2c0 crypto/api.c:464
 crypto_spawn_tfm2+0x57/0x90 crypto/algapi.c:717
 crypto_spawn_aead include/crypto/internal/aead.h:112 [inline]
 cryptd_aead_init_tfm+0x3d/0x110 crypto/cryptd.c:1033
 crypto_aead_init_tfm+0x130/0x190 crypto/aead.c:111
 crypto_create_tfm+0xda/0x2c0 crypto/api.c:471
 crypto_alloc_tfm+0xcf/0x1d0 crypto/api.c:543
 crypto_alloc_aead+0x14/0x20 crypto/aead.c:351
 cryptd_alloc_aead+0xeb/0x1c0 crypto/cryptd.c:1334
 cryptd_aegis256_aesni_init_tfm+0x24/0xf0 
arch/x86/crypto/aegis256-aesni-glue.c:308
 crypto_aead_init_tfm+0x130/0x190 crypto/aead.c:111
 crypto_create_tfm+0xda/0x2c0 crypto/api.c:471
 crypto_alloc_tfm+0xcf/0x1d0 crypto/api.c:543
 crypto_alloc_aead+0x14/0x20 crypto/aead.c:351
 alg_test_aead+0x1f/0x140 crypto/testmgr.c:1682
 alg_test.part.5+0x1bb/0x4d0 crypto/testmgr.c:3845
 alg_test+0x23/0x25 crypto/testmgr.c:3865
 cryptomgr_test+0x56/0x80 crypto/algboss.c:223
 kthread+0x329/0x3f0 kernel/kthread.c:238
 ret_from_[   16.453502] serial8250: too much work for irq4
Freed by task 0:
(stack is not available)
The buggy address belongs to the object at 88006c16b600
The buggy address is located 80 bytes inside of
The buggy address belongs to the page:
page:ea00017a4f68 count:1 mapcount:0 mapping:88006c16b000 index:0x0
flags: 0x1000100(slab)
raw: 01000100 88006c16b000  00010015
raw: ea00017a2470 88006d401548 88006d400400
page dumped because: kasan: bad access detected
Memory state around the buggy address:
 88006c16b500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 88006c16b580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>88006c16b600: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc
  ^
 88006c16b680: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
 88006c16b700: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
==
Disabling lock debugging due to kernel taint


[PATCH 3/3] crypto: x86 - Add optimized AEGIS implementations

2018-05-11 Thread Ondrej Mosnáček
From: Ondrej Mosnacek 

This patch adds optimized implementations of AEGIS-128, AEGIS-128L,
and AEGIS-256, utilizing the AES-NI and SSE2 x86 extensions.

Signed-off-by: Ondrej Mosnacek 
---
 arch/x86/crypto/Makefile   |   8 +
 arch/x86/crypto/aegis128-aesni-asm.S   | 749 ++
 arch/x86/crypto/aegis128-aesni-glue.c  | 407 
 arch/x86/crypto/aegis128l-aesni-asm.S  | 825 +
 arch/x86/crypto/aegis128l-aesni-glue.c | 407 
 arch/x86/crypto/aegis256-aesni-asm.S   | 702 +
 arch/x86/crypto/aegis256-aesni-glue.c  | 407 
 crypto/Kconfig |  24 +
 8 files changed, 3529 insertions(+)
 create mode 100644 arch/x86/crypto/aegis128-aesni-asm.S
 create mode 100644 arch/x86/crypto/aegis128-aesni-glue.c
 create mode 100644 arch/x86/crypto/aegis128l-aesni-asm.S
 create mode 100644 arch/x86/crypto/aegis128l-aesni-glue.c
 create mode 100644 arch/x86/crypto/aegis256-aesni-asm.S
 create mode 100644 arch/x86/crypto/aegis256-aesni-glue.c

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5f07333bb224..c183553a4bd6 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -38,6 +38,10 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 
+obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
+obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
+obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
+
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
@@ -72,6 +76,10 @@ salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
+aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
+aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
+aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
+
 ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
new file mode 100644
index 000000000000..9254e0b6cc06
--- /dev/null
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -0,0 +1,749 @@
+/*
+ * AES-NI + SSE2 implementation of AEGIS-128
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek 
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include 
+#include 
+
+#define STATE0 %xmm0 /* STATE0-STATE4: the five state registers (128-bit) */
+#define STATE1 %xmm1
+#define STATE2 %xmm2
+#define STATE3 %xmm3
+#define STATE4 %xmm4
+#define KEY %xmm5 /* KEY and MSG name the same register; presumably    */
+#define MSG %xmm5 /* they are never live at the same time - confirm    */
+#define T0 %xmm6 /* temporary; clobbered by aegis128_update */
+#define T1 %xmm7
+
+#define STATEP %rdi /* NOTE(review): %rdi, %rsi, %rdx, %rcx are the first */
+#define LEN %rsi /* four integer argument registers of the SysV AMD64  */
+#define SRC %rdx /* ABI; presumably these follow the C-side parameter  */
+#define DST %rcx /* order - confirm against the glue code prototypes   */
+
+.section .rodata.cst16.aegis128_const, "aM", @progbits, 32 /* a=alloc, M=mergeable; entsize 32 = both 16-byte constants */
+.align 16
+.Laegis128_const_0: /* Fibonacci numbers modulo 256, terms F(0)..F(15) */
+   .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+   .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Laegis128_const_1: /* the same sequence continued: F(16)..F(31) modulo 256 */
+   .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+   .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16 /* a=alloc, M=mergeable; entsize 16 = one constant */
+.align 16
+.Laegis128_counter: /* 16-byte table in which byte i holds the value i */
+   .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+   .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+
+.text
+
+/*
+ * aegis128_update - apply one AESENC round to each of the five state registers
+ * input:
+ *   STATE[0-4] - input state
+ * output:
+ *   STATE[0-4] - output state (shifted positions)
+ * changed:
+ *   T0
+ */
+.macro aegis128_update
+   movdqa STATE4, T0       /* save STATE4: it is overwritten next but is */
+   aesenc STATE0, STATE4   /* still needed as the last round key below   */
+   aesenc STATE1, STATE0   /* AT&T order: aesenc key, dst. Each register */
+   aesenc STATE2, STATE1   /* serves as a round key one line before it   */
+   aesenc STATE3, STATE2   /* is itself updated - do not reorder.        */
+   aesenc T0, STATE3       /* key = original STATE4 (copy held in T0)    */
+.endm
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ *   LEN - bytes
+ *   SRC - src
+ * output:
+ *   MSG  - message block
+ * changed:
+ *   T0
+ *   %r8
+ *   %r9
+ */
+__load_partial:
+   xor %r9, %r9
+   pxor MSG, MSG
+
+   mov LEN, %r8
+   and $0x1, %r8
+   jz .Lld_partial_1
+
+   mov LEN, %r8
+   and $0x1E, %r8
+   add SRC, %r8
+   mov (%r8), %r9b
+
+.Lld_partial_1:
+   mov LEN, %r8
+   and $0x2, %r8
+   jz .Lld_partial_2
+
+   mov LEN, %r8
+   and $0x1C, %r8
+   add SRC, %r8
+   shl $0x10, %r9
+   mov (%r8), %r9w
+
+.Lld_partial_2:
+   mov LEN, %r8
+   and $0x4, %r8
+   jz .Lld_partial_4
+
+   mov LEN, %r8
+   and $0x18, %r8
+   add SRC, %r8
+   shl $32,