CVS commit: src/sys/crypto/arch/arm
Module Name:src Committed By: rin Date: Mon Aug 7 01:14:19 UTC 2023 Modified Files: src/sys/crypto/arch/arm: arm_neon.h Log Message: sys/crypto: aarch64: Catch up with builtin rename for GCC12 Kernel self tests successfully pass for aarch64{,eb}. Same binary generated by GCC10 and GCC12 for: --- #include #include "arm_neon.h" uint32x4_t my_vshrq_n_u32(uint32x4_t v, uint8_t bits) { return vshrq_n_u32(v, bits); } uint8x16_t my_vshrq_n_u8(uint8x16_t v, uint8_t bits) { return vshrq_n_u8(v, bits); } --- To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/arch/arm/arm_neon.h diff -u src/sys/crypto/arch/arm/arm_neon.h:1.1 src/sys/crypto/arch/arm/arm_neon.h:1.2 --- src/sys/crypto/arch/arm/arm_neon.h:1.1 Mon Aug 7 01:07:36 2023 +++ src/sys/crypto/arch/arm/arm_neon.h Mon Aug 7 01:14:19 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.1 2023/08/07 01:07:36 rin Exp $ */ +/* $NetBSD: arm_neon.h,v 1.2 2023/08/07 01:14:19 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -522,7 +522,11 @@ static __inline uint32x4_t vshrq_n_u32(uint32x4_t __v, uint8_t __bits) { #ifdef __aarch64__ +# if __GNUC_PREREQ__(12, 0) + return __builtin_aarch64_lshrv4si_uus(__v, __bits); +# else return (uint32x4_t)__builtin_aarch64_lshrv4si((int32x4_t)__v, __bits); +# endif #else return (uint32x4_t)__builtin_neon_vshru_nv4si((int32x4_t)__v, __bits); #endif @@ -538,7 +542,11 @@ static __inline uint8x16_t vshrq_n_u8(uint8x16_t __v, uint8_t __bits) { #ifdef __aarch64__ +# if __GNUC_PREREQ__(12, 0) + return __builtin_aarch64_lshrv16qi_uus(__v, __bits); +# else return (uint8x16_t)__builtin_aarch64_lshrv16qi((int8x16_t)__v, __bits); +# endif #else return (uint8x16_t)__builtin_neon_vshru_nv16qi((int8x16_t)__v, __bits); #endif
CVS commit: src/sys/crypto/arch/arm
Module Name:src Committed By: rin Date: Mon Aug 7 01:14:19 UTC 2023 Modified Files: src/sys/crypto/arch/arm: arm_neon.h Log Message: sys/crypto: aarch64: Catch up with builtin rename for GCC12 Kernel self tests successfully pass for aarch64{,eb}. Same binary generated by GCC10 and GCC12 for: --- #include #include "arm_neon.h" uint32x4_t my_vshrq_n_u32(uint32x4_t v, uint8_t bits) { return vshrq_n_u32(v, bits); } uint8x16_t my_vshrq_n_u8(uint8x16_t v, uint8_t bits) { return vshrq_n_u8(v, bits); } --- To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: rin Date: Mon Aug 7 01:07:36 UTC 2023 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_impl.h src/sys/crypto/aes/arch/x86: aes_sse2_impl.h aes_ssse3_impl.h src/sys/crypto/chacha/arch/arm: chacha_neon.c src/sys/crypto/chacha/arch/x86: chacha_sse2.c Added Files: src/sys/crypto/arch/arm: arm_neon.h arm_neon_imm.h src/sys/crypto/arch/x86: immintrin.h immintrin_ext.h Removed Files: src/sys/crypto/aes/arch/arm: arm_neon.h arm_neon_imm.h src/sys/crypto/aes/arch/x86: immintrin.h immintrin_ext.h src/sys/crypto/chacha/arch/arm: arm_neon.h arm_neon_imm.h src/sys/crypto/chacha/arch/x86: immintrin.h Log Message: sys/crypto: Introduce arch/{arm,x86} to share common MD headers Dedup between aes and chacha. No binary changes. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.h cvs rdiff -u -r1.12 -r0 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.2 -r0 src/sys/crypto/aes/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_sse2_impl.h cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h cvs rdiff -u -r1.5 -r0 src/sys/crypto/aes/arch/x86/immintrin.h cvs rdiff -u -r1.1 -r0 src/sys/crypto/aes/arch/x86/immintrin_ext.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/arm/arm_neon.h \ src/sys/crypto/arch/arm/arm_neon_imm.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/x86/immintrin.h \ src/sys/crypto/arch/x86/immintrin_ext.h cvs rdiff -u -r1.7 -r0 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r1.2 -r0 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/chacha/arch/arm/chacha_neon.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/x86/chacha_sse2.c cvs rdiff -u -r1.1 -r0 src/sys/crypto/chacha/arch/x86/immintrin.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. 
Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_impl.h diff -u src/sys/crypto/aes/arch/arm/aes_neon_impl.h:1.3 src/sys/crypto/aes/arch/arm/aes_neon_impl.h:1.4 --- src/sys/crypto/aes/arch/arm/aes_neon_impl.h:1.3 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_impl.h Mon Aug 7 01:07:35 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_impl.h,v 1.3 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: aes_neon_impl.h,v 1.4 2023/08/07 01:07:35 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -31,8 +31,8 @@ #include -#include "arm_neon.h" -#include "arm_neon_imm.h" +#include +#include #include #include Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.h diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.h:1.2 src/sys/crypto/aes/arch/x86/aes_sse2_impl.h:1.3 --- src/sys/crypto/aes/arch/x86/aes_sse2_impl.h:1.2 Mon Jun 29 23:50:05 2020 +++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.h Mon Aug 7 01:07:36 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_sse2_impl.h,v 1.2 2020/06/29 23:50:05 riastradh Exp $ */ +/* $NetBSD: aes_sse2_impl.h,v 1.3 2023/08/07 01:07:36 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -33,8 +33,8 @@ #include #include -#include -#include +#include +#include void aes_sse2_bitslice_Sbox(__m128i[static 4]); void aes_sse2_bitslice_invSbox(__m128i[static 4]); Index: src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h diff -u src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h:1.1 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h:1.2 --- src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h:1.1 Mon Jun 29 23:51:35 2020 +++ src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h Mon Aug 7 01:07:36 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ssse3_impl.h,v 1.1 2020/06/29 23:51:35 riastradh Exp $ */ +/* $NetBSD: aes_ssse3_impl.h,v 1.2 2023/08/07 01:07:36 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -33,8 +33,8 @@ #include #include -#include -#include +#include +#include __m128i aes_ssse3_enc1(const struct aesenc *, __m128i, unsigned); __m128i aes_ssse3_dec1(const struct aesdec *, __m128i, unsigned); Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.8 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.9 --- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.8 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon.c Mon Aug 7 01:07:36 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon.c,v 1.8 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: chacha_neon.c,v 1.9 2023/08/07 01:07:36 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -29,8 +29,8 @@ #include #include -#include "arm_neon.h" -#include "arm_neon_imm.h" +#include +#include #include "chacha_neon.h" /* Index: src/sys/crypto/chacha/arch/x86/chacha_sse2.c diff -u src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.2 src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.3 ---
CVS commit: src/sys/crypto
Module Name:src Committed By: rin Date: Mon Aug 7 01:07:36 UTC 2023 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_impl.h src/sys/crypto/aes/arch/x86: aes_sse2_impl.h aes_ssse3_impl.h src/sys/crypto/chacha/arch/arm: chacha_neon.c src/sys/crypto/chacha/arch/x86: chacha_sse2.c Added Files: src/sys/crypto/arch/arm: arm_neon.h arm_neon_imm.h src/sys/crypto/arch/x86: immintrin.h immintrin_ext.h Removed Files: src/sys/crypto/aes/arch/arm: arm_neon.h arm_neon_imm.h src/sys/crypto/aes/arch/x86: immintrin.h immintrin_ext.h src/sys/crypto/chacha/arch/arm: arm_neon.h arm_neon_imm.h src/sys/crypto/chacha/arch/x86: immintrin.h Log Message: sys/crypto: Introduce arch/{arm,x86} to share common MD headers Dedup between aes and chacha. No binary changes. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.h cvs rdiff -u -r1.12 -r0 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.2 -r0 src/sys/crypto/aes/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_sse2_impl.h cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h cvs rdiff -u -r1.5 -r0 src/sys/crypto/aes/arch/x86/immintrin.h cvs rdiff -u -r1.1 -r0 src/sys/crypto/aes/arch/x86/immintrin_ext.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/arm/arm_neon.h \ src/sys/crypto/arch/arm/arm_neon_imm.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/x86/immintrin.h \ src/sys/crypto/arch/x86/immintrin_ext.h cvs rdiff -u -r1.7 -r0 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r1.2 -r0 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/chacha/arch/arm/chacha_neon.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/x86/chacha_sse2.c cvs rdiff -u -r1.1 -r0 src/sys/crypto/chacha/arch/x86/immintrin.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: rin Date: Mon Aug 7 00:58:35 UTC 2023 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h Log Message: sys/crypto/{aes,chacha}/arch/arm/arm_neon.h: Sync (whitespace fix) No binary changes. To generate a diff of this commit: cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.11 src/sys/crypto/aes/arch/arm/arm_neon.h:1.12 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.11 Mon Sep 7 18:06:13 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Mon Aug 7 00:58:35 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.11 2020/09/07 18:06:13 jakllsch Exp $ */ +/* $NetBSD: arm_neon.h,v 1.12 2023/08/07 00:58:35 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -232,7 +232,7 @@ static __inline uint32_t vgetq_lane_u32(uint32x4_t __v, uint8_t __i) { #ifdef __aarch64__ - return __v[__neon_laneq_index(__v,__i)]; + return __v[__neon_laneq_index(__v, __i)]; #else return (uint32_t)__builtin_neon_vget_laneuv4si((int32x4_t)__v, __i); #endif
CVS commit: src/sys/crypto/aes/arch/arm
Module Name: src Committed By: rin Date: Mon Aug 7 00:58:35 UTC 2023 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h Log Message: sys/crypto/{aes,chacha}/arch/arm/arm_neon.h: Sync (whitespace fix) No binary changes. To generate a diff of this commit: cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/cprng_fast
Module Name:src Committed By: riastradh Date: Sat Aug 5 11:39:18 UTC 2023 Modified Files: src/sys/crypto/cprng_fast: cprng_fast.c Log Message: cprng_fast(9): Drop and retake percpu reference across cprng_strong. cprng_strong may sleep on an adaptive lock (via entropy_extract), which invalidates percpu(9) references. Discovered by stumbling upon this panic in a test run: panic: kernel diagnostic assertion "(cprng == percpu_getref(cprng_fast_percpu)) && (percpu_putref(cprng_fast_percpu), true)" failed: file "/home/riastradh/netbsd/current/src/sys/rump/librump/rumpkern/../../../crypto/cprng_fast/cprng_fast.c", line 117 XXX pullup-10 To generate a diff of this commit: cvs rdiff -u -r1.18 -r1.19 src/sys/crypto/cprng_fast/cprng_fast.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/cprng_fast/cprng_fast.c diff -u src/sys/crypto/cprng_fast/cprng_fast.c:1.18 src/sys/crypto/cprng_fast/cprng_fast.c:1.19 --- src/sys/crypto/cprng_fast/cprng_fast.c:1.18 Thu Sep 1 18:32:25 2022 +++ src/sys/crypto/cprng_fast/cprng_fast.c Sat Aug 5 11:39:18 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $ */ +/* $NetBSD: cprng_fast.c,v 1.19 2023/08/05 11:39:18 riastradh Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. 
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.19 2023/08/05 11:39:18 riastradh Exp $"); #include #include @@ -58,7 +58,7 @@ struct cprng_fast { }; static void cprng_fast_init_cpu(void *, void *, struct cpu_info *); -static void cprng_fast_reseed(struct cprng_fast *); +static void cprng_fast_reseed(struct cprng_fast **, unsigned); static void cprng_fast_seed(struct cprng_fast *, const void *); static void cprng_fast_buf(struct cprng_fast *, void *, unsigned); @@ -93,6 +93,7 @@ static int cprng_fast_get(struct cprng_fast **cprngp) { struct cprng_fast *cprng; + unsigned epoch; int s; KASSERT(!cpu_intr_p()); @@ -101,9 +102,10 @@ cprng_fast_get(struct cprng_fast **cprng *cprngp = cprng = percpu_getref(cprng_fast_percpu); s = splsoftserial(); - if (__predict_false(cprng->epoch != entropy_epoch())) { + epoch = entropy_epoch(); + if (__predict_false(cprng->epoch != epoch)) { splx(s); - cprng_fast_reseed(cprng); + cprng_fast_reseed(cprngp, epoch); s = splsoftserial(); } @@ -121,13 +123,25 @@ cprng_fast_put(struct cprng_fast *cprng, } static void -cprng_fast_reseed(struct cprng_fast *cprng) +cprng_fast_reseed(struct cprng_fast **cprngp, unsigned epoch) { - unsigned epoch = entropy_epoch(); + struct cprng_fast *cprng; uint8_t seed[CPRNG_FAST_SEED_BYTES]; int s; + /* + * Drop the percpu(9) reference to extract a fresh seed from + * the entropy pool. cprng_strong may sleep on an adaptive + * lock, which invalidates our percpu(9) reference. + * + * This may race with reseeding in another thread, which is no + * big deal -- worst case, we rewind the entropy epoch here and + * cause the next caller to reseed again, and in the end we + * just reseed a couple more times than necessary. 
+ */ + percpu_putref(cprng_fast_percpu); cprng_strong(kern_cprng, seed, sizeof(seed), 0); + *cprngp = cprng = percpu_getref(cprng_fast_percpu); s = splsoftserial(); cprng_fast_seed(cprng, seed);
CVS commit: src/sys/crypto/cprng_fast
Module Name:src Committed By: riastradh Date: Sat Aug 5 11:39:18 UTC 2023 Modified Files: src/sys/crypto/cprng_fast: cprng_fast.c Log Message: cprng_fast(9): Drop and retake percpu reference across cprng_strong. cprng_strong may sleep on an adaptive lock (via entropy_extract), which invalidates percpu(9) references. Discovered by stumbling upon this panic in a test run: panic: kernel diagnostic assertion "(cprng == percpu_getref(cprng_fast_percpu)) && (percpu_putref(cprng_fast_percpu), true)" failed: file "/home/riastradh/netbsd/current/src/sys/rump/librump/rumpkern/../../../crypto/cprng_fast/cprng_fast.c", line 117 XXX pullup-10 To generate a diff of this commit: cvs rdiff -u -r1.18 -r1.19 src/sys/crypto/cprng_fast/cprng_fast.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name: src Committed By: jmcneill Date: Sat Nov 5 17:36:33 UTC 2022 Modified Files: src/sys/crypto/aes: aes_impl.c src/sys/crypto/chacha: chacha_impl.c Log Message: Make aes and chacha prints debug only. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/aes_impl.c cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/chacha_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: jmcneill Date: Sat Nov 5 17:36:33 UTC 2022 Modified Files: src/sys/crypto/aes: aes_impl.c src/sys/crypto/chacha: chacha_impl.c Log Message: Make aes and chacha prints debug only. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/aes_impl.c cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/chacha_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_impl.c diff -u src/sys/crypto/aes/aes_impl.c:1.9 src/sys/crypto/aes/aes_impl.c:1.10 --- src/sys/crypto/aes/aes_impl.c:1.9 Mon Jul 27 20:45:15 2020 +++ src/sys/crypto/aes/aes_impl.c Sat Nov 5 17:36:33 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $ */ +/* $NetBSD: aes_impl.c,v 1.10 2022/11/05 17:36:33 jmcneill Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.10 2022/11/05 17:36:33 jmcneill Exp $"); #include #include @@ -121,7 +121,7 @@ aes_select(void) if (aes_impl == NULL) panic("AES self-tests failed"); - aprint_verbose("aes: %s\n", aes_impl->ai_name); + aprint_debug("aes: %s\n", aes_impl->ai_name); return 0; } Index: src/sys/crypto/chacha/chacha_impl.c diff -u src/sys/crypto/chacha/chacha_impl.c:1.3 src/sys/crypto/chacha/chacha_impl.c:1.4 --- src/sys/crypto/chacha/chacha_impl.c:1.3 Mon Jul 27 20:49:10 2020 +++ src/sys/crypto/chacha/chacha_impl.c Sat Nov 5 17:36:33 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_impl.c,v 1.3 2020/07/27 20:49:10 riastradh Exp $ */ +/* $NetBSD: chacha_impl.c,v 1.4 2022/11/05 17:36:33 jmcneill Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -84,7 +84,7 @@ chacha_select(void) chacha_impl = chacha_md_impl; } - aprint_verbose("chacha: %s\n", chacha_impl->ci_name); + aprint_debug("chacha: %s\n", chacha_impl->ci_name); return 0; }
CVS commit: src/sys/crypto/cprng_fast
Module Name:src Committed By: riastradh Date: Thu Sep 1 18:32:25 UTC 2022 Modified Files: src/sys/crypto/cprng_fast: cprng_fast.c Log Message: cprng_fast(9): Assert not in pserialize read section. This may sleep to take the global entropy lock in case it needs to be reseeded. If that happens we can't be in a pserialize read section. To generate a diff of this commit: cvs rdiff -u -r1.17 -r1.18 src/sys/crypto/cprng_fast/cprng_fast.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/cprng_fast/cprng_fast.c diff -u src/sys/crypto/cprng_fast/cprng_fast.c:1.17 src/sys/crypto/cprng_fast/cprng_fast.c:1.18 --- src/sys/crypto/cprng_fast/cprng_fast.c:1.17 Wed Jun 1 15:44:37 2022 +++ src/sys/crypto/cprng_fast/cprng_fast.c Thu Sep 1 18:32:25 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $ */ +/* $NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $"); #include #include @@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c #include #include #include +#include #include @@ -95,6 +96,7 @@ cprng_fast_get(struct cprng_fast **cprng int s; KASSERT(!cpu_intr_p()); + KASSERT(pserialize_not_in_read_section()); *cprngp = cprng = percpu_getref(cprng_fast_percpu); s = splsoftserial();
CVS commit: src/sys/crypto/cprng_fast
Module Name: src Committed By: riastradh Date: Thu Sep 1 18:32:25 UTC 2022 Modified Files: src/sys/crypto/cprng_fast: cprng_fast.c Log Message: cprng_fast(9): Assert not in pserialize read section. This may sleep to take the global entropy lock in case it needs to be reseeded. If that happens we can't be in a pserialize read section. To generate a diff of this commit: cvs rdiff -u -r1.17 -r1.18 src/sys/crypto/cprng_fast/cprng_fast.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sun Jun 26 17:52:54 UTC 2022 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_subr.c Log Message: arm/aes_neon: Fix formatting of self-test failure message. Discovered by code inspection. Remarkably, a combination of errors made this fail to be a stack buffer overrun. Verified by booting with ARMv8.0-AES disabled and with the self-test artificially made to fail. To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.7 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.8 --- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.7 Sun Aug 9 02:48:38 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c Sun Jun 26 17:52:54 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $ */ +/* $NetBSD: aes_neon_subr.c,v 1.8 2022/06/26 17:52:54 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.8 2022/06/26 17:52:54 riastradh Exp $"); #ifdef _KERNEL #include @@ -183,11 +183,11 @@ aes_neon_xts_update_selftest(void) for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { storeblock(t, aes_neon_xts_update(loadblock(cases[i].in))); if (memcmp(t, cases[i].out, 16)) { - char buf[33]; + char buf[3*16 + 1]; unsigned j; for (j = 0; j < 16; j++) { -snprintf(buf + 2*j, sizeof(buf) - 2*j, +snprintf(buf + 3*j, sizeof(buf) - 3*j, " %02hhx", t[j]); } printf("%s %u: %s\n", __func__, i, buf);
CVS commit: src/sys/crypto/aes/arch/arm
Module Name: src Committed By: riastradh Date: Sun Jun 26 17:52:54 UTC 2022 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_subr.c Log Message: arm/aes_neon: Fix formatting of self-test failure message. Discovered by code inspection. Remarkably, a combination of errors made this fail to be a stack buffer overrun. Verified by booting with ARMv8.0-AES disabled and with the self-test artificially made to fail. To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/cprng_fast
Module Name:src Committed By: riastradh Date: Wed Jun 1 15:44:37 UTC 2022 Modified Files: src/sys/crypto/cprng_fast: cprng_fast.c Log Message: cprng(9): cprng_fast is no longer used from interrupt context. Rip out logic to defer reseeding to softint. To generate a diff of this commit: cvs rdiff -u -r1.16 -r1.17 src/sys/crypto/cprng_fast/cprng_fast.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/cprng_fast/cprng_fast.c diff -u src/sys/crypto/cprng_fast/cprng_fast.c:1.16 src/sys/crypto/cprng_fast/cprng_fast.c:1.17 --- src/sys/crypto/cprng_fast/cprng_fast.c:1.16 Tue Jul 28 20:15:07 2020 +++ src/sys/crypto/cprng_fast/cprng_fast.c Wed Jun 1 15:44:37 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $ */ +/* $NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $"); #include #include @@ -39,7 +39,6 @@ __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c #include #include #include -#include #include #include @@ -58,8 +57,7 @@ struct cprng_fast { }; static void cprng_fast_init_cpu(void *, void *, struct cpu_info *); -static void cprng_fast_schedule_reseed(struct cprng_fast *); -static void cprng_fast_intr(void *); +static void cprng_fast_reseed(struct cprng_fast *); static void cprng_fast_seed(struct cprng_fast *, const void *); static void cprng_fast_buf(struct cprng_fast *, void *, unsigned); @@ -68,7 +66,6 @@ static void cprng_fast_buf_short(void *, static void cprng_fast_buf_long(void *, size_t); static percpu_t *cprng_fast_percpu __read_mostly; -static void *cprng_fast_softint __read_mostly; void cprng_fast_init(void) @@ -76,20 +73,14 @@ cprng_fast_init(void) cprng_fast_percpu = 
percpu_create(sizeof(struct cprng_fast), cprng_fast_init_cpu, NULL, NULL); - cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE, - _fast_intr, NULL); } static void cprng_fast_init_cpu(void *p, void *arg __unused, struct cpu_info *ci) { struct cprng_fast *const cprng = p; - uint8_t seed[CPRNG_FAST_SEED_BYTES]; - cprng->epoch = entropy_epoch(); - cprng_strong(kern_cprng, seed, sizeof seed, 0); - cprng_fast_seed(cprng, seed); - (void)explicit_memset(seed, 0, sizeof seed); + cprng->epoch = 0; cprng->reseed_evcnt = kmem_alloc(sizeof(*cprng->reseed_evcnt), KM_SLEEP); @@ -103,11 +94,16 @@ cprng_fast_get(struct cprng_fast **cprng struct cprng_fast *cprng; int s; + KASSERT(!cpu_intr_p()); + *cprngp = cprng = percpu_getref(cprng_fast_percpu); - s = splvm(); + s = splsoftserial(); - if (__predict_false(cprng->epoch != entropy_epoch())) - cprng_fast_schedule_reseed(cprng); + if (__predict_false(cprng->epoch != entropy_epoch())) { + splx(s); + cprng_fast_reseed(cprng); + s = splsoftserial(); + } return s; } @@ -123,29 +119,19 @@ cprng_fast_put(struct cprng_fast *cprng, } static void -cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused) -{ - - softint_schedule(cprng_fast_softint); -} - -static void -cprng_fast_intr(void *cookie __unused) +cprng_fast_reseed(struct cprng_fast *cprng) { unsigned epoch = entropy_epoch(); - struct cprng_fast *cprng; uint8_t seed[CPRNG_FAST_SEED_BYTES]; int s; cprng_strong(kern_cprng, seed, sizeof(seed), 0); - cprng = percpu_getref(cprng_fast_percpu); - s = splvm(); + s = splsoftserial(); cprng_fast_seed(cprng, seed); cprng->epoch = epoch; cprng->reseed_evcnt->ev_count++; splx(s); - percpu_putref(cprng_fast_percpu); explicit_memset(seed, 0, sizeof(seed)); }
CVS commit: src/sys/crypto/cprng_fast
Module Name: src Committed By: riastradh Date: Wed Jun 1 15:44:37 UTC 2022 Modified Files: src/sys/crypto/cprng_fast: cprng_fast.c Log Message: cprng(9): cprng_fast is no longer used from interrupt context. Rip out logic to defer reseeding to softint. To generate a diff of this commit: cvs rdiff -u -r1.16 -r1.17 src/sys/crypto/cprng_fast/cprng_fast.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: msaitoh Date: Sun Dec 5 04:48:35 UTC 2021 Modified Files: src/sys/crypto/aes: aes_selftest.c Log Message: s/folllowing/following/ To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/aes_selftest.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_selftest.c diff -u src/sys/crypto/aes/aes_selftest.c:1.6 src/sys/crypto/aes/aes_selftest.c:1.7 --- src/sys/crypto/aes/aes_selftest.c:1.6 Tue Sep 8 22:48:24 2020 +++ src/sys/crypto/aes/aes_selftest.c Sun Dec 5 04:48:35 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $ */ +/* $NetBSD: aes_selftest.c,v 1.7 2021/12/05 04:48:35 msaitoh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.7 2021/12/05 04:48:35 msaitoh Exp $"); #ifdef _KERNEL @@ -133,7 +133,7 @@ aes_selftest_encdec(const struct aes_imp if (outbuf[17] != 0x1a) return aes_selftest_fail(impl, outbuf + 17, (const uint8_t[1]){0x1a}, 1, - "AES overrun folllowing"); + "AES overrun following"); /* Success! */ return 0;
CVS commit: src/sys/crypto/aes
Module Name: src Committed By: msaitoh Date: Sun Dec 5 04:48:35 UTC 2021 Modified Files: src/sys/crypto/aes: aes_selftest.c Log Message: s/folllowing/following/ To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/aes_selftest.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: jmcneill Date: Sun Oct 17 14:45:45 UTC 2021 Modified Files: src/sys/crypto/adiantum: adiantum.c src/sys/crypto/aes: aes_ccm.c src/sys/crypto/blake2: blake2s.c Log Message: Upgrade self-test passed messages from verbose to debug. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/adiantum/adiantum.c cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_ccm.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/blake2/blake2s.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/adiantum/adiantum.c diff -u src/sys/crypto/adiantum/adiantum.c:1.6 src/sys/crypto/adiantum/adiantum.c:1.7 --- src/sys/crypto/adiantum/adiantum.c:1.6 Wed Apr 14 21:29:57 2021 +++ src/sys/crypto/adiantum/adiantum.c Sun Oct 17 14:45:45 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $ */ +/* $NetBSD: adiantum.c,v 1.7 2021/10/17 14:45:45 jmcneill Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $"); +__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.7 2021/10/17 14:45:45 jmcneill Exp $"); #include #include @@ -1938,7 +1938,7 @@ adiantum_modcmd(modcmd_t cmd, void *opaq result |= adiantum_selftest(); if (result) panic("adiantum self-test failed"); - aprint_verbose("adiantum: self-test passed\n"); + aprint_debug("adiantum: self-test passed\n"); return 0; } case MODULE_CMD_FINI: Index: src/sys/crypto/aes/aes_ccm.c diff -u src/sys/crypto/aes/aes_ccm.c:1.5 src/sys/crypto/aes/aes_ccm.c:1.6 --- src/sys/crypto/aes/aes_ccm.c:1.5 Mon Aug 10 06:27:29 2020 +++ src/sys/crypto/aes/aes_ccm.c Sun Oct 17 14:45:45 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $ */ +/* $NetBSD: aes_ccm.c,v 1.6 2021/10/17 14:45:45 jmcneill Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.6 2021/10/17 14:45:45 jmcneill Exp $"); #include #include @@ -611,7 +611,7 @@ aes_ccm_modcmd(modcmd_t cmd, void *opaqu case MODULE_CMD_INIT: if (aes_ccm_selftest()) return EIO; - aprint_verbose("aes_ccm: self-test passed\n"); + aprint_debug("aes_ccm: self-test passed\n"); return 0; case MODULE_CMD_FINI: return 0; Index: src/sys/crypto/blake2/blake2s.c diff -u src/sys/crypto/blake2/blake2s.c:1.1 src/sys/crypto/blake2/blake2s.c:1.2 --- src/sys/crypto/blake2/blake2s.c:1.1 Thu Aug 20 21:21:05 2020 +++ src/sys/crypto/blake2/blake2s.c Sun Oct 17 14:45:45 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $ */ +/* $NetBSD: blake2s.c,v 1.2 2021/10/17 14:45:45 jmcneill Exp $ */ /*- * Copyright (c) 2015 Taylor R. Campbell @@ -29,7 +29,7 @@ #ifdef _KERNEL #include -__KERNEL_RCSID(0, "$NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: blake2s.c,v 1.2 2021/10/17 14:45:45 jmcneill Exp $"); #include #include @@ -338,7 +338,7 @@ blake2s_modcmd(modcmd_t cmd, void *opaqu case MODULE_CMD_INIT: if (blake2s_selftest()) panic("blake2s: self-test failed"); - aprint_verbose("blake2s: self-test passed\n"); + aprint_debug("blake2s: self-test passed\n"); return 0; case MODULE_CMD_FINI: return 0;
CVS commit: src/sys/crypto
Module Name:src Committed By: jmcneill Date: Sun Oct 17 14:45:45 UTC 2021 Modified Files: src/sys/crypto/adiantum: adiantum.c src/sys/crypto/aes: aes_ccm.c src/sys/crypto/blake2: blake2s.c Log Message: Upgrade self-test passed messages from verbose to debug. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/adiantum/adiantum.c cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_ccm.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/blake2/blake2s.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/camellia
Module Name:src Committed By: gutteridge Date: Sat Sep 4 00:33:10 UTC 2021 Modified Files: src/sys/crypto/camellia: camellia-api.c camellia.c Log Message: Fix typos in comments and add missing KERNEL_RCSID To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/camellia/camellia-api.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/camellia/camellia.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/camellia/camellia-api.c diff -u src/sys/crypto/camellia/camellia-api.c:1.1 src/sys/crypto/camellia/camellia-api.c:1.2 --- src/sys/crypto/camellia/camellia-api.c:1.1 Thu May 5 17:38:36 2011 +++ src/sys/crypto/camellia/camellia-api.c Sat Sep 4 00:33:09 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: camellia-api.c,v 1.1 2011/05/05 17:38:36 drochner Exp $ */ +/* $NetBSD: camellia-api.c,v 1.2 2021/09/04 00:33:09 gutteridge Exp $ */ /* * @@ -28,6 +28,7 @@ */ #include +__KERNEL_RCSID(0, "$NetBSD: camellia-api.c,v 1.2 2021/09/04 00:33:09 gutteridge Exp $"); #include #include Index: src/sys/crypto/camellia/camellia.c diff -u src/sys/crypto/camellia/camellia.c:1.2 src/sys/crypto/camellia/camellia.c:1.3 --- src/sys/crypto/camellia/camellia.c:1.2 Wed Jan 1 15:18:57 2014 +++ src/sys/crypto/camellia/camellia.c Sat Sep 4 00:33:09 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: camellia.c,v 1.2 2014/01/01 15:18:57 pgoyette Exp $ */ +/* $NetBSD: camellia.c,v 1.3 2021/09/04 00:33:09 gutteridge Exp $ */ /* camellia.h ver 1.1.0 * @@ -33,6 +33,8 @@ */ #include +__KERNEL_RCSID(0, "$NetBSD: camellia.c,v 1.3 2021/09/04 00:33:09 gutteridge Exp $"); + #include #include #include @@ -1007,7 +1009,7 @@ camellia_encrypt128(const uint32_t *subk void camellia_decrypt128(const uint32_t *subkey, uint32_t *io) { -uint32_t il,ir,t0,t1; /* temporary valiables */ +uint32_t il,ir,t0,t1; /* temporary variables */ /* pre whitening but absorb kw2*/ io[0] ^= SUBL(24); @@ -1077,7 +1079,7 @@ camellia_decrypt128(const uint32_t *subk 
void camellia_encrypt256(const uint32_t *subkey, uint32_t *io) { -uint32_t il,ir,t0,t1; /* temporary valiables */ +uint32_t il,ir,t0,t1; /* temporary variables */ /* pre whitening but absorb kw2*/ io[0] ^= SUBL(0); @@ -1160,7 +1162,7 @@ camellia_encrypt256(const uint32_t *subk void camellia_decrypt256(const uint32_t *subkey, uint32_t *io) { -uint32_t il,ir,t0,t1; /* temporary valiables */ +uint32_t il,ir,t0,t1; /* temporary variables */ /* pre whitening but absorb kw2*/ io[0] ^= SUBL(32);
CVS commit: src/sys/crypto/camellia
Module Name:src Committed By: gutteridge Date: Sat Sep 4 00:33:10 UTC 2021 Modified Files: src/sys/crypto/camellia: camellia-api.c camellia.c Log Message: Fix typos in comments and add missing KERNEL_RCSID To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/camellia/camellia-api.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/camellia/camellia.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/adiantum
Module Name:src Committed By: christos Date: Wed Apr 14 21:29:57 UTC 2021 Modified Files: src/sys/crypto/adiantum: adiantum.c Log Message: use an enum instead of constant variables so that they work in CTASSERT. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/adiantum/adiantum.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/adiantum/adiantum.c diff -u src/sys/crypto/adiantum/adiantum.c:1.5 src/sys/crypto/adiantum/adiantum.c:1.6 --- src/sys/crypto/adiantum/adiantum.c:1.5 Sun Jul 26 00:05:20 2020 +++ src/sys/crypto/adiantum/adiantum.c Wed Apr 14 17:29:57 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: adiantum.c,v 1.5 2020/07/26 04:05:20 riastradh Exp $ */ +/* $NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.5 2020/07/26 04:05:20 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $"); #include #include @@ -390,10 +390,12 @@ static void nh(uint8_t h[static 32], const uint8_t *m, size_t mlen, const uint32_t k[static 268 /* u/w + 2s(r - 1) */]) { - const unsigned w = 32; /* word size */ - const unsigned s = 2; /* stride */ - const unsigned r = 4; /* rounds */ - const unsigned u = 8192; /* unit count (bits per msg unit) */ + enum { + s = 2, /* stride */ + r = 4, /* rounds */ + w = 32, /* word size */ + u = 8192 /* unit count (bits per msg unit) */ + }; uint64_t h0 = 0, h1 = 0, h2 = 0, h3 = 0; unsigned i;
CVS commit: src/sys/crypto/adiantum
Module Name:src Committed By: christos Date: Wed Apr 14 21:29:57 UTC 2021 Modified Files: src/sys/crypto/adiantum: adiantum.c Log Message: use an enum instead of constant variables so that they work in CTASSERT. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/adiantum/adiantum.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: rin Date: Sat Nov 21 08:09:21 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon.c Log Message: Fix build with clang for earmv7hf; loadroundkey() is used only for __aarch64__. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon.c diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon.c:1.6 --- src/sys/crypto/aes/arch/arm/aes_neon.c:1.5 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon.c Sat Nov 21 08:09:21 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $"); #include @@ -196,11 +196,13 @@ inv = VQ_N_U8(0x80,0x01,0x08,0x0D,0x0F,0 inva = VQ_N_U8(0x80,0x07,0x0B,0x0F,0x06,0x0A,0x04,0x01, 0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03); +#ifdef __aarch64__ static inline uint8x16_t loadroundkey(const void *rkp) { return vld1q_u8(rkp); } +#endif static inline void storeroundkey(void *rkp, uint8x16_t rk)
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: rin Date: Sat Nov 21 08:09:21 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon.c Log Message: Fix build with clang for earmv7hf; loadroundkey() is used only for __aarch64__. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: jmcneill Date: Sat Oct 10 08:24:10 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_impl.c src/sys/crypto/chacha/arch/arm: chacha_neon_impl.c Log Message: Fix detection of NEON features. ID_AA64PFR0_EL1_ADV_SIMD_NONE means SIMD is not available, and any other value means it is. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon_impl.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_impl.c diff -u src/sys/crypto/aes/arch/arm/aes_neon_impl.c:1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.c:1.5 --- src/sys/crypto/aes/arch/arm/aes_neon_impl.c:1.4 Sat Jul 25 22:36:06 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_impl.c Sat Oct 10 08:24:10 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_impl.c,v 1.4 2020/07/25 22:36:06 riastradh Exp $ */ +/* $NetBSD: aes_neon_impl.c,v 1.5 2020/10/10 08:24:10 jmcneill Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon_impl.c,v 1.4 2020/07/25 22:36:06 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon_impl.c,v 1.5 2020/10/10 08:24:10 jmcneill Exp $"); #include #include @@ -200,10 +200,10 @@ aes_neon_probe(void) return -1; #endif switch (__SHIFTOUT(id->ac_aa64pfr0, ID_AA64PFR0_EL1_ADVSIMD)) { - case ID_AA64PFR0_EL1_ADV_SIMD_IMPL: - break; - default: + case ID_AA64PFR0_EL1_ADV_SIMD_NONE: return -1; + default: + break; } #else #ifdef _KERNEL Index: src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c:1.2 --- src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c:1.1 Sat Jul 25 22:51:57 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c Sat Oct 10 08:24:10 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon_impl.c,v 1.1 2020/07/25 22:51:57 riastradh Exp $ */ +/* $NetBSD: chacha_neon_impl.c,v 1.2 2020/10/10 08:24:10 jmcneill Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: chacha_neon_impl.c,v 1.1 2020/07/25 22:51:57 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: chacha_neon_impl.c,v 1.2 2020/10/10 08:24:10 jmcneill Exp $"); #include "chacha_neon.h" @@ -147,10 +147,10 @@ chacha_probe_neon(void) return -1; #endif switch (__SHIFTOUT(id->ac_aa64pfr0, ID_AA64PFR0_EL1_ADVSIMD)) { - case ID_AA64PFR0_EL1_ADV_SIMD_IMPL: - break; - default: + case ID_AA64PFR0_EL1_ADV_SIMD_NONE: return -1; + default: + break; } #else #ifdef _KERNEL
CVS commit: src/sys/crypto
Module Name:src Committed By: jmcneill Date: Sat Oct 10 08:24:10 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_impl.c src/sys/crypto/chacha/arch/arm: chacha_neon_impl.c Log Message: Fix detection of NEON features. ID_AA64PFR0_EL1_ADV_SIMD_NONE means SIMD is not available, and any other value means it is. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon_impl.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:31:04 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Gather mc_forward/backward so we can load 256 bits at once. To generate a diff of this commit: cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.11 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 Thu Sep 10 11:30:28 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:31:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $") +RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $") .fpu neon @@ -54,36 +54,26 @@ inva: .byte 0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03 END(inva) - .type mc_forward,_ASM_TYPE_OBJECT -mc_forward: - .byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 */ + .type mc,_ASM_TYPE_OBJECT +mc: + .byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 /* 0 forward */ .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C - - .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 */ + .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 backward */ + .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E + .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 /* 1 forward */ .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 - - .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 */ + .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 backward */ + .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A + .byte 0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C /* 2 forward */ 
.byte 0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04 - + .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 backward */ + .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 .Lmc_forward_3: - .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 */ + .byte 0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00 /* 3 forward */ .byte 0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08 -END(mc_forward) - - .type mc_backward,_ASM_TYPE_OBJECT -mc_backward: - .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 /* 0 */ - .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E - - .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 /* 1 */ - .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A - - .byte 0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E /* 2 */ - .byte 0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06 - - .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 */ + .byte 0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A /* 3 backward */ .byte 0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02 -END(mc_backward) +END(mc) .type sr,_ASM_TYPE_OBJECT sr: @@ -210,8 +200,7 @@ ENTRY(aes_neon_enc1) /* * r3: rmod4 - * r4: mc_forward - * r5: mc_backward + * r4: mc * r6,r8,r10,ip: temporaries * q0={d0-d1}: x/ak/A * q1={d2-d3}: 0x0f0f... 
@@ -225,8 +214,8 @@ ENTRY(aes_neon_enc1) * q9={d18-d19}: sb2[1] * q10={d20-d21}: inv * q11={d22-d23}: inva - * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4] - * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4] + * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].backward + * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].forward * q14={d28-d29}: rk/A2/A2_B_D * q15={d30-d31}: A2_B/sr[rmod4] */ @@ -254,9 +243,8 @@ ENTRY(aes_neon_enc1) vld1.8 {q8-q9}, [r6 :256] /* q8 = sb2[0], q9 = sb2[1] */ vld1.8 {q10-q11}, [r8 :256] /* q10 = inv, q11 = inva */ - /* (r4, r5) := (_forward[0], _backward[0]) */ - add r4, ip, #(mc_forward - .Lconstants) - add r5, ip, #(mc_backward - .Lconstants) + /* r4 := mc */ + add r4, ip, #(mc - .Lconstants) /* (q2, q3) := (lo, hi) */ vshr.u8 q3, q0, #4 @@ -291,13 +279,11 @@ ENTRY(aes_neon_enc1) vtbl.8 d25, {q8}, d5 vtbl.8 d26, {q9}, d6 vtbl.8 d27, {q9}, d7 + add r6, r4, r3, lsl #5 /* r6 := [rmod4] */ veor q14, q12, q13 - /* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */ - add r6, r4, r3, lsl #4 - add r8, r5, r3, lsl #4 - vld1.8 {q12}, [r6 :128] - vld1.8 {q13}, [r8 :128] + /* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */ + vld1.8 {q12-q13}, [r6 :256] /* q15 := A2_B = A2 + A(mcf) */ vtbl.8 d30, {q0}, d24 @@ -474,7 +460,7 @@ ENTRY(aes_neon_dec1) add r8, ip, #(.Lmc_forward_3 - .Lconstants) vld1.8 {q6-q7}, [r4 :256] /* q6 := dsbb[0], q7 := dsbb[1] */ vld1.8 {q10-q11}, [r6 :256] /* q10 := inv, q11 := inva */ - vld1.8 {q15}, [r8 :128] /* q15 := mc_forward[3] */ + vld1.8 {q15}, [r8 :128] /* q15 := mc[3].forward */ /* (q2, q3) := (lo, hi) */ vshr.u8 q3, q0, #4
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:31:04 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Gather mc_forward/backward so we can load 256 bits at once. To generate a diff of this commit: cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:30:28 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Hoist dsbd/dsbe address calculation out of loop. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9 Thu Sep 10 11:30:08 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:30:28 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $") +RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $") .fpu neon @@ -431,6 +431,9 @@ ENTRY(aes_neon_dec1) /* * r3: 3 & ~(nrounds - 1) + * r4: dsbd + * r5: dsbe + * r6,r8,r10,ip: temporaries * q0={d0-d1}: x/ak * q1={d2-d3}: 0x0f0f... * q2={d4-d5}: lo/k/j/io @@ -488,6 +491,10 @@ ENTRY(aes_neon_dec1) add r4, ip, #(dsb9 - .Lconstants) vld1.8 {q4-q5}, [r4 :256] /* q4 := dsb9[0], q5 := dsb9[1] */ + /* r4 := dsbd, r5 := dsbe */ + add r4, ip, #(dsbd - .Lconstants) + add r5, ip, #(dsbe - .Lconstants) + /* q0 := rk[0] + diptlo(lo) + dipthi(hi) */ veor q0, q14, q2 veor q0, q0, q3 @@ -496,7 +503,6 @@ ENTRY(aes_neon_dec1) _ALIGN_TEXT 1: /* load dsbd */ - add r4, ip, #(dsbd - .Lconstants) vld1.8 {q8-q9}, [r4 :256] /* q8 := dsbd[0], q9 := dsbd[1] */ vld1.8 {q14}, [r0 :128]! /* q14 = *rk++ */ @@ -522,8 +528,7 @@ ENTRY(aes_neon_dec1) veor q0, q0, q13 /* load dsbe */ - add r4, ip, #(dsbe - .Lconstants) - vld1.8 {q8-q9}, [r4 :256]! 
/* q8 := dsbe[0], q9 := dsbe[1] */ + vld1.8 {q8-q9}, [r5 :256] /* q8 := dsbe[0], q9 := dsbe[1] */ /* q0 := x(mc) + dsbb_0(io) + dsbb_1(jo) */ vtbl.8 d28, {q0}, d30
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:30:28 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Hoist dsbd/dsbe address calculation out of loop. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:30:08 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Tweak register usage. - Call r12 by its usual name, ip. - No need for r7 or r11=fp at the moment. To generate a diff of this commit: cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:30:08 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Tweak register usage. - Call r12 by its usual name, ip. - No need for r7 or r11=fp at the moment. To generate a diff of this commit: cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8 Thu Sep 10 11:29:43 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:30:08 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $") +RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $") .fpu neon @@ -205,14 +205,14 @@ ENTRY(aes_neon_enc1) vldr d1, [sp] /* d1 := x hi */ ldr r1, [sp, #8] /* r1 := nrounds */ #endif - push {r4, r5, r6, r7, r8, r10, r11, lr} + push {r4, r5, r6, r8, r10, lr} vpush {d8-d15} /* * r3: rmod4 * r4: mc_forward * r5: mc_backward - * r6,r7,r8,r10,r11,r12: temporaries + * r6,r8,r10,ip: temporaries * q0={d0-d1}: x/ak/A * q1={d2-d3}: 0x0f0f... * q2={d4-d5}: lo/k/j/io @@ -231,32 +231,32 @@ ENTRY(aes_neon_enc1) * q15={d30-d31}: A2_B/sr[rmod4] */ - /* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */ - ldr r12, .Lconstants_addr - adr r11, .Lconstants_addr + /* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */ + ldr ip, .Lconstants_addr + adr r10, .Lconstants_addr vld1.8 {q14}, [r0 :128]! 
/* q14 = *rk++ */ movw r3, #0 vmov.i8 q1, #0x0f - /* r12 := .Lconstants */ - add r12, r12, r11 + /* ip := .Lconstants */ + add ip, ip, r10 /* (q4, q5) := (iptlo, ipthi) */ - add r6, r12, #(ipt - .Lconstants) + add r6, ip, #(ipt - .Lconstants) vld1.8 {q4-q5}, [r6 :256] /* load the rest of the constants */ - add r4, r12, #(sb1 - .Lconstants) - add r6, r12, #(sb2 - .Lconstants) - add r8, r12, #(.Linv_inva - .Lconstants) + add r4, ip, #(sb1 - .Lconstants) + add r6, ip, #(sb2 - .Lconstants) + add r8, ip, #(.Linv_inva - .Lconstants) vld1.8 {q6-q7}, [r4 :256] /* q6 = sb1[0], q7 = sb1[1] */ vld1.8 {q8-q9}, [r6 :256] /* q8 = sb2[0], q9 = sb2[1] */ vld1.8 {q10-q11}, [r8 :256] /* q10 = inv, q11 = inva */ /* (r4, r5) := (_forward[0], _backward[0]) */ - add r4, r12, #(mc_forward - .Lconstants) - add r5, r12, #(mc_backward - .Lconstants) + add r4, ip, #(mc_forward - .Lconstants) + add r5, ip, #(mc_backward - .Lconstants) /* (q2, q3) := (lo, hi) */ vshr.u8 q3, q0, #4 @@ -295,9 +295,9 @@ ENTRY(aes_neon_enc1) /* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */ add r6, r4, r3, lsl #4 - add r7, r5, r3, lsl #4 + add r8, r5, r3, lsl #4 vld1.8 {q12}, [r6 :128] - vld1.8 {q13}, [r7 :128] + vld1.8 {q13}, [r8 :128] /* q15 := A2_B = A2 + A(mcf) */ vtbl.8 d30, {q0}, d24 @@ -365,8 +365,8 @@ ENTRY(aes_neon_enc1) bne 1b /* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */ - add r8, r12, #(sr - .Lconstants) - add r6, r12, #(sbo - .Lconstants) + add r8, ip, #(sr - .Lconstants) + add r6, ip, #(sbo - .Lconstants) add r8, r8, r3, lsl #4 vld1.8 {q6-q7}, [r6 :256] vld1.8 {q15}, [r8 :128] @@ -388,7 +388,7 @@ ENTRY(aes_neon_enc1) vtbl.8 d1, {q2}, d31 vpop {d8-d15} - pop {r4, r5, r6, r7, r8, r10, r11, lr} + pop {r4, r5, r6, r8, r10, lr} #ifdef __SOFTFP__ #ifdef __ARM_BIG_ENDIAN vmov r1, r0, d0 @@ -426,7 +426,7 @@ ENTRY(aes_neon_dec1) vldr d1, [sp] /* d1 := x hi */ ldr r1, [sp, #8] /* r1 := nrounds */ #endif - push {r4, r5, r6, r7, r8, r10, r11, lr} + push {r4, r5, r6, r8, r10, lr} vpush {d8-d15} /* 
@@ -449,26 +449,26 @@ ENTRY(aes_neon_dec1) * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)] */ - /* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */ - ldr r12, .Lconstants_addr - adr r11, .Lconstants_addr + /* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */ + ldr ip, .Lconstants_addr + adr r10, .Lconstants_addr vld1.8 {q14}, [r0 :128]! /* q14 = *rk++ */ rsb r3, r1, #0 /* r3 := ~(x - 1) = -x */ vmov.i8 q1, #0x0f and r3, r3, #3 /* r3 := 3 & ~(x - 1) */ - /* r12 := .Lconstants */ - add r12, r12, r11 + /* ip := .Lconstants */ + add ip, ip, r10 /* (q4, q5) := (diptlo, dipthi) */ - add r6, r12, #(dipt - .Lconstants) + add r6, ip, #(dipt - .Lconstants) vld1.8 {q4-q5}, [r6 :256] /* load the rest of the constants */ - add r4, r12, #(dsbb - .Lconstants) - add r6, r12, #(.Linv_inva - .Lconstants) - add r8, r12, #(.Lmc_forward_3 -
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:29:43 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Write vtbl with {qN} rather than {d(2N)-d(2N+1)}. Cosmetic; no functional change. To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7 Thu Sep 10 11:29:02 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:29:43 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $") +RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $") .fpu neon @@ -264,10 +264,10 @@ ENTRY(aes_neon_enc1) vand q3, q3, q1 /* q3 := (x >> 4) & 0x0f0f... */ /* (q2, q3) := (iptlo(lo), ipthi(hi)) */ - vtbl.8 d4, {d8-d9}, d4 - vtbl.8 d5, {d8-d9}, d5 - vtbl.8 d6, {d10-d11}, d6 - vtbl.8 d7, {d10-d11}, d7 + vtbl.8 d4, {q4}, d4 + vtbl.8 d5, {q4}, d5 + vtbl.8 d6, {q5}, d6 + vtbl.8 d7, {q5}, d7 /* q0 := rk[0] + iptlo(lo) + ipthi(hi) */ veor q0, q14, q2 @@ -279,18 +279,18 @@ ENTRY(aes_neon_enc1) 1: vld1.8 {q14}, [r0 :128]! 
/* q14 = *rk++ */ /* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */ - vtbl.8 d24, {d12-d13}, d4 - vtbl.8 d25, {d12-d13}, d5 - vtbl.8 d26, {d14-d15}, d6 - vtbl.8 d27, {d14-d15}, d7 + vtbl.8 d24, {q6}, d4 + vtbl.8 d25, {q6}, d5 + vtbl.8 d26, {q7}, d6 + vtbl.8 d27, {q7}, d7 veor q0, q14, q12 veor q0, q0, q13 /* q14 := A2 = sb2_0[io] + sb2_1[jo] */ - vtbl.8 d24, {d16-d17}, d4 - vtbl.8 d25, {d16-d17}, d5 - vtbl.8 d26, {d18-d19}, d6 - vtbl.8 d27, {d18-d19}, d7 + vtbl.8 d24, {q8}, d4 + vtbl.8 d25, {q8}, d5 + vtbl.8 d26, {q9}, d6 + vtbl.8 d27, {q9}, d7 veor q14, q12, q13 /* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */ @@ -300,18 +300,18 @@ ENTRY(aes_neon_enc1) vld1.8 {q13}, [r7 :128] /* q15 := A2_B = A2 + A(mcf) */ - vtbl.8 d30, {d0-d1}, d24 - vtbl.8 d31, {d0-d1}, d25 + vtbl.8 d30, {q0}, d24 + vtbl.8 d31, {q0}, d25 veor q15, q15, q14 /* q14 := A2_B_D = A2_B + A(mcb) */ - vtbl.8 d28, {d0-d1}, d26 - vtbl.8 d29, {d0-d1}, d27 + vtbl.8 d28, {q0}, d26 + vtbl.8 d29, {q0}, d27 veor q14, q14, q15 /* q0 := x = A2_B_D + A2_B(mcf) */ - vtbl.8 d0, {d30-d31}, d24 - vtbl.8 d1, {d30-d31}, d25 + vtbl.8 d0, {q15}, d24 + vtbl.8 d1, {q15}, d25 veor q0, q0, q14 2: /* @@ -324,19 +324,19 @@ ENTRY(aes_neon_enc1) vand q3, q3, q1 /* q3 := (x >> 4) & 0x0f0f... 
*/ /* q0 := a/k */ - vtbl.8 d0, {d22-d23}, d4 - vtbl.8 d1, {d22-d23}, d5 + vtbl.8 d0, {q11}, d4 + vtbl.8 d1, {q11}, d5 /* q2 := j = i + k */ veor q2, q3, q2 /* q12 := ir = 1/i */ - vtbl.8 d24, {d20-d21}, d6 - vtbl.8 d25, {d20-d21}, d7 + vtbl.8 d24, {q10}, d6 + vtbl.8 d25, {q10}, d7 /* q13 := jr = 1/j */ - vtbl.8 d26, {d20-d21}, d4 - vtbl.8 d27, {d20-d21}, d5 + vtbl.8 d26, {q10}, d4 + vtbl.8 d27, {q10}, d5 /* q12 := iak = 1/i + a/k */ veor q12, q12, q0 @@ -345,12 +345,12 @@ ENTRY(aes_neon_enc1) veor q13, q13, q0 /* q12 := iakr = 1/(1/i + a/k) */ - vtbl.8 d24, {d20-d21}, d24 - vtbl.8 d25, {d20-d21}, d25 + vtbl.8 d24, {q10}, d24 + vtbl.8 d25, {q10}, d25 /* q13 := jakr = 1/(1/j + a/k) */ - vtbl.8 d26, {d20-d21}, d26 - vtbl.8 d27, {d20-d21}, d27 + vtbl.8 d26, {q10}, d26 + vtbl.8 d27, {q10}, d27 /* q2 := io = j + 1/(1/i + a/k) */ veor q2, q2, q12 @@ -374,18 +374,18 @@ ENTRY(aes_neon_enc1) vld1.8 {q14}, [r0 :128]! /* q14 = *rk++ */ /* (q2, q3) := (sbo_0(io), sbo_1(jo)) */ - vtbl.8 d4, {d12-d13}, d4 - vtbl.8 d5, {d12-d13}, d5 - vtbl.8 d6, {d14-d15}, d6 - vtbl.8 d7, {d14-d15}, d7 + vtbl.8 d4, {q6}, d4 + vtbl.8 d5, {q6}, d5 + vtbl.8 d6, {q7}, d6 + vtbl.8 d7, {q7}, d7 /* q2 := x = rk[nr] + sbo_0(io) + sbo_1(jo) */ veor q2, q2, q14 veor q2, q2, q3 /* q0 := x(sr[rmod4]) */ - vtbl.8 d0, {d4-d5}, d30 - vtbl.8 d1, {d4-d5}, d31 + vtbl.8 d0, {q2}, d30 + vtbl.8 d1, {q2}, d31 vpop {d8-d15} pop {r4, r5, r6, r7, r8, r10, r11, lr} @@ -479,10 +479,10 @@ ENTRY(aes_neon_dec1) vand q3, q3, q1 /* q3 := (x >> 4) & 0x0f0f... */ /* (q2, q3) := (diptlo(lo), dipthi(hi)) */ - vtbl.8 d4, {d8-d9}, d4 - vtbl.8 d5, {d8-d9}, d5 - vtbl.8 d6, {d10-d11}, d6 - vtbl.8 d7, {d10-d11}, d7 + vtbl.8 d4, {q4}, d4 + vtbl.8 d5, {q4}, d5 + vtbl.8 d6, {q5}, d6 + vtbl.8 d7, {q5}, d7 /* load dsb9 */ add r4, r12, #(dsb9 - .Lconstants) @@ -502,22 +502,22 @@ ENTRY(aes_neon_dec1) vld1.8 {q14}, [r0 :128]! /* q14 = *rk++ */ /* q0 := rk[i] + dsb9_0(io) + dsb9_1(jo)
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:29:43 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Write vtbl with {qN} rather than {d(2N)-d(2N+1)}. Cosmetic; no functional change. To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:29:02 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Issue 256-bit loads rather than pairs of 128-bit loads. Not sure why I didn't realize you could do this before! Saves some temporary registers that can now be allocated to shave off a few cycles. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Thu Sep 10 11:29:02 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: aes neon: Issue 256-bit loads rather than pairs of 128-bit loads. Not sure why I didn't realize you could do this before! Saves some temporary registers that can now be allocated to shave off a few cycles. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6 Sun Aug 16 18:02:03 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Thu Sep 10 11:29:02 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $") +RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $") .fpu neon @@ -38,9 +38,10 @@ RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020 .long .Lconstants - . 
.section .rodata - .p2align 4 + .p2align 5 .Lconstants: +.Linv_inva: /* inv and inva must be consecutive */ .type inv,_ASM_TYPE_OBJECT inv: .byte 0x80,0x01,0x08,0x0D,0x0F,0x06,0x05,0x0E @@ -99,125 +100,85 @@ sr: .byte 0x08,0x05,0x02,0x0F,0x0C,0x09,0x06,0x03 END(sr) - .type iptlo,_ASM_TYPE_OBJECT -iptlo: - .byte 0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2 + .type ipt,_ASM_TYPE_OBJECT +ipt: + .byte 0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2 /* lo */ .byte 0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA -END(iptlo) - - .type ipthi,_ASM_TYPE_OBJECT -ipthi: - .byte 0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C + .byte 0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C /* hi */ .byte 0x81,0xCC,0xFD,0xB0,0xFC,0xB1,0x80,0xCD -END(ipthi) +END(ipt) - .type sb1_0,_ASM_TYPE_OBJECT -sb1_0: - .byte 0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1 + .type sb1,_ASM_TYPE_OBJECT +sb1: + .byte 0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1 /* 0 */ .byte 0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5 -END(sb1_0) - - .type sb1_1,_ASM_TYPE_OBJECT -sb1_1: - .byte 0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36 + .byte 0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36 /* 1 */ .byte 0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B -END(sb1_1) +END(sb1) - .type sb2_0,_ASM_TYPE_OBJECT -sb2_0: - .byte 0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2 + .type sb2,_ASM_TYPE_OBJECT +sb2: + .byte 0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2 /* 0 */ .byte 0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E -END(sb2_0) - - .type sb2_1,_ASM_TYPE_OBJECT -sb2_1: - .byte 0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69 + .byte 0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69 /* 1 */ .byte 0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2 -END(sb2_1) +END(sb2) - .type sbo_0,_ASM_TYPE_OBJECT -sbo_0: - .byte 0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0 + .type sbo,_ASM_TYPE_OBJECT +sbo: + .byte 0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0 /* 0 */ .byte 0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15 -END(sbo_0) - - .type sbo_1,_ASM_TYPE_OBJECT -sbo_1: - .byte 0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF + .byte 0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF /* 1 */ 
.byte 0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E -END(sbo_1) +END(sbo) - .type diptlo,_ASM_TYPE_OBJECT -diptlo: - .byte 0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F + .type dipt,_ASM_TYPE_OBJECT +dipt: + .byte 0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F /* lo */ .byte 0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15 -END(diptlo) - - .type dipthi,_ASM_TYPE_OBJECT -dipthi: - .byte 0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86 + .byte 0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86 /* hi */ .byte 0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12 -END(dipthi) +END(dipt) - .type dsb9_0,_ASM_TYPE_OBJECT -dsb9_0: - .byte 0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85 + .type dsb9,_ASM_TYPE_OBJECT +dsb9: + .byte 0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85 /* 0 */ .byte 0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA -END(dsb9_0) - - .type dsb9_1,_ASM_TYPE_OBJECT -dsb9_1: - .byte 0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0 + .byte 0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0 /* 1 */ .byte 0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72 -END(dsb9_1) +END(dsb9) - .type dsbd_0,_ASM_TYPE_OBJECT -dsbd_0: - .byte 0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D + .type dsbd,_ASM_TYPE_OBJECT +dsbd: + .byte 0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D /* 0 */ .byte 0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5 -END(dsbd_0) - - .type dsbd_1,_ASM_TYPE_OBJECT -dsbd_1: - .byte 0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C + .byte 0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C /* 1 */ .byte 0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29 -END(dsbd_1) +END(dsbd) - .type dsbb_0,_ASM_TYPE_OBJECT -dsbb_0: - .byte 0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Sep 8 23:58:09 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: aesarmv8: Reallocate registers to shave off unnecessary MOV. To generate a diff of this commit: cvs rdiff -u -r1.14 -r1.15 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.15 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14 Tue Sep 8 23:57:43 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Sep 8 23:58:09 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include <machine/asm.h> -RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $") +RCSID("$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $") .arch_extension aes @@ -917,13 +917,12 @@ END(aesarmv8_cbcmac_update1) ENTRY(aesarmv8_ccm_enc1) stp fp, lr, [sp, #-16]!
/* push stack frame */ mov fp, sp - ld1 {v0.16b, v1.16b}, [x4] /* q0 := auth, q2 := ctr (be) */ - mov v2.16b, v1.16b + ld1 {v0.16b-v1.16b}, [x4] /* q0 := auth, q1 := ctr (be) */ adrl x11, ctr32_inc /* x11 := _inc */ ld1 {v5.4s}, [x11] /* q5 := (0,0,0,1) (host-endian) */ mov x9, x0 /* x9 := enckey */ mov x10, x3 /* x10 := nbytes */ - rev32 v2.16b, v2.16b /* q2 := ctr (host-endian) */ + rev32 v2.16b, v1.16b /* q2 := ctr (host-endian) */ _ALIGN_TEXT 1: ld1 {v3.16b}, [x1], #0x10 /* q3 := plaintext block */ add v2.4s, v2.4s, v5.4s /* increment ctr (32-bit) */ @@ -937,9 +936,8 @@ ENTRY(aesarmv8_ccm_enc1) subs x10, x10, #0x10 /* count down bytes */ st1 {v3.16b}, [x2], #0x10 /* store ciphertext block */ b.ne 1b /* repeat if more blocks */ - rev32 v2.16b, v2.16b /* q2 := ctr (big-endian) */ - mov v1.16b, v2.16b /* store updated auth/ctr */ - st1 {v0.16b-v1.16b}, [x4] + rev32 v1.16b, v2.16b /* q1 := ctr (big-endian) */ + st1 {v0.16b-v1.16b}, [x4] /* store updated auth/ctr */ ldp fp, lr, [sp], #16 /* pop stack frame */ ret END(aesarmv8_ccm_enc1)
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Sep 8 23:57:43 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: aesarmv8: Issue two 4-register ld/st, not four 2-register ld/st. To generate a diff of this commit: cvs rdiff -u -r1.13 -r1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13 Tue Sep 8 23:57:13 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Sep 8 23:57:43 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $") +RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $") .arch_extension aes @@ -693,10 +693,8 @@ ENTRY(aesarmv8_xts_enc8) mov v30.16b, v31.16b /* q30 := tweak[6] */ bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */ /* q31 := tweak[7] */ - ld1 {v0.16b,v1.16b}, [x1], #0x20 /* q[i] := ptxt[i] */ - ld1 {v2.16b,v3.16b}, [x1], #0x20 - ld1 {v4.16b,v5.16b}, [x1], #0x20 - ld1 {v6.16b,v7.16b}, [x1], #0x20 + ld1 {v0.16b-v3.16b}, [x1], #0x40 /* q[i] := ptxt[i] */ + ld1 {v4.16b-v7.16b}, [x1], #0x40 eor v0.16b, v0.16b, v24.16b /* q[i] := ptxt[i] ^ tweak[i] */ eor v1.16b, v1.16b, v25.16b eor v2.16b, v2.16b, v26.16b @@ -716,10 +714,8 @@ ENTRY(aesarmv8_xts_enc8) eor v5.16b, v5.16b, v29.16b eor v6.16b, v6.16b, v30.16b eor v7.16b, v7.16b, v31.16b - st1 {v0.16b,v1.16b}, [x2], #0x20 /* store ciphertext blocks */ - st1 {v2.16b,v3.16b}, [x2], #0x20 - st1 {v4.16b,v5.16b}, [x2], #0x20 - st1 {v6.16b,v7.16b}, [x2], #0x20 + 
st1 {v0.16b-v3.16b}, [x2], #0x40 /* store ciphertext blocks */ + st1 {v4.16b-v7.16b}, [x2], #0x40 bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */ subs x10, x10, #0x80 /* count down nbytes */ b.ne 1b /* repeat if more block groups */
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Sep 8 23:57:43 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: aesarmv8: Issue two 4-register ld/st, not four 2-register ld/st. To generate a diff of this commit: cvs rdiff -u -r1.13 -r1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Sep 8 23:57:13 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: aesarmv8: Adapt aes_armv8_64.S to big-endian. Patch mainly from (and tested by) jakllsch@ with minor tweaks by me. To generate a diff of this commit: cvs rdiff -u -r1.12 -r1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Sep 8 23:58:09 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: aesarmv8: Reallocate registers to shave off unnecessary MOV. To generate a diff of this commit: cvs rdiff -u -r1.14 -r1.15 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Sep 8 23:57:13 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: aesarmv8: Adapt aes_armv8_64.S to big-endian. Patch mainly from (and tested by) jakllsch@ with minor tweaks by me. To generate a diff of this commit: cvs rdiff -u -r1.12 -r1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Tue Sep 8 23:57:13 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $") +RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $") .arch_extension aes @@ -114,11 +114,11 @@ END(unshiftrows_rotword_3) * Standard ABI calling convention. */ ENTRY(aesarmv8_setenckey128) - ldr q1, [x1] /* q1 := master key */ + ld1 {v1.16b}, [x1] /* q1 := master key */ adrl x4, unshiftrows_rotword_3 eor v0.16b, v0.16b, v0.16b /* q0 := 0 */ - ldr q16, [x4] /* q16 := unshiftrows_rotword_3 table */ + ld1 {v16.16b}, [x4] /* q16 := unshiftrows_rotword_3 table */ str q1, [x0], #0x10 /* store master key as first round key */ mov x2, #10 /* round count */ @@ -171,14 +171,14 @@ END(aesarmv8_setenckey128) * Standard ABI calling convention. 
*/ ENTRY(aesarmv8_setenckey192) - ldr q1, [x1], #0x10 /* q1 := master key[0:128) */ - ldr d2, [x1] /* d2 := master key[128:192) */ + ld1 {v1.16b}, [x1], #0x10 /* q1 := master key[0:128) */ + ld1 {v2.8b}, [x1] /* d2 := master key[128:192) */ adrl x4, unshiftrows_rotword_1 adrl x5, unshiftrows_rotword_3 eor v0.16b, v0.16b, v0.16b /* q0 := 0 */ - ldr q16, [x4] /* q16 := unshiftrows_rotword_1 */ - ldr q17, [x5] /* q17 := unshiftrows_rotword_3 */ + ld1 {v16.16b}, [x4] /* q16 := unshiftrows_rotword_1 */ + ld1 {v17.16b}, [x5] /* q17 := unshiftrows_rotword_3 */ str q1, [x0], #0x10 /* store master key[0:128) as round key */ mov x2, #12 /* round count */ @@ -351,13 +351,13 @@ END(aesarmv8_setenckey192) */ ENTRY(aesarmv8_setenckey256) /* q1 := key[0:128), q2 := key[128:256) */ - ldp q1, q2, [x1], #0x20 + ld1 {v1.16b-v2.16b}, [x1], #0x20 adrl x4, unshiftrows_rotword_3 adrl x5, unshiftrows_3 eor v0.16b, v0.16b, v0.16b /* q0 := 0 */ - ldr q16, [x4] /* q16 := unshiftrows_rotword_3 */ - ldr q17, [x5] /* q17 := unshiftrows_3 */ + ld1 {v16.16b}, [x4] /* q16 := unshiftrows_rotword_3 */ + ld1 {v17.16b}, [x5] /* q17 := unshiftrows_3 */ /* store master key as first two round keys */ stp q1, q2, [x0], #0x20 @@ -461,9 +461,9 @@ END(aesarmv8_enctodec) ENTRY(aesarmv8_enc) stp fp, lr, [sp, #-16]! /* push stack frame */ mov fp, sp - ldr q0, [x1] /* q0 := ptxt */ + ld1 {v0.16b}, [x1] /* q0 := ptxt */ bl aesarmv8_enc1 /* q0 := ctxt; trash x0/x3/q16 */ - str q0, [x2] /* store ctxt */ + st1 {v0.16b}, [x2] /* store ctxt */ ldp fp, lr, [sp], #16 /* pop stack frame */ ret END(aesarmv8_enc) @@ -479,9 +479,9 @@ END(aesarmv8_enc) ENTRY(aesarmv8_dec) stp fp, lr, [sp, #-16]! 
/* push stack frame */ mov fp, sp - ldr q0, [x1] /* q0 := ctxt */ + ld1 {v0.16b}, [x1] /* q0 := ctxt */ bl aesarmv8_dec1 /* q0 := ptxt; trash x0/x3/q16 */ - str q0, [x2] /* store ptxt */ + st1 {v0.16b}, [x2] /* store ptxt */ ldp fp, lr, [sp], #16 /* pop stack frame */ ret END(aesarmv8_dec) @@ -503,17 +503,17 @@ ENTRY(aesarmv8_cbc_enc) mov fp, sp mov x9, x0 /* x9 := enckey */ mov x10, x3 /* x10 := nbytes */ - ldr q0, [x4] /* q0 := chaining value */ + ld1 {v0.16b}, [x4] /* q0 := chaining value */ _ALIGN_TEXT -1: ldr q1, [x1], #0x10 /* q1 := plaintext block */ +1: ld1 {v1.16b}, [x1], #0x10 /* q1 := plaintext block */ eor v0.16b, v0.16b, v1.16b /* q0 := cv ^ ptxt */ mov x0, x9 /* x0 := enckey */ mov x3, x5 /* x3 := nrounds */ bl aesarmv8_enc1 /* q0 := ctxt; trash x0/x3/q16 */ subs x10, x10, #0x10 /* count down nbytes */ - str q0, [x2], #0x10 /* store ciphertext block */ + st1 {v0.16b}, [x2], #0x10 /* store ciphertext block */ b.ne 1b /* repeat if x10 is nonzero */ - str q0, [x4] /* store chaining value */ + st1 {v0.16b}, [x4] /* store chaining value */ ldp fp, lr, [sp], #16 /* pop stack frame */ 2: ret END(aesarmv8_cbc_enc) @@ -533,18 +533,21 @@ END(aesarmv8_cbc_enc) ENTRY(aesarmv8_cbc_dec1) stp fp, lr, [sp, #-16]! /* push stack frame */ mov fp, sp - ldr q24, [x4] /* q24 := iv */ + ld1 {v24.16b}, [x4] /* q24 := iv */ mov x9, x0 /* x9 := enckey */
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Tue Sep 8 22:48:24 UTC 2020 Modified Files: src/sys/crypto/aes: aes_selftest.c src/sys/crypto/aes/arch/x86: aes_sse2_subr.c Log Message: aes(9): Fix edge case in bitsliced SSE2 AES-CBC decryption. Make sure self-tests exercise this edge case. Discovered by confusion over code inspection of jak's adaptation of aes_armv8_64.S for big-endian. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_selftest.c cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_selftest.c diff -u src/sys/crypto/aes/aes_selftest.c:1.5 src/sys/crypto/aes/aes_selftest.c:1.6 --- src/sys/crypto/aes/aes_selftest.c:1.5 Sat Jul 25 22:36:42 2020 +++ src/sys/crypto/aes/aes_selftest.c Tue Sep 8 22:48:24 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $ */ +/* $NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $"); #ifdef _KERNEL @@ -210,7 +210,7 @@ aes_selftest_encdec_cbc(const struct aes uint8_t in[144]; uint8_t outbuf[146] = { [0] = 0x1a, [145] = 0x1a }, *out = outbuf + 1; uint8_t iv0[16], iv[16]; - unsigned i; + unsigned i, j; for (i = 0; i < 32; i++) key[i] = i; @@ -237,21 +237,26 @@ aes_selftest_encdec_cbc(const struct aes "AES-%u-CBC dec", aes_keybits[i]); /* Try incrementally, with IV update. 
*/ - memcpy(iv, iv0, 16); - impl->ai_cbc_enc(&enc, in, out, 16, iv, aes_nrounds[i]); - impl->ai_cbc_enc(&enc, in + 16, out + 16, 128, iv, - aes_nrounds[i]); - if (memcmp(out, expected[i], 144)) - return aes_selftest_fail(impl, out, expected[i], 144, - "AES-%u-CBC enc incremental", aes_keybits[i]); - - memcpy(iv, iv0, 16); - impl->ai_cbc_dec(&dec, out, out, 128, iv, aes_nrounds[i]); - impl->ai_cbc_dec(&dec, out + 128, out + 128, 16, iv, - aes_nrounds[i]); - if (memcmp(out, in, 144)) - return aes_selftest_fail(impl, out, in, 144, - "AES-%u-CBC dec incremental", aes_keybits[i]); + for (j = 0; j < 144; j += 16) { + memcpy(iv, iv0, 16); + impl->ai_cbc_enc(&enc, in, out, j, iv, aes_nrounds[i]); + impl->ai_cbc_enc(&enc, in + j, out + j, 144 - j, iv, + aes_nrounds[i]); + if (memcmp(out, expected[i], 144)) +return aes_selftest_fail(impl, out, +expected[i], 144, "AES-%u-CBC enc inc %u", +aes_keybits[i], j); + + memcpy(iv, iv0, 16); + impl->ai_cbc_dec(&dec, out, out, j, iv, + aes_nrounds[i]); + impl->ai_cbc_dec(&dec, out + j, out + j, 144 - j, iv, + aes_nrounds[i]); + if (memcmp(out, in, 144)) +return aes_selftest_fail(impl, out, +in, 144, "AES-%u-CBC dec inc %u", +aes_keybits[i], j); + } } if (outbuf[0] != 0x1a) Index: src/sys/crypto/aes/arch/x86/aes_sse2_subr.c diff -u src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.4 --- src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3 Sat Jul 25 22:29:56 2020 +++ src/sys/crypto/aes/arch/x86/aes_sse2_subr.c Tue Sep 8 22:48:24 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $ */ +/* $NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $"); #ifdef _KERNEL #include @@ -200,11 +200,13 @@ aes_sse2_cbc_dec(const struct aesdec *de case 48: w = _mm_loadu_epi8(in + nbytes - 32); q[1] = aes_sse2_interleave_in(w); - /*FALLTHROUGH*/ - case 32: w = _mm_loadu_epi8(in + nbytes - 48); q[0] = aes_sse2_interleave_in(w); - /*FALLTHROUGH*/ + break; + case 32: + w = _mm_loadu_epi8(in + nbytes - 32); + q[0] = aes_sse2_interleave_in(w); + break; case 16: break; }
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: riastradh Date: Tue Sep 8 22:48:24 UTC 2020 Modified Files: src/sys/crypto/aes: aes_selftest.c src/sys/crypto/aes/arch/x86: aes_sse2_subr.c Log Message: aes(9): Fix edge case in bitsliced SSE2 AES-CBC decryption. Make sure self-tests exercise this edge case. Discovered by confusion over code inspection of jak's adaptation of aes_armv8_64.S for big-endian. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_selftest.c cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: jakllsch Date: Tue Sep 8 17:17:32 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: files.chacha_arm Log Message: use correct condition To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/files.chacha_arm Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: jakllsch Date: Tue Sep 8 17:17:32 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: files.chacha_arm Log Message: use correct condition To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/files.chacha_arm Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/files.chacha_arm diff -u src/sys/crypto/chacha/arch/arm/files.chacha_arm:1.3 src/sys/crypto/chacha/arch/arm/files.chacha_arm:1.4 --- src/sys/crypto/chacha/arch/arm/files.chacha_arm:1.3 Tue Jul 28 20:08:48 2020 +++ src/sys/crypto/chacha/arch/arm/files.chacha_arm Tue Sep 8 17:17:32 2020 @@ -1,9 +1,9 @@ -# $NetBSD: files.chacha_arm,v 1.3 2020/07/28 20:08:48 riastradh Exp $ +# $NetBSD: files.chacha_arm,v 1.4 2020/09/08 17:17:32 jakllsch Exp $ ifdef aarch64 makeoptions chacha "COPTS.chacha_neon.c"+="-march=armv8-a" else -makeoptions aes "COPTS.chacha_neon.c"+="-mfloat-abi=softfp -mfpu=neon" +makeoptions chacha "COPTS.chacha_neon.c"+="-mfloat-abi=softfp -mfpu=neon" endif file crypto/chacha/arch/arm/chacha_neon.c chacha & (cpu_cortex | aarch64)
CVS commit: src/sys/crypto
Module Name:src Committed By: jakllsch Date: Mon Sep 7 18:06:13 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h Log Message: Fix vgetq_lane_u32 for aarch64eb with GCC Fixes NEON AES on aarch64eb To generate a diff of this commit: cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.10 src/sys/crypto/aes/arch/arm/arm_neon.h:1.11 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.10 Sun Aug 9 02:49:38 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Mon Sep 7 18:06:13 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.10 2020/08/09 02:49:38 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.11 2020/09/07 18:06:13 jakllsch Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -232,7 +232,7 @@ static __inline uint32_t vgetq_lane_u32(uint32x4_t __v, uint8_t __i) { #ifdef __aarch64__ - return __v[__i]; + return __v[__neon_laneq_index(__v,__i)]; #else return (uint32_t)__builtin_neon_vget_laneuv4si((int32x4_t)__v, __i); #endif Index: src/sys/crypto/chacha/arch/arm/arm_neon.h diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.6 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.7 --- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.6 Sun Aug 9 02:49:38 2020 +++ src/sys/crypto/chacha/arch/arm/arm_neon.h Mon Sep 7 18:06:13 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.6 2020/08/09 02:49:38 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.7 2020/09/07 18:06:13 jakllsch Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -232,7 +232,7 @@ static __inline uint32_t vgetq_lane_u32(uint32x4_t __v, uint8_t __i) { #ifdef __aarch64__ - return __v[__i]; + return __v[__neon_laneq_index(__v, __i)]; #else return (uint32_t)__builtin_neon_vget_laneuv4si((int32x4_t)__v, __i); #endif
CVS commit: src/sys/crypto
Module Name:src Committed By: jakllsch Date: Mon Sep 7 18:06:13 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h Log Message: Fix vgetq_lane_u32 for aarch64eb with GCC Fixes NEON AES on aarch64eb To generate a diff of this commit: cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: jakllsch Date: Mon Sep 7 18:05:17 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Use a working macro to detect big endian aarch64. Fixes aarch64eb NEON ChaCha. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: jakllsch Date: Mon Sep 7 18:05:17 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Use a working macro to detect big endian aarch64. Fixes aarch64eb NEON ChaCha. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.6 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.7 --- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.6 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Mon Sep 7 18:05:17 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon_64.S,v 1.6 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: chacha_neon_64.S,v 1.7 2020/09/07 18:05:17 jakllsch Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: chacha_neon_64.S,v 1.6 2020/08/08 14:47:01 riastradh Exp $") +RCSID("$NetBSD: chacha_neon_64.S,v 1.7 2020/09/07 18:05:17 jakllsch Exp $") #define ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \ STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r); \ @@ -130,12 +130,12 @@ STEP(STEP19,a0,b0,c0,d0,a1,b1,c1,d1,a2,b #define STEP19(a,b,c,d, t, r) /* nothing */ #endif -#if _BYTE_ORDER == _LITTLE_ENDIAN -#define HTOLE32(x) -#define LE32TOH(x) -#elif _BYTE_ORDER == _BIG_ENDIAN +#if defined(__AARCH64EB__) #define HTOLE32(x) rev32 x, x #define LE32TOH(x) rev32 x, x +#else +#define LE32TOH(x) +#define HTOLE32(x) #endif /*
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Sun Aug 23 16:39:06 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_32.S Log Message: Adjust sp, not fp, to allocate a 32-byte temporary. Costs another couple MOV instructions, but we can't skimp on this -- there's no red zone below sp for interrupts on arm, so we can't touch anything there. So just use fp to save sp and then adjust sp itself, rather than using fp as a temporary register to point just below sp. Should fix PR port-arm/55598 -- previously the ChaCha self-test failed 33/1 trials triggered by sysctl during running system; with the patch it has failed 0/1 trials. (Presumably it happened more often at boot time, leading to 5/26 failures in the test bed, because we just enabled interrupts and some devices are starting to deliver interrupts.) To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/chacha_neon_32.S diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.4 --- src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.3 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon_32.S Sun Aug 23 16:39:06 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $") +RCSID("$NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $") .fpu neon @@ -54,7 +54,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2 */ .macro ROUNDLD a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3 - vld1.8 {\c2-\c3}, [fp, :256] + vld1.8 {\c2-\c3}, [sp, :256] .endm .macro ROUND a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3, c0l, d0l,d0h,d1l,d1h,d2l,d2h,d3l,d3h @@ -80,7 +80,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2 vadd.u32 \c2, \c2, \d2 vadd.u32 \c3, \c3, \d3 - vst1.8 {\c0-\c1}, [fp, :256] /* free c0 and c1 as temps */ + vst1.8 {\c0-\c1}, [sp, :256] /* free c0 and c1 as temps */ veor \c0, \b0, \c0 veor \c1, \b1, \c1 @@ -118,7 +118,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2 vtbl.8 \d3l, {\d3l}, \c0l vtbl.8 \d3h, {\d3h}, \c0l - vld1.8 {\c0-\c1}, [fp, :256] /* restore c0 and c1 */ + vld1.8 {\c0-\c1}, [sp, :256] /* restore c0 and c1 */ /* c += d; b ^= c; b <<<= 7 */ vadd.u32 \c2, \c2, \d2 @@ -126,7 +126,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2 vadd.u32 \c0, \c0, \d0 vadd.u32 \c1, \c1, \d1 - vst1.8 {\c2-\c3}, [fp, :256] /* free c2 and c3 as temps */ + vst1.8 {\c2-\c3}, [sp, :256] /* free c2 and c3 as temps */ veor \c2, \b2, \c2 veor \c3, \b3, \c3 @@ -160,17 +160,18 @@ ENTRY(chacha_stream256_neon) /* save callee-saves registers */ push {r4, r5, r6, r7, r8, r10, fp, lr} vpush {d8-d15} + mov fp, sp /* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */ ldr r7, .Lconstants_addr adr r6, .Lconstants_addr /* reserve space for two 128-bit/16-byte q registers */ - sub fp, sp, #0x20 - bic fp, fp, #0x1f /* align */ + sub sp, sp, #0x20 + bic sp, sp, #0x1f /* align */ /* get parameters */ - add ip, sp, #96 + add ip, fp, #96 add r7, r7, r6 /* r7 := .Lconstants (= v0123) */ ldm ip, {r4, r5} /* r4 := const, r5 := nr */ ldm r2, {r6, r8, r10} /* (r6, r8, r10) := nonce[0:12) */ @@ -311,7 +312,7 @@ ENTRY(chacha_stream256_neon) vadd.u32 q3, 
q3, q8 vadd.u32 q7, q7, q8 - vld1.8 {q8-q9}, [fp, :256] /* restore q8-q9 */ + vld1.8 {q8-q9}, [sp, :256] /* restore q8-q9 */ vst1.8 {q0-q1}, [r0]! vld1.8 {q0}, [r3] /* q0 := key[16:32) */ @@ -354,9 +355,10 @@ ENTRY(chacha_stream256_neon) /* zero temporary space on the stack */ vmov.i32 q0, #0 vmov.i32 q1, #0 - vst1.8 {q0-q1}, [fp, :256] + vst1.8 {q0-q1}, [sp, :256] /* restore callee-saves registers and stack */ + mov sp, fp vpop {d8-d15} pop {r4, r5, r6, r7, r8, r10, fp, lr} bx lr @@ -374,17 +376,18 @@ ENTRY(chacha_stream_xor256_neon) /* save callee-saves registers */ push {r4, r5, r6, r7, r8, r10, fp, lr} vpush {d8-d15} + mov fp, sp /* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */ ldr r7, .Lconstants_addr adr r6, .Lconstants_addr /* reserve space for two 128-bit/16-byte q registers */ - sub fp, sp, #0x20 - bic fp, fp, #0x1f /* align */ + sub sp, sp, #0x20 + bic sp, sp, #0x1f /* align */ /* get parameters */ - add ip, sp, #96 + add ip, fp, #96 add r7, r7, r6 /* r7 := .Lconstants (= v0123) */ ldm ip, {r4, r5, ip} /* r4 := key, r5 := const, ip := nr */ ldm r3, {r6, r8, r10} /* (r6, r8, r10) := nonce[0:12) */ @@ -475,7 +478,7 @@
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Sun Aug 23 16:39:06 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_32.S Log Message: Adjust sp, not fp, to allocate a 32-byte temporary. Costs another couple MOV instructions, but we can't skimp on this -- there's no red zone below sp for interrupts on arm, so we can't touch anything there. So just use fp to save sp and then adjust sp itself, rather than using fp as a temporary register to point just below sp. Should fix PR port-arm/55598 -- previously the ChaCha self-test failed 33/1 trials triggered by sysctl during running system; with the patch it has failed 0/1 trials. (Presumably it happened more often at boot time, leading to 5/26 failures in the test bed, because we just enabled interrupts and some devices are starting to deliver interrupts.) To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/blake2
Module Name:src Committed By: riastradh Date: Thu Aug 20 21:21:05 UTC 2020 Added Files: src/sys/crypto/blake2: blake2s.c blake2s.h files.blake2s Log Message: Import small BLAKE2s implementation. To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 src/sys/crypto/blake2/blake2s.c \ src/sys/crypto/blake2/blake2s.h src/sys/crypto/blake2/files.blake2s Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Added files: Index: src/sys/crypto/blake2/blake2s.c diff -u /dev/null src/sys/crypto/blake2/blake2s.c:1.1 --- /dev/null Thu Aug 20 21:21:05 2020 +++ src/sys/crypto/blake2/blake2s.c Thu Aug 20 21:21:05 2020 @@ -0,0 +1,350 @@ +/* $NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $ */ + +/*- + * Copyright (c) 2015 Taylor R. Campbell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef _KERNEL + +#include +__KERNEL_RCSID(0, "$NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $"); + +#include +#include + +#include + +#else + +#define _POSIX_C_SOURCE 200809L + +#include +#include +#include + +#endif + +#include "blake2s.h" + +#include + +static inline uint32_t +rotr32(uint32_t x, unsigned c) +{ + + return ((x >> c) | (x << (32 - c))); +} + +#define BLAKE2S_G(VA, VB, VC, VD, X, Y) do \ +{ \ + (VA) = (VA) + (VB) + (X); \ + (VD) = rotr32((VD) ^ (VA), 16); \ + (VC) = (VC) + (VD); \ + (VB) = rotr32((VB) ^ (VC), 12); \ + (VA) = (VA) + (VB) + (Y); \ + (VD) = rotr32((VD) ^ (VA), 8); \ + (VC) = (VC) + (VD); \ + (VB) = rotr32((VB) ^ (VC), 7); \ +} while (0) + +static const uint32_t blake2s_iv[8] = { + 0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU, + 0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U, +}; + +static const uint8_t blake2s_sigma[10][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 
5, 15, 11, 9, 14, 3, 12, 13, 0 }, +}; + +static void +blake2s_compress(uint32_t h[8], uint64_t c, uint32_t last, +const uint8_t in[64]) +{ + uint32_t v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15; + uint32_t m[16]; + unsigned i; + + /* Load the variables: first 8 from state, next 8 from IV. */ + v0 = h[0]; + v1 = h[1]; + v2 = h[2]; + v3 = h[3]; + v4 = h[4]; + v5 = h[5]; + v6 = h[6]; + v7 = h[7]; + v8 = blake2s_iv[0]; + v9 = blake2s_iv[1]; + v10 = blake2s_iv[2]; + v11 = blake2s_iv[3]; + v12 = blake2s_iv[4]; + v13 = blake2s_iv[5]; + v14 = blake2s_iv[6]; + v15 = blake2s_iv[7]; + + /* Incorporate the block counter and whether this is last. */ + v12 ^= c & 0xffffffffU; + v13 ^= c >> 32; + v14 ^= last; + + /* Load the message block. */ + for (i = 0; i < 16; i++) + m[i] = le32dec(in + 4*i); + + /* Transform the variables. */ + for (i = 0; i < 10; i++) { + const uint8_t *sigma = blake2s_sigma[i]; + + BLAKE2S_G(v0, v4, v8, v12, m[sigma[ 0]], m[sigma[ 1]]); + BLAKE2S_G(v1, v5, v9, v13, m[sigma[ 2]], m[sigma[ 3]]); + BLAKE2S_G(v2, v6, v10, v14, m[sigma[ 4]], m[sigma[ 5]]); + BLAKE2S_G(v3, v7, v11, v15, m[sigma[
CVS commit: src/sys/crypto/blake2
Module Name:src Committed By: riastradh Date: Thu Aug 20 21:21:05 UTC 2020 Added Files: src/sys/crypto/blake2: blake2s.c blake2s.h files.blake2s Log Message: Import small BLAKE2s implementation. To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 src/sys/crypto/blake2/blake2s.c \ src/sys/crypto/blake2/blake2s.h src/sys/crypto/blake2/files.blake2s Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sun Aug 16 18:02:03 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S files.aesneon Log Message: Fix AES NEON code for big-endian softfp ARM. ...which is how the kernel runs. Switch to using __SOFTFP__ for consistency with how it gets exposed to C, although I'm not sure how to get it defined automagically in the toolchain for .S files so that's set manually in files.aesneon for now. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/files.aesneon Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Sun Aug 16 18:02:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $") +RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $") .fpu neon @@ -228,15 +228,19 @@ END(dsbo_1) * aes_neon_enc1(const struct aesenc *enc@r0, uint8x16_t x@q0, * unsigned nrounds@r1) * - * With -mfloat-abi=soft(fp) (here spelled `#ifdef _KERNEL'): + * With -mfloat-abi=soft(fp) (i.e., __SOFTFP__): * * uint8x16_t@(r0,r1,r2,r3) * aes_neon_enc1(const struct aesenc *enc@r0, * uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8]) */ ENTRY(aes_neon_enc1) -#ifdef _KERNEL +#ifdef __SOFTFP__ +#ifdef __ARM_BIG_ENDIAN + vmov d0, r3, r2 /* d0 := x lo */ +#else vmov d0, r2, r3 /* d0 := x lo */ +#endif vldr d1, [sp] /* d1 := x hi */ ldr r1, [sp, #8] /* r1 := nrounds */ #endif @@ -434,10 +438,15 @@ ENTRY(aes_neon_enc1) vpop {d8-d15} pop {r4, r5, r6, r7, r8, r10, r11, lr} -#ifdef _KERNEL +#ifdef __SOFTFP__ +#ifdef __ARM_BIG_ENDIAN + vmov r1, r0, d0 + vmov r3, r2, d1 +#else vmov r0, r1, d0 vmov r2, r3, d1 #endif +#endif bx lr END(aes_neon_enc1) @@ -457,8 +466,12 @@ END(aes_neon_enc1) * uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8]) */ ENTRY(aes_neon_dec1) -#ifdef _KERNEL +#ifdef __SOFTFP__ +#ifdef __ARM_BIG_ENDIAN + vmov d0, r3, r2 /* d0 := x lo */ +#else vmov d0, r2, r3 /* d0 := x lo */ +#endif vldr d1, [sp] /* d1 := x hi */ ldr r1, [sp, #8] /* r1 := nrounds */ #endif @@ -669,9 +682,14 @@ ENTRY(aes_neon_dec1) vpop {d8-d15} pop {r4, r5, r6, r7, r8, r10, r11, lr} -#ifdef _KERNEL +#ifdef __SOFTFP__ +#ifdef __ARM_BIG_ENDIAN + vmov r1, r0, d0 + vmov r3, r2, d1 +#else vmov r0, r1, d0 vmov r2, r3, d1 #endif +#endif bx lr END(aes_neon_dec1) Index: src/sys/crypto/aes/arch/arm/files.aesneon diff -u src/sys/crypto/aes/arch/arm/files.aesneon:1.3 src/sys/crypto/aes/arch/arm/files.aesneon:1.4 --- src/sys/crypto/aes/arch/arm/files.aesneon:1.3 Tue Jun 30 17:03:13 2020 +++ src/sys/crypto/aes/arch/arm/files.aesneon Sun Aug 16 18:02:03 2020 @@ -1,4 +1,4 @@ -# $NetBSD: 
files.aesneon,v 1.3 2020/06/30 17:03:13 riastradh Exp $ +# $NetBSD: files.aesneon,v 1.4 2020/08/16 18:02:03 riastradh Exp $ ifdef aarch64 makeoptions aes "COPTS.aes_neon.c"+="-march=armv8-a" @@ -8,6 +8,8 @@ makeoptions aes "COPTS.aes_neon.c"+="-mf makeoptions aes "COPTS.aes_neon_subr.c"+="-mfloat-abi=softfp -mfpu=neon" endif +makeoptions aes "AOPTS.aes_neon_32.S"+="-D__SOFTFP__" + file crypto/aes/arch/arm/aes_neon.c aes & (cpu_cortex | aarch64) file crypto/aes/arch/arm/aes_neon_impl.c aes & (cpu_cortex | aarch64) file crypto/aes/arch/arm/aes_neon_subr.c aes & (cpu_cortex | aarch64)
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sun Aug 16 18:02:03 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S files.aesneon Log Message: Fix AES NEON code for big-endian softfp ARM. ...which is how the kernel runs. Switch to using __SOFTFP__ for consistency with how it gets exposed to C, although I'm not sure how to get it defined automagically in the toolchain for .S files so that's set manually in files.aesneon for now. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/files.aesneon Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: rin Date: Mon Aug 10 06:27:29 UTC 2020 Modified Files: src/sys/crypto/aes: aes_ccm.c Log Message: Add hack to compile aes_ccm_tag() with -O0 for m68k for GCC8. GCC 8 miscompiles aes_ccm_tag() for m68k with optimization level -O[12], which results in failure in aes_ccm_selftest(): | aes_ccm_selftest: tag 0: 8 bytes @ 0x4d3e38 | 03 80 5f 08 22 6f cb fe | .._."o.. | aes_ccm_selftest: verify 0 failed | ... | WARNING: module error: built-in module aes_ccm failed its MODULE_CMD_INIT, error 5 This is observed for amiga (A1200, 68060), mac68k (Quadra 840AV, 68040), and luna68k (nono, 68030 emulator). However, it is not for sun3 (TME, 68020 emulator) and sun2 (TME, 68010 emulator). At the moment, it is unclear whether this is due to differences b/w 68010-20 vs 68030-60, or something wrong with TME. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_ccm.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes
Module Name:src Committed By: rin Date: Mon Aug 10 06:27:29 UTC 2020 Modified Files: src/sys/crypto/aes: aes_ccm.c Log Message: Add hack to compile aes_ccm_tag() with -O0 for m68k for GCC8. GCC 8 miscompiles aes_ccm_tag() for m68k with optimization level -O[12], which results in failure in aes_ccm_selftest(): | aes_ccm_selftest: tag 0: 8 bytes @ 0x4d3e38 | 03 80 5f 08 22 6f cb fe | .._."o.. | aes_ccm_selftest: verify 0 failed | ... | WARNING: module error: built-in module aes_ccm failed its MODULE_CMD_INIT, error 5 This is observed for amiga (A1200, 68060), mac68k (Quadra 840AV, 68040), and luna68k (nono, 68030 emulator). However, it is not for sun3 (TME, 68020 emulator) and sun2 (TME, 68010 emulator). At the moment, it is unclear whether this is due to differences b/w 68010-20 vs 68030-60, or something wrong with TME. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_ccm.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_ccm.c diff -u src/sys/crypto/aes/aes_ccm.c:1.4 src/sys/crypto/aes/aes_ccm.c:1.5 --- src/sys/crypto/aes/aes_ccm.c:1.4 Mon Jul 27 20:44:30 2020 +++ src/sys/crypto/aes/aes_ccm.c Mon Aug 10 06:27:29 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $ */ +/* $NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $"); #include #include @@ -301,6 +301,9 @@ aes_ccm_dec(struct aes_ccm *C, const voi } void +#if defined(__m68k__) && __GNUC_PREREQ__(8, 0) +__attribute__((__optimize__("O0"))) +#endif aes_ccm_tag(struct aes_ccm *C, void *out) { uint8_t *auth = C->authctr;
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sun Aug 9 02:49:38 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h Log Message: Fix some clang neon intrinsics. Compile-tested only, with -Wno-nonportable-vector-initializers. Need to address -- and test -- this stuff properly but this is progress. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.9 src/sys/crypto/aes/arch/arm/arm_neon.h:1.10 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.9 Sun Aug 9 02:48:38 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Sun Aug 9 02:49:38 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.9 2020/08/09 02:48:38 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.10 2020/08/09 02:49:38 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -85,6 +85,8 @@ typedef __attribute__((neon_vector_type( typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; +typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; + typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; typedef struct { uint8x8_t val[2]; } uint8x8x2_t; @@ -218,7 +220,7 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \ uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, \ (int8x16_t)__hi_r, (__i), 48); \ - return __builtin_shufflevector(__r, __r, \ + __builtin_shufflevector(__r, __r, \ 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \ }) #endif /* __LITTLE_ENDIAN */ @@ -326,19 +328,37 @@ vqtbl1q_u8(uint8x16_t __tab, uint8x16_t return (uint8x16_t)__out64; #endif #elif defined(__clang__) -#ifdef __LITTLE_ENDIAN__ - return (uint8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__tab, - (int8x16_t)__idx, 48); -#else - uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo, +#ifndef __LITTLE_ENDIAN__ + __tab = __builtin_shufflevector(__tab, __tab, 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); - uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi, + __idx = __builtin_shufflevector(__idx, __idx, 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); - uint32x4_t __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab, - (int8x16_t)__idx, __i, 48); - return __builtin_shufflevector(__r, __r, +#endif + uint8x16_t __r; +#ifdef __aarch64__ + __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab, (int8x16_t)__idx, 48); +#else + uint64x2_t __tab64 = (uint64x2_t)__tab; + uint8x8_t __tablo = (uint8x8_t)__tab64[0]; + uint8x8_t __tabhi = (uint8x8_t)__tab64[1]; + uint64x2_t __idx64, __out64; + int8x8_t __idxlo, __idxhi, __outlo, __outhi; + + __idx64 = (uint64x2_t)__idx; + __idxlo = (int8x8_t)__idx64[0]; + __idxhi = (int8x8_t)__idx64[1]; + __outlo = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo, + (int8x8_t)__tabhi, (int8x8_t)__idxlo, 16); + __outhi = 
(uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo, + (int8x8_t)__tabhi, (int8x8_t)__idxhi, 16); + __out64 = (uint64x2_t) { (uint64_t)__outlo, (uint64_t)__outhi }; + __r = (uint8x16_t)__out64; +#endif +#ifndef __LITTLE_ENDIAN__ + __r = __builtin_shufflevector(__r, __r, 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); #endif + return __r; #endif } @@ -579,7 +599,7 @@ vsriq_n_u32(uint32x4_t __vins, uint32x4_ (int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins), \ (int32x4_t)(__vsh), (__bits), 34) #else -#define vsliq_n_s32(__vins, __vsh, __bits) ( \ +#define vsriq_n_s32(__vins, __vsh, __bits) ( \ { \ int32x4_t __tvins = (__vins); \ int32x4_t __tvsh = (__vsh); \ Index: src/sys/crypto/chacha/arch/arm/arm_neon.h diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.6 --- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5 Sun Aug 9 02:48:38 2020 +++ src/sys/crypto/chacha/arch/arm/arm_neon.h Sun Aug 9 02:49:38 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.5 2020/08/09 02:48:38 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.6 2020/08/09 02:49:38 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -85,6 +85,8 @@ typedef __attribute__((neon_vector_type( typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; +typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; + typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; typedef struct { uint8x8_t val[2]; } uint8x8x2_t; @@ -218,7 +220,7 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sun Aug 9 02:49:38 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h Log Message: Fix some clang neon intrinsics. Compile-tested only, with -Wno-nonportable-vector-initializers. Need to address -- and test -- this stuff properly but this is progress. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sun Aug 9 02:48:38 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_subr.c arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h Log Message: Use vshlq_n_s32 rather than vsliq_n_s32 with zero destination. Not sure why I reached for vsliq_n_s32 at first -- probably so I wouldn't have to deal with a new intrinsic in arm_neon.h! To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_subr.c cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.7 --- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6 Sun Aug 9 02:00:57 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c Sun Aug 9 02:48:38 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $ */ +/* $NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $"); #ifdef _KERNEL #include @@ -151,7 +151,7 @@ aes_neon_xts_update(uint8x16_t t8) t = vreinterpretq_s32_u8(t8); mask = vcltq_s32(t, zero); /* -1 if high bit set else 0 */ mask = vextq_u32(mask, mask, 3); /* rotate quarters */ - t_ = vsliq_n_s32(zero, t, 1); /* shift */ + t_ = vshlq_n_s32(t, 1); /* shift */ t_ ^= carry & mask; return vreinterpretq_u8_s32(t_); Index: src/sys/crypto/aes/arch/arm/arm_neon.h diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.8 src/sys/crypto/aes/arch/arm/arm_neon.h:1.9 --- src/sys/crypto/aes/arch/arm/arm_neon.h:1.8 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon.h Sun Aug 9 02:48:38 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.8 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.9 2020/08/09 02:48:38 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -466,6 +466,22 @@ vsetq_lane_u64(uint64_t __x, uint64x2_t #if defined(__GNUC__) && !defined(__clang__) _INTRINSATTR +static __inline int32x4_t +vshlq_n_s32(int32x4_t __v, uint8_t __bits) +{ +#ifdef __aarch64__ + return (int32x4_t)__builtin_aarch64_ashlv4si(__v, __bits); +#else + return (int32x4_t)__builtin_neon_vshl_nv4si(__v, __bits); +#endif +} +#elif defined(__clang__) +#define vshlq_n_s32(__v, __bits) \ + (int32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 34) +#endif + +#if defined(__GNUC__) && !defined(__clang__) +_INTRINSATTR static __inline uint32x4_t vshlq_n_u32(uint32x4_t __v, uint8_t __bits) { Index: src/sys/crypto/chacha/arch/arm/arm_neon.h diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.4 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5 --- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.4 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/chacha/arch/arm/arm_neon.h Sun Aug 9 02:48:38 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.4 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.5 2020/08/09 02:48:38 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -466,6 +466,22 @@ vsetq_lane_u64(uint64_t __x, uint64x2_t #if defined(__GNUC__) && !defined(__clang__) _INTRINSATTR +static __inline int32x4_t +vshlq_n_s32(int32x4_t __v, uint8_t __bits) +{ +#ifdef __aarch64__ + return (int32x4_t)__builtin_aarch64_ashlv4si(__v, __bits); +#else + return (int32x4_t)__builtin_neon_vshl_nv4si(__v, __bits); +#endif +} +#elif defined(__clang__) +#define vshlq_n_s32(__v, __bits) \ + (int32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 34) +#endif + +#if defined(__GNUC__) && !defined(__clang__) +_INTRINSATTR static __inline uint32x4_t vshlq_n_u32(uint32x4_t __v, uint8_t __bits) {
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sun Aug 9 02:48:38 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_subr.c arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h Log Message: Use vshlq_n_s32 rather than vsliq_n_s32 with zero destination. Not sure why I reached for vsliq_n_s32 at first -- probably so I wouldn't have to deal with a new intrinsic in arm_neon.h! To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_subr.c cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sun Aug 9 02:00:57 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_subr.c Log Message: Nix outdated comment. I implemented this parallelism a couple weeks ago. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Sun Aug 9 02:00:57 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_subr.c Log Message: Nix outdated comment. I implemented this parallelism a couple weeks ago. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6 --- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c Sun Aug 9 02:00:57 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $"); #ifdef _KERNEL #include @@ -287,12 +287,6 @@ aes_neon_cbcmac_update1(const struct aes storeblock(auth0, auth); } -/* - * XXX On aarch64, we have enough registers that we should be able to - * pipeline two simultaneous vpaes computations in an `aes_neon_enc2' - * function, which should substantially improve CCM throughput. - */ - void aes_neon_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sun Aug 9 01:59:04 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon_imm.h src/sys/crypto/chacha/arch/arm: arm_neon_imm.h Log Message: Fix mistake in big-endian arm clang. Swapped the two halves (only gcc does that, I think) and wrote j,i backwards, oops. (I don't have a big-endian arm clang build handy to test; hoping this works.) To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/arm_neon_imm.h diff -u src/sys/crypto/aes/arch/arm/arm_neon_imm.h:1.1 src/sys/crypto/aes/arch/arm/arm_neon_imm.h:1.2 --- src/sys/crypto/aes/arch/arm/arm_neon_imm.h:1.1 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/aes/arch/arm/arm_neon_imm.h Sun Aug 9 01:59:04 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon_imm.h,v 1.1 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: arm_neon_imm.h,v 1.2 2020/08/09 01:59:04 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -71,7 +71,7 @@ #define V_N_U8(a,b,c,d,e,f,g,h) \ {h,g,f,e,d,c,b,a} #define VQ_N_U8(a,b,c,d,e,f,g,h, i,j,k,l,m,n,o,p) \ - {h,g,f,e,d,c,b,a, p,o,n,m,l,k,i,j} + {p,o,n,m,l,k,j,i, h,g,f,e,d,c,b,a} #define VQ_N_U32(a,b,c,d) \ {d,c, b,a} #endif Index: src/sys/crypto/chacha/arch/arm/arm_neon_imm.h diff -u src/sys/crypto/chacha/arch/arm/arm_neon_imm.h:1.1 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h:1.2 --- src/sys/crypto/chacha/arch/arm/arm_neon_imm.h:1.1 Sat Aug 8 14:47:01 2020 +++ src/sys/crypto/chacha/arch/arm/arm_neon_imm.h Sun Aug 9 01:59:04 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon_imm.h,v 1.1 2020/08/08 14:47:01 riastradh Exp $ */ +/* $NetBSD: arm_neon_imm.h,v 1.2 2020/08/09 01:59:04 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -71,7 +71,7 @@ #define V_N_U8(a,b,c,d,e,f,g,h) \ {h,g,f,e,d,c,b,a} #define VQ_N_U8(a,b,c,d,e,f,g,h, i,j,k,l,m,n,o,p) \ - {h,g,f,e,d,c,b,a, p,o,n,m,l,k,i,j} + {p,o,n,m,l,k,j,i, h,g,f,e,d,c,b,a} #define VQ_N_U32(a,b,c,d) \ {d,c, b,a} #endif
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sun Aug 9 01:59:04 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: arm_neon_imm.h src/sys/crypto/chacha/arch/arm: arm_neon_imm.h Log Message: Fix mistake in big-endian arm clang. Swapped the two halves (only gcc does that, I think) and wrote j,i backwards, oops. (I don't have a big-endian arm clang build handy to test; hoping this works.) To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sat Aug 8 14:47:01 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon.c aes_neon_32.S aes_neon_impl.h aes_neon_subr.c arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c chacha_neon_32.S chacha_neon_64.S Added Files: src/sys/crypto/aes/arch/arm: arm_neon_imm.h src/sys/crypto/chacha/arch/arm: arm_neon_imm.h Log Message: Fix ARM NEON implementations of AES and ChaCha on big-endian ARM. New macros such as VQ_N_U32(a,b,c,d) for NEON vector initializers. Needed because GCC and Clang disagree on the ordering of lanes, depending on whether it's 64-bit big-endian, 32-bit big-endian, or little-endian -- and, bizarrely, both of them disagree with the architectural numbering of lanes. Experimented with using static const uint8_t x8[16] = {...}; uint8x16_t x = vld1q_u8(x8); which doesn't require knowing anything about the ordering of lanes, but this generates considerably worse code and apparently confuses GCC into not recognizing the constant value of x8. Fix some clang mistakes while here too. 
To generate a diff of this commit: cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon.c \ src/sys/crypto/aes/arch/arm/aes_neon_32.S \ src/sys/crypto/aes/arch/arm/aes_neon_subr.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_impl.h cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/chacha/arch/arm/chacha_neon.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.11 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.11 Mon Jul 27 20:57:23 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Sat Aug 8 14:47:01 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -26,11 +26,9 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include - #include -RCSID("$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $") +RCSID("$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $") .arch_extension aes @@ -921,19 +919,13 @@ ENTRY(aesarmv8_ccm_enc1) ld1 {v5.4s}, [x11] /* q5 := (0,0,0,1) (host-endian) */ mov x9, x0 /* x9 := enckey */ mov x10, x3 /* x10 := nbytes */ -#if _BYTE_ORDER == _LITTLE_ENDIAN rev32 v2.16b, v2.16b /* q2 := ctr (host-endian) */ -#endif _ALIGN_TEXT 1: ldr q3, [x1], #0x10 /* q3 := plaintext block */ add v2.4s, v2.4s, v5.4s /* increment ctr (32-bit) */ mov x0, x9 /* x0 := enckey */ mov x3, x5 /* x3 := nrounds */ -#if _BYTE_ORDER == _LITTLE_ENDIAN rev32 v1.16b, v2.16b /* q1 := ctr (big-endian) */ -#else - mov v1.16b, v2.16b /* q1 := ctr (big-endian) */ -#endif eor v0.16b, v0.16b, v3.16b /* q0 := auth ^ ptxt */ bl aesarmv8_enc2 /* q0 := auth', q1 := pad; * trash x0/x3/q16 */ @@ -941,9 +933,7 @@ ENTRY(aesarmv8_ccm_enc1) subs x10, x10, #0x10 /* count down bytes */ str q3, [x2], #0x10 /* store ciphertext block */ b.ne 1b /* repeat if more blocks */ -#if _BYTE_ORDER == _LITTLE_ENDIAN rev32 v2.16b, v2.16b /* q2 := ctr (big-endian) */ -#endif stp q0, q2, [x4] /* store updated auth/ctr */ ldp fp, lr, [sp], #16 /* pop stack frame */ ret @@ -968,18 +958,12 @@ ENTRY(aesarmv8_ccm_dec1) ld1 {v5.4s}, [x11] /* q5 := (0,0,0,1) (host-endian) */ mov x9, x0 /* x9 := enckey */ mov x10, x3 /* x10 := nbytes */ -#if _BYTE_ORDER == _LITTLE_ENDIAN rev32 v2.16b, v2.16b /* q2 := ctr (host-endian) */ -#endif /* Decrypt the first block. 
*/ add v2.4s, v2.4s, v5.4s /* increment ctr (32-bit) */ mov x3, x5 /* x3 := nrounds */ -#if _BYTE_ORDER == _LITTLE_ENDIAN rev32 v0.16b, v2.16b /* q0 := ctr (big-endian) */ -#else - mov v0.16b, v2.16b /* q0 := ctr (big-endian) */ -#endif ldr q3, [x1], #0x10 /* q3 := ctxt */ bl aesarmv8_enc1 /* q0 := pad; trash x0/x3/q16 */ b 2f @@ -995,11 +979,7 @@ ENTRY(aesarmv8_ccm_dec1) add v2.4s, v2.4s, v5.4s /* increment ctr (32-bit) */ mov x0, x9 /* x0 := enckey */ mov x3, x5 /* x3 := nrounds */ -#if _BYTE_ORDER == _LITTLE_ENDIAN rev32 v0.16b, v2.16b /* q0 := ctr (big-endian) */ -#else - mov v0.16b, v2.16b /* q0 := ctr
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Sat Aug 8 14:47:01 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon.c aes_neon_32.S aes_neon_impl.h aes_neon_subr.c arm_neon.h src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c chacha_neon_32.S chacha_neon_64.S Added Files: src/sys/crypto/aes/arch/arm: arm_neon_imm.h src/sys/crypto/chacha/arch/arm: arm_neon_imm.h Log Message: Fix ARM NEON implementations of AES and ChaCha on big-endian ARM. New macros such as VQ_N_U32(a,b,c,d) for NEON vector initializers. Needed because GCC and Clang disagree on the ordering of lanes, depending on whether it's 64-bit big-endian, 32-bit big-endian, or little-endian -- and, bizarrely, both of them disagree with the architectural numbering of lanes. Experimented with using static const uint8_t x8[16] = {...}; uint8x16_t x = vld1q_u8(x8); which doesn't require knowing anything about the ordering of lanes, but this generates considerably worse code and apparently confuses GCC into not recognizing the constant value of x8. Fix some clang mistakes while here too. 
To generate a diff of this commit: cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon.c \ src/sys/crypto/aes/arch/arm/aes_neon_32.S \ src/sys/crypto/aes/arch/arm/aes_neon_subr.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_impl.h cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/arm_neon.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r0 -r1.1 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/chacha/arch/arm/chacha_neon.c cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Wed Jul 29 14:23:59 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_32.S Log Message: Issue three more swaps to save eight stores. Reduces code size and yields a small (~2%) cgd throughput boost. Remove duplicate comment while here. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Wed Jul 29 14:23:59 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_32.S Log Message: Issue three more swaps to save eight stores. Reduces code size and yields a small (~2%) cgd throughput boost. Remove duplicate comment while here. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/chacha_neon_32.S diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.2 --- src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.1 Tue Jul 28 20:08:48 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon_32.S Wed Jul 29 14:23:59 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon_32.S,v 1.1 2020/07/28 20:08:48 riastradh Exp $ */ +/* $NetBSD: chacha_neon_32.S,v 1.2 2020/07/29 14:23:59 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: chacha_neon_32.S,v 1.1 2020/07/28 20:08:48 riastradh Exp $") +RCSID("$NetBSD: chacha_neon_32.S,v 1.2 2020/07/29 14:23:59 riastradh Exp $") .fpu neon @@ -305,21 +305,29 @@ ENTRY(chacha_stream256_neon) * q7 = (x3[4], x3[5]; x3[6], x3[7]) * * The first two rows to write out are q0 = x0[0:4) and q4 = - * x0[4:8). If we first swap q1 and q4, then once we've - * written them out we free up consecutive registers q0-q1 for - * store-multiple. + * x0[4:8). 
Swapping q1<->q4, q3<->q6, q9<->q12, and q11<->q14 + enables us to issue all stores in consecutive pairs: + x0 in q0-q1 + x1 in q8-q9 + x2 in q2-q3 + x3 in q10-q11 + x4 in q4-q5 + x5 in q12-q13 + x6 in q6-q7 + x7 in q14-q15 */ vswp q1, q4 + vswp q3, q6 vadd.u32 q0, q0, q9 vadd.u32 q4, q4, q9 vadd.u32 q2, q2, q9 - vadd.u32 q3, q3, q9 + vadd.u32 q6, q6, q9 vadd.u32 q1, q1, q8 vadd.u32 q5, q5, q8 - vadd.u32 q6, q6, q8 + vadd.u32 q3, q3, q8 vadd.u32 q7, q7, q8 vld1.32 {q8-q9}, [fp, :256] /* restore q8-q9 */ @@ -349,14 +357,17 @@ ENTRY(chacha_stream256_neon) vswp d19, d22 vswp d27, d30 + vswp q9, q12 + vswp q11, q14 + vadd.u32 q8, q8, q0 - vadd.u32 q9, q9, q0 + vadd.u32 q12, q12, q0 vadd.u32 q10, q10, q0 - vadd.u32 q11, q11, q0 + vadd.u32 q14, q14, q0 - vadd.u32 q12, q12, q1 + vadd.u32 q9, q9, q1 vadd.u32 q13, q13, q1 - vadd.u32 q14, q14, q1 + vadd.u32 q11, q11, q1 vadd.u32 q15, q15, q1 LE32TOH(q8) @@ -368,28 +379,18 @@ ENTRY(chacha_stream256_neon) LE32TOH(q14) LE32TOH(q15) - /* prepare to zero temporary space on stack */ - vmov.i32 q0, #0 - vmov.i32 q1, #0 - - /* vst1.32 {q0}, [r0]! */ - /* vst1.32 {q1}, [r0]! */ /* (was q4 before vswp) */ - vst1.32 {q8}, [r0]! - vst1.32 {q12}, [r0]! - vst1.32 {q2}, [r0]! - vst1.32 {q6}, [r0]! - vst1.32 {q10}, [r0]! - vst1.32 {q14}, [r0]! - vst1.32 {q4}, [r0]! /* (was q1 before vswp) */ - vst1.32 {q5}, [r0]! - vst1.32 {q9}, [r0]! - vst1.32 {q13}, [r0]! - vst1.32 {q3}, [r0]! - vst1.32 {q7}, [r0]! - vst1.32 {q11}, [r0]! - vst1.32 {q15}, [r0] + /* vst1.32 {q0-q1}, [r0]! */ + vst1.32 {q8-q9}, [r0]! + vst1.32 {q2-q3}, [r0]! + vst1.32 {q10-q11}, [r0]! + vst1.32 {q4-q5}, [r0]! + vst1.32 {q12-q13}, [r0]! + vst1.32 {q6-q7}, [r0]! 
+ vst1.32 {q14-q15}, [r0] /* zero temporary space on the stack */ + vmov.i32 q0, #0 + vmov.i32 q1, #0 vst1.8 {q0-q1}, [fp, :256] /* restore callee-saves registers and stack */ @@ -481,42 +482,8 @@ ENTRY(chacha_stream_xor256_neon) * in only 16 registers, compute p[i] ^ (y[i] + x[i]) for i in * {0,1,2,...,15}. The twist is that the p[i] and the y[i] are * transposed from one another, and the x[i] are in general - * registers and memory. So we have: - * - * q0 = (x0[0], x1[0]; x2[0], x3[0]) - * q1 = (x0[1], x1[1]; x2[1], x3[1]) - * q2 = (x0[2], x1[2]; x2[2], x3[2]) - * q3 = (x0[3], x1[3]; x2[3], x3[3]) - * ... - * q15 = (x0[15], x1[15]; x2[15], x3[15]) - * - * where xi[j] is the jth word of the ith 16-word block. Zip - * consecutive pairs with vzip.32, and you get: - * - * q0 = (x0[0], x0[1]; x1[0], x1[1]) - * q1 = (x2[0], x2[1]; x3[0], x3[1]) - * q2 = (x0[2], x0[3]; x1[2], x1[3]) - * q3 = (x2[2], x2[3]; x3[2], x3[3]) - * ... - * q15 = (x2[14], x2[15]; x3[14], x3[15]) - * - * As 64-bit d registers, this is: - * - * d0 = (x0[0], x0[1]) d1 = (x1[0], x1[1]) - * d2 = (x2[0], x2[1]) d3 = (x3[0], x3[1]) - * d4 = (x0[2], x0[3]) d5 = (x1[2], x1[3]) - * d6 = (x2[2], x2[3]) d7 = (x3[2], x3[3]) - * ... - * d30 = (x2[14], x2[15]) d31 = (x3[14], x3[15]) - * - * Swap d1<->d4, d3<->d6, ..., and you get: - * - * q0 = (x0[0], x0[1]; x0[2], x0[3]) - * q1 = (x2[0], x2[1]; x2[2], x2[3]) - * q2 = (x1[0], x1[1]; x1[2], x1[3]) - * q3 = (x3[0], x3[1]; x3[2], x3[3]) - * ... - * q15 = (x15[0], x15[1]; x15[2], x15[3]) + * registers and memory.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jul 28 20:11:09 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon.c aes_neon_impl.h aes_neon_subr.c arm_neon.h Log Message: Draft 2x vectorized neon vpaes for aarch64. Gives a modest speed boost on rk3399 (Cortex-A53/A72), around 20% in cgd tests, for parallelizable operations like CBC decryption; same improvement should probably carry over to rpi4 CPU which lacks ARMv8.0-AES. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon.c \ src/sys/crypto/aes/arch/arm/aes_neon_subr.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_impl.h cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon.c diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.3 src/sys/crypto/aes/arch/arm/aes_neon.c:1.4 --- src/sys/crypto/aes/arch/arm/aes_neon.c:1.3 Tue Jun 30 20:32:11 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon.c Tue Jul 28 20:11:09 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $ */ +/* $NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $"); #include @@ -589,6 +589,59 @@ aes_neon_enc1(const struct aesenc *enc, return vqtbl1q_u8(x, sr[rmod4]); } +uint8x16x2_t +aes_neon_enc2(const struct aesenc *enc, uint8x16x2_t x, unsigned nrounds) +{ + const uint32_t *rk32 = enc->aese_aes.aes_rk; + uint8x16_t inv_ = *(const volatile uint8x16_t *) + uint8x16_t inva_ = *(const volatile uint8x16_t *) + uint8x16_t sb1_0 = ((const volatile uint8x16_t *)sb1)[0]; + uint8x16_t sb1_1 = ((const volatile uint8x16_t *)sb1)[1]; + uint8x16_t sb2_0 = ((const volatile uint8x16_t *)sb2)[0]; + uint8x16_t sb2_1 = ((const volatile uint8x16_t *)sb2)[1]; + uint8x16_t x0 = x.val[0], x1 = x.val[1]; + uint8x16_t io0, jo0, io1, jo1; + unsigned rmod4 = 0; + + x0 = aes_schedule_transform(x0, ipt); + x1 = aes_schedule_transform(x1, ipt); + x0 ^= loadroundkey(rk32); + x1 ^= loadroundkey(rk32); + for (;;) { + uint8x16_t A_0, A2_0, A2_B_0, A2_B_D_0; + uint8x16_t A_1, A2_1, A2_B_1, A2_B_D_1; + + subbytes(, , x0, inv_, inva_); + subbytes(, , x1, inv_, inva_); + + rk32 += 4; + rmod4 = (rmod4 + 1) % 4; + if (--nrounds == 0) + break; + + A_0 = vqtbl1q_u8(sb1_0, io0) ^ vqtbl1q_u8(sb1_1, jo0); + A_1 = vqtbl1q_u8(sb1_0, io1) ^ vqtbl1q_u8(sb1_1, jo1); + A_0 ^= loadroundkey(rk32); + A_1 ^= loadroundkey(rk32); + A2_0 = vqtbl1q_u8(sb2_0, io0) ^ vqtbl1q_u8(sb2_1, jo0); + A2_1 = vqtbl1q_u8(sb2_0, io1) ^ vqtbl1q_u8(sb2_1, jo1); + A2_B_0 = A2_0 ^ vqtbl1q_u8(A_0, mc_forward[rmod4]); + A2_B_1 = A2_1 ^ vqtbl1q_u8(A_1, mc_forward[rmod4]); + A2_B_D_0 = A2_B_0 ^ vqtbl1q_u8(A_0, mc_backward[rmod4]); + A2_B_D_1 = A2_B_1 ^ vqtbl1q_u8(A_1, mc_backward[rmod4]); + x0 = A2_B_D_0 ^ vqtbl1q_u8(A2_B_0, mc_forward[rmod4]); + x1 = A2_B_D_1 ^ vqtbl1q_u8(A2_B_1, mc_forward[rmod4]); + } + x0 = vqtbl1q_u8(sbo[0], io0) ^ vqtbl1q_u8(sbo[1], jo0); + x1 = vqtbl1q_u8(sbo[0], io1) ^ 
vqtbl1q_u8(sbo[1], jo1); + x0 ^= loadroundkey(rk32); + x1 ^= loadroundkey(rk32); + return (uint8x16x2_t) { .val = { + [0] = vqtbl1q_u8(x0, sr[rmod4]), + [1] = vqtbl1q_u8(x1, sr[rmod4]), + } }; +} + uint8x16_t aes_neon_dec1(const struct aesdec *dec, uint8x16_t x, unsigned nrounds) { @@ -628,4 +681,60 @@ aes_neon_dec1(const struct aesdec *dec, return vqtbl1q_u8(x, sr[i]); } +uint8x16x2_t +aes_neon_dec2(const struct aesdec *dec, uint8x16x2_t x, unsigned nrounds) +{ + const uint32_t *rk32 = dec->aesd_aes.aes_rk; + unsigned i = 3 & ~(nrounds - 1); + uint8x16_t inv_ = *(const volatile uint8x16_t *) + uint8x16_t inva_ = *(const volatile uint8x16_t *) + uint8x16_t x0 = x.val[0], x1 = x.val[1]; + uint8x16_t io0, jo0, io1, jo1, mc; + + x0 = aes_schedule_transform(x0, dipt); + x1 = aes_schedule_transform(x1, dipt); + x0 ^= loadroundkey(rk32); + x1 ^= loadroundkey(rk32); + rk32 += 4; + + mc = mc_forward[3]; + for (;;) { + subbytes(, , x0, inv_, inva_); + subbytes(, , x1, inv_, inva_); + if (--nrounds == 0) + break; + + x0 = vqtbl1q_u8(dsb9[0], io0) ^ vqtbl1q_u8(dsb9[1], jo0); + x1 = vqtbl1q_u8(dsb9[0], io1) ^ vqtbl1q_u8(dsb9[1], jo1); + x0 ^= loadroundkey(rk32); + x1 ^= loadroundkey(rk32); + rk32 += 4;/* next round key */ + + x0 = vqtbl1q_u8(x0, mc); + x1 = vqtbl1q_u8(x1, mc); + x0 ^= vqtbl1q_u8(dsbd[0], io0) ^ vqtbl1q_u8(dsbd[1], jo0); + x1 ^= vqtbl1q_u8(dsbd[0], io1) ^ vqtbl1q_u8(dsbd[1], jo1); + + x0 = vqtbl1q_u8(x0, mc); + x1 = vqtbl1q_u8(x1, mc); + x0 ^= vqtbl1q_u8(dsbb[0], io0) ^ vqtbl1q_u8(dsbb[1], jo0); + x1 ^= vqtbl1q_u8(dsbb[0],
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jul 28 20:11:09 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon.c aes_neon_impl.h aes_neon_subr.c arm_neon.h Log Message: Draft 2x vectorized neon vpaes for aarch64. Gives a modest speed boost on rk3399 (Cortex-A53/A72), around 20% in cgd tests, for parallelizable operations like CBC decryption; same improvement should probably carry over to rpi4 CPU which lacks ARMv8.0-AES. To generate a diff of this commit: cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon.c \ src/sys/crypto/aes/arch/arm/aes_neon_subr.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_impl.h cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/arm_neon.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jul 28 20:05:33 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon.c Log Message: Fix big-endian build with appropriate casts around vrev32q_u8. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.5 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.6 --- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.5 Mon Jul 27 20:58:56 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon.c Tue Jul 28 20:05:33 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon.c,v 1.5 2020/07/27 20:58:56 riastradh Exp $ */ +/* $NetBSD: chacha_neon.c,v 1.6 2020/07/28 20:05:33 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -53,7 +53,7 @@ vhtole_u32(uint32x4_t x) #if _BYTE_ORDER == _LITTLE_ENDIAN return x; #elif _BYTE_ORDER == _BIG_ENDIAN - return vrev32q_u8(x); + return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x))); #endif } @@ -63,7 +63,7 @@ vletoh_u32(uint32x4_t x) #if _BYTE_ORDER == _LITTLE_ENDIAN return x; #elif _BYTE_ORDER == _BIG_ENDIAN - return vrev32q_u8(x); + return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x))); #endif }
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jul 28 20:05:33 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon.c Log Message: Fix big-endian build with appropriate casts around vrev32q_u8. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jul 28 15:42:41 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Fix typo in comment. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.5 --- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.4 Mon Jul 27 20:57:23 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Tue Jul 28 15:42:41 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $ */ +/* $NetBSD: chacha_neon_64.S,v 1.5 2020/07/28 15:42:41 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,7 +28,7 @@ #include -RCSID("$NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $") +RCSID("$NetBSD: chacha_neon_64.S,v 1.5 2020/07/28 15:42:41 riastradh Exp $") #define ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \ STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r); \ @@ -142,7 +142,7 @@ STEP(STEP19,a0,b0,c0,d0,a1,b1,c1,d1,a2,b * chacha_stream256_neon(uint8_t s[256]@x0, * uint32_t blkno@w1, * const uint8_t nonce[12]@x2, - * const uint8_t key[12]@x3, + * const uint8_t key[32]@x3, * const uint8_t const[16]@x4, * unsigned nr@w5) */
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Tue Jul 28 15:42:41 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Fix typo in comment. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Tue Jul 28 14:01:35 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_via.c Log Message: Initialize authctr in both branches. I guess I didn't test the unaligned case, weird. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_via.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/x86/aes_via.c diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.5 src/sys/crypto/aes/arch/x86/aes_via.c:1.6 --- src/sys/crypto/aes/arch/x86/aes_via.c:1.5 Sat Jul 25 22:31:32 2020 +++ src/sys/crypto/aes/arch/x86/aes_via.c Tue Jul 28 14:01:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $ */ +/* $NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $"); #ifdef _KERNEL #include @@ -739,6 +739,7 @@ aesvia_ccm_enc1(const struct aesenc *enc authctr = authctrbuf; ccmenc_unaligned_evcnt.ev_count++; } else { + authctr = authctr0; ccmenc_aligned_evcnt.ev_count++; } c0 = le32dec(authctr0 + 16 + 4*0); @@ -812,6 +813,7 @@ aesvia_ccm_dec1(const struct aesenc *enc le32enc(authctr + 16 + 4*2, c2); ccmdec_unaligned_evcnt.ev_count++; } else { + authctr = authctr0; ccmdec_aligned_evcnt.ev_count++; }
CVS commit: src/sys/crypto/aes/arch/x86
Module Name:src Committed By: riastradh Date: Tue Jul 28 14:01:35 UTC 2020 Modified Files: src/sys/crypto/aes/arch/x86: aes_via.c Log Message: Initialize authctr in both branches. I guess I didn't test the unaligned case, weird. To generate a diff of this commit: cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_via.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Re: CVS commit: src/sys/crypto/aes
On 27/07/2020 21:44, Taylor R Campbell wrote: Module Name:src Committed By: riastradh Date: Mon Jul 27 20:44:30 UTC 2020 Modified Files: src/sys/crypto/aes: aes_ccm.c aes_ccm.h Log Message: Gather auth[16] and ctr[16] into one authctr[32]. Should appease clang. clang is still not appeased :/ --- aes_via.o --- /home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:807:6: error: variable 'authctr' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] if ((uintptr_t)authctr0 & 0xf) { ^ /home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:820:10: note: uninitialized use occurs here be32enc(authctr + 16 + 4*3, ++c3); ^~~ /home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:807:2: note: remove the 'if' if its condition is always true if ((uintptr_t)authctr0 & 0xf) { ^~~ /home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:796:18: note: initialize the variable 'authctr' to silence this warning uint8_t *authctr; ^ = NULL 1 error generated. *** [aes_via.o] Error code 1
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:58:56 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c Log Message: Note that VSRI seems to hurt here. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:58:56 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c Log Message: Note that VSRI seems to hurt here. To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/arm_neon.h diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.2 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.3 --- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.2 Mon Jul 27 20:58:06 2020 +++ src/sys/crypto/chacha/arch/arm/arm_neon.h Mon Jul 27 20:58:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.2 2020/07/27 20:58:06 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.3 2020/07/27 20:58:56 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -529,6 +529,40 @@ vsliq_n_s32(int32x4_t __vins, int32x4_t #endif /* __LITTLE_ENDIAN__ */ #endif +#if defined(__GNUC__) && !defined(__clang__) +_INTRINSATTR +static __inline uint32x4_t +vsriq_n_u32(uint32x4_t __vins, uint32x4_t __vsh, uint8_t __bits) +{ +#ifdef __aarch64__ + return __builtin_aarch64_usri_nv4si_uuus(__vins, __vsh, __bits); +#else + return (uint32x4_t)__builtin_neon_vsri_nv4si((int32x4_t)__vins, + (int32x4_t)__vsh, __bits); +#endif +} +#elif defined(__clang__) +#ifdef __LITTLE_ENDIAN__ +#define vsriq_n_u32(__vins, __vsh, __bits) \ + (int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins), \ + (int32x4_t)(__vsh), (__bits), 34) +#else +#define vsliq_n_s32(__vins, __vsh, __bits) ( \ +{ \ + int32x4_t __tvins = (__vins); \ + int32x4_t __tvsh = (__vsh); \ + uint8_t __tbits = (__bits); \ + int32x4_t __vins_r = __builtin_shufflevector(__tvins, __tvins, \ + 3,2,1,0); \ + int32x4_t __vsh_r = __builtin_shufflevector(__tvsh, __tvsh, \ + 3,2,1,0); \ + int32x4_t __r = 
__builtin_neon_vsriq_n_v(__tvins, __tvsh, __tbits,\ + 34); \ + __builtin_shufflevector(__r, __r, 3,2,1,0); \ +}) +#endif +#endif + _INTRINSATTR static __inline void vst1q_u32(uint32_t *__p32, uint32x4_t __v) Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.4 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.5 --- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.4 Mon Jul 27 20:58:06 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon.c Mon Jul 27 20:58:56 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon.c,v 1.4 2020/07/27 20:58:06 riastradh Exp $ */ +/* $NetBSD: chacha_neon.c,v 1.5 2020/07/27 20:58:56 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -36,7 +36,15 @@ static inline uint32x4_t vrolq_n_u32(uint32x4_t x, uint8_t n) { + /* + * Tempting to use VSHL/VSRI instead of VSHL/VSHR/VORR, but in + * practice it hurts performance at least on Cortex-A8. + */ +#if 1 return vshlq_n_u32(x, n) | vshrq_n_u32(x, 32 - n); +#else + return vsriq_n_u32(vshlq_n_u32(x, n), x, 32 - n); +#endif } static inline uint32x4_t
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:58:07 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c Log Message: Take advantage of REV32 and TBL for 16-bit and 8-bit rotations. However, disable use of (V)TBL on armv7/aarch32 for now, because for some reason GCC spills things to the stack despite having plenty of free registers, which hurts performance more than it helps at least on ARM Cortex-A8. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/arm_neon.h diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.1 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.2 --- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.1 Sat Jul 25 22:51:57 2020 +++ src/sys/crypto/chacha/arch/arm/arm_neon.h Mon Jul 27 20:58:06 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: arm_neon.h,v 1.1 2020/07/25 22:51:57 riastradh Exp $ */ +/* $NetBSD: arm_neon.h,v 1.2 2020/07/27 20:58:06 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -39,6 +39,7 @@ typedef __Int32x4_t int32x4_t; typedef __Int64x2_t int64x2_t; typedef __Int8x16_t int8x16_t; +typedef __Uint16x8_t uint16x8_t; typedef __Uint32x4_t uint32x4_t; typedef __Uint64x2_t uint64x2_t; typedef __Uint8x16_t uint8x16_t; @@ -46,6 +47,7 @@ typedef __Uint8x16_t uint8x16_t; typedef __simd128_int32_t int32x4_t; typedef __simd128_int64_t int64x2_t; typedef __simd128_int8_t int8x16_t; +typedef __simd128_uint16_t uint16x8_t; typedef __simd128_uint32_t uint32x4_t; typedef __simd128_uint64_t uint64x2_t; typedef __simd128_uint8_t uint8x16_t; @@ -70,9 +72,11 @@ typedef struct { uint8x8_t val[2]; } uin typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; + typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; +typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; typedef struct { uint8x8_t val[2]; } uint8x8x2_t; @@ -330,6 +334,27 @@ vreinterpretq_s32_u8(uint8x16_t __v) } _INTRINSATTR +static __inline uint16x8_t +vreinterpretq_u16_u32(uint32x4_t __v) +{ + return (uint16x8_t)__v; +} + +_INTRINSATTR +static __inline uint32x4_t +vreinterpretq_u32_u16(uint16x8_t __v) +{ + return (uint32x4_t)__v; +} + +_INTRINSATTR +static __inline uint32x4_t +vreinterpretq_u32_u64(uint64x2_t __v) +{ + return (uint32x4_t)__v; +} + +_INTRINSATTR static __inline uint32x4_t vreinterpretq_u32_u8(uint8x16_t __v) { @@ -338,6 +363,13 @@ vreinterpretq_u32_u8(uint8x16_t __v) _INTRINSATTR static __inline uint64x2_t +vreinterpretq_u64_u32(uint32x4_t __v) +{ + return (uint64x2_t)__v; +} + +_INTRINSATTR +static __inline uint64x2_t vreinterpretq_u64_u8(uint8x16_t __v) { return (uint64x2_t)__v; @@ -365,6 +397,17 @@ 
vreinterpretq_u8_u64(uint64x2_t __v) } _INTRINSATTR +static __inline uint16x8_t +vrev32q_u16(uint16x8_t __v) +{ +#if defined(__GNUC__) && !defined(__clang__) + return __builtin_shuffle(__v, (uint16x8_t) { 1,0, 3,2, 5,4, 7,6 }); +#elif defined(__clang__) + return __builtin_shufflevector(__v, 1,0, 3,2, 5,4, 7,6); +#endif +} + +_INTRINSATTR static __inline uint8x16_t vrev32q_u8(uint8x16_t __v) { @@ -531,4 +574,58 @@ vst1q_u8(uint8_t *__p8, uint8x16_t __v) #endif } +#ifndef __aarch64__ /* XXX */ + +_INTRINSATTR +static __inline uint8x8_t +vtbl1_u8(uint8x8_t __tab, uint8x8_t __idx) +{ +#if defined(__GNUC__) && !defined(__clang__) + return (uint8x8_t)__builtin_neon_vtbl1v8qi((int8x8_t)__tab, + (int8x8_t)__idx); +#elif defined(__clang__) + uint8x8_t __ret; +#ifndef __LITTLE_ENDIAN__ + __tab = __builtin_shufflevector(__tab, __tab, 7,6,5,4,3,2,1,0); + __idx = __builtin_shufflevector(__idx, __idx, 7,6,5,4,3,2,1,0); +#endif + __ret = (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__tab, + (int8x8_t)__idx, 16); +#ifndef __LITTLE_ENDIAN__ + __ret = __builtin_shufflevector(__ret, __ret, 7,6,5,4,3,2,1,0); +#endif + return __ret; +#endif +} + +_INTRINSATTR +static __inline uint8x8_t +vtbl2_u8(uint8x8x2_t __tab, uint8x8_t __idx) +{ +#if defined(__GNUC__) && !defined(__clang__) + union { + uint8x8x2_t __u8x8x82; + __builtin_neon_ti __ti; + } __u = { __tab }; + return (uint8x8_t)__builtin_neon_vtbl2v8qi(__u.__ti, (int8x8_t)__idx); +#elif defined(__clang__) + uint8x8_t __ret; +#ifndef __LITTLE_ENDIAN__ + __tab.val[0] = __builtin_shufflevector(__tab.val[0], __tab.val[0], + 7,6,5,4,3,2,1,0); + __tab.val[1] = __builtin_shufflevector(__tab.val[1], __tab.val[1], +
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:57:23 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S src/sys/crypto/aes/arch/x86: aes_ni_64.S src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Add RCSIDs to the AES and ChaCha .S sources. To generate a diff of this commit: cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_armv8_64.S cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_32.S cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_ni_64.S cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.11 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10 Mon Jul 27 20:54:11 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Mon Jul 27 20:57:23 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.10 2020/07/27 20:54:11 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -30,6 +30,8 @@ #include +RCSID("$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $") + .arch_extension aes /* Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.3 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.4 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.3 Mon Jul 27 20:53:22 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Mon Jul 27 20:57:23 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.3 2020/07/27 20:53:22 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -28,6 +28,8 @@ #include +RCSID("$NetBSD: aes_neon_32.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $") + .fpu neon .text Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.5 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.6 --- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.5 Mon Jul 27 20:53:22 2020 +++ src/sys/crypto/aes/arch/x86/aes_ni_64.S Mon Jul 27 20:57:23 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_ni_64.S,v 1.5 2020/07/27 20:53:22 riastradh Exp $ */ +/* $NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,6 +28,8 @@ #include +RCSID("$NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $") + /* * MOVDQA/MOVDQU are Move Double Quadword (Aligned/Unaligned), defined * to operate on integers; MOVAPS/MOVUPS are Move (Aligned/Unaligned) Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.4 --- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.3 Mon Jul 27 20:53:23 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Mon Jul 27 20:57:23 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon_64.S,v 1.3 2020/07/27 20:53:23 riastradh Exp $ */ +/* $NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -28,6 +28,8 @@ #include +RCSID("$NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $") + #define ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \ STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r); \ STEP(STEP1,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r); \
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:58:07 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c Log Message: Take advantage of REV32 and TBL for 16-bit and 8-bit rotations. However, disable use of (V)TBL on armv7/aarch32 for now, because for some reason GCC spills things to the stack despite having plenty of free registers, which hurts performance more than it helps at least on ARM Cortex-A8. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon.h cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:57:23 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S src/sys/crypto/aes/arch/x86: aes_ni_64.S src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Add RCSIDs to the AES and ChaCha .S sources. To generate a diff of this commit: cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_armv8_64.S cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_32.S cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_ni_64.S cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:54:12 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: Issue aese/aesmc and aesd/aesimc in pairs. Advised by the aarch64 optimization guide; increases cgd throughput by about 10%. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:54:12 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S Log Message: Issue aese/aesmc and aesd/aesimc in pairs. Advised by the aarch64 optimization guide; increases cgd throughput by about 10%. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9 Mon Jul 27 20:53:22 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Mon Jul 27 20:54:11 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.10 2020/07/27 20:54:11 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -1041,15 +1041,18 @@ END(ctr32_inc) .type aesarmv8_enc1,@function aesarmv8_enc1: ldr q16, [x0], #0x10 /* load round key */ - b 2f + sub x3, x3, #1 _ALIGN_TEXT -1: /* q0 := MixColumns(q0) */ +1: /* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0 */ + aese v0.16b, v16.16b aesmc v0.16b, v0.16b -2: subs x3, x3, #1 + ldr q16, [x0], #0x10 + subs x3, x3, #1 + b.ne 1b /* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */ aese v0.16b, v16.16b - ldr q16, [x0], #0x10 /* load next round key */ - b.ne 1b + ldr q16, [x0] /* load last round key */ + /* q0 := AddRoundKey_q16(q0) */ eor v0.16b, v0.16b, v16.16b ret END(aesarmv8_enc1) @@ -1067,17 +1070,21 @@ END(aesarmv8_enc1) .type aesarmv8_enc2,@function aesarmv8_enc2: ldr q16, [x0], #0x10 /* load round key */ - b 2f + sub x3, x3, #1 _ALIGN_TEXT -1: /* q[i] := MixColumns(q[i]) */ +1: /* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i] */ + aese v0.16b, v16.16b aesmc v0.16b, v0.16b + aese v1.16b, v16.16b aesmc v1.16b, v1.16b -2: subs x3, x3, #1 + ldr q16, [x0], #0x10 /* load next round key */ + subs x3, x3, #1 + b.ne 1b /* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */ aese v0.16b, v16.16b aese v1.16b, v16.16b - ldr q16, [x0], #0x10 /* load next round key */ - b.ne 1b + ldr q16, [x0] /* load last round key */ + /* q[i] := AddRoundKey_q16(q[i]) */ eor v0.16b, v0.16b, v16.16b eor v1.16b, v1.16b, v16.16b ret @@ -1097,18 +1104,28 @@ END(aesarmv8_enc2) .type aesarmv8_enc8,@function aesarmv8_enc8: ldr q16, [x0], #0x10 /* load round key */ - b 2f + sub x3, x3, #1 _ALIGN_TEXT -1: /* q[i] := MixColumns(q[i]) */ +1: /* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i] */ + aese v0.16b, v16.16b aesmc v0.16b, v0.16b + aese v1.16b, v16.16b aesmc v1.16b, v1.16b + aese v2.16b, v16.16b aesmc v2.16b, v2.16b + aese v3.16b, v16.16b aesmc v3.16b, v3.16b + aese v4.16b, v16.16b aesmc v4.16b, v4.16b + aese v5.16b, v16.16b aesmc v5.16b, v5.16b + aese v6.16b, v16.16b aesmc v6.16b, v6.16b + aese v7.16b, v16.16b aesmc 
v7.16b, v7.16b -2: subs x3, x3, #1 + ldr q16, [x0], #0x10 /* load next round key */ + subs x3, x3, #1 + b.ne 1b /* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */ aese v0.16b, v16.16b aese v1.16b, v16.16b @@ -1118,9 +1135,9 @@ aesarmv8_enc8: aese v5.16b, v16.16b aese v6.16b, v16.16b aese v7.16b, v16.16b - ldr q16, [x0], #0x10 /* load next round key */ - b.ne 1b - eor v0.16b, v0.16b, v16.16b /* AddRoundKey */ + ldr q16, [x0] /* load last round key */ + /* q[i] := AddRoundKey_q16(q[i]) */ + eor v0.16b, v0.16b, v16.16b eor v1.16b, v1.16b, v16.16b eor v2.16b, v2.16b, v16.16b eor v3.16b, v3.16b, v16.16b @@ -1144,15 +1161,19 @@ END(aesarmv8_enc8) .type aesarmv8_dec1,@function aesarmv8_dec1: ldr q16, [x0], #0x10 /* load round key */ - b 2f + sub x3, x3, #1 _ALIGN_TEXT -1: /* q0 := InMixColumns(q0) */ - aesimc v0.16b, v0.16b -2: subs x3, x3, #1 - /* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */ +1: /* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */ aesd v0.16b, v16.16b + /* q0 := InMixColumns(q0) */ + aesimc v0.16b, v0.16b ldr q16, [x0], #0x10 /* load next round key */ + subs x3, x3, #1 b.ne 1b + /* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */ + aesd v0.16b, v16.16b + ldr q16, [x0] /* load last round key */ + /* q0 := AddRoundKey_q16(q0) */ eor v0.16b, v0.16b, v16.16b ret END(aesarmv8_dec1) @@ -1171,18 +1192,29 @@ END(aesarmv8_dec1) .type aesarmv8_dec8,@function aesarmv8_dec8: ldr q16, [x0], #0x10 /* load round key */ - b 2f + sub x3, x3, #1 _ALIGN_TEXT -1: /* q[i] := InMixColumns(q[i]) */ +1: /* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */ + aesd v0.16b, v16.16b + /* q[i] := InMixColumns(q[i]) */ aesimc v0.16b, v0.16b + aesd v1.16b, v16.16b aesimc v1.16b, v1.16b + aesd v2.16b, v16.16b aesimc v2.16b, v2.16b + aesd v3.16b, v16.16b aesimc v3.16b,
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:53:23 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S src/sys/crypto/aes/arch/x86: aes_ni_64.S src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Align critical-path loops in AES and ChaCha. To generate a diff of this commit: cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_32.S cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_ni_64.S cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.8 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9 --- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.8 Sat Jul 25 22:33:04 2020 +++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S Mon Jul 27 20:53:22 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_armv8_64.S,v 1.8 2020/07/25 22:33:04 riastradh Exp $ */ +/* $NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -440,6 +440,7 @@ END(aesarmv8_setenckey256) ENTRY(aesarmv8_enctodec) ldr q0, [x0, x2, lsl #4] /* load last round key */ b 2f + _ALIGN_TEXT 1: aesimc v0.16b, v0.16b /* convert encryption to decryption */ 2: str q0, [x1], #0x10 /* store round key */ subs x2, x2, #1 /* count down round */ @@ -503,6 +504,7 @@ ENTRY(aesarmv8_cbc_enc) mov x9, x0 /* x9 := enckey */ mov x10, x3 /* x10 := nbytes */ ldr q0, [x4] /* q0 := chaining value */ + _ALIGN_TEXT 1: ldr q1, [x1], #0x10 /* q1 := plaintext block */ eor v0.16b, v0.16b, v1.16b /* q0 := cv ^ ptxt */ mov x0, x9 /* x0 := enckey */ @@ -539,6 +541,7 @@ ENTRY(aesarmv8_cbc_dec1) ldr q0, [x1, #-0x10]! 
/* q0 := last ciphertext block */ str q0, [x4] /* update iv */ b 2f + _ALIGN_TEXT 1: ldr q31, [x1, #-0x10]! /* q31 := chaining value */ eor v0.16b, v0.16b, v31.16b /* q0 := plaintext block */ str q0, [x2, #-0x10]! /* store plaintext block */ @@ -576,6 +579,7 @@ ENTRY(aesarmv8_cbc_dec8) ldp q6, q7, [x1, #-0x20]! /* q6, q7 := last ciphertext blocks */ str q7, [x4] /* update iv */ b 2f + _ALIGN_TEXT 1: ldp q6, q7, [x1, #-0x20]! eor v0.16b, v0.16b, v7.16b /* q0 := pt0 */ stp q0, q1, [x2, #-0x20]! @@ -629,6 +633,7 @@ ENTRY(aesarmv8_xts_enc1) mov x9, x0 /* x9 := enckey */ mov x10, x3 /* x10 := nbytes */ ldr q31, [x4] /* q31 := tweak */ + _ALIGN_TEXT 1: ldr q0, [x1], #0x10 /* q0 := ptxt */ mov x0, x9 /* x0 := enckey */ mov x3, x5 /* x3 := nrounds */ @@ -661,6 +666,7 @@ ENTRY(aesarmv8_xts_enc8) mov x9, x0 /* x9 := enckey */ mov x10, x3 /* x10 := nbytes */ ldr q31, [x4] /* q31 := tweak */ + _ALIGN_TEXT 1: mov v24.16b, v31.16b /* q24 := tweak[0] */ bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */ mov v25.16b, v31.16b /* q25 := tweak[1] */ @@ -729,6 +735,7 @@ ENTRY(aesarmv8_xts_dec1) mov x9, x0 /* x9 := deckey */ mov x10, x3 /* x10 := nbytes */ ldr q31, [x4] /* q31 := tweak */ + _ALIGN_TEXT 1: ldr q0, [x1], #0x10 /* q0 := ctxt */ mov x0, x9 /* x0 := deckey */ mov x3, x5 /* x3 := nrounds */ @@ -761,6 +768,7 @@ ENTRY(aesarmv8_xts_dec8) mov x9, x0 /* x9 := deckey */ mov x10, x3 /* x10 := nbytes */ ldr q31, [x4] /* q31 := tweak */ + _ALIGN_TEXT 1: mov v24.16b, v31.16b /* q24 := tweak[0] */ bl aesarmv8_xts_mulx /* q31 *= x; trash x0/q0/q1 */ mov v25.16b, v31.16b /* q25 := tweak[1] */ @@ -879,6 +887,7 @@ ENTRY(aesarmv8_cbcmac_update1) ldr q0, [x3] /* q0 := initial authenticator */ mov x9, x0 /* x9 := enckey */ mov x5, x3 /* x5 := (enc1 trashes x3) */ + _ALIGN_TEXT 1: ldr q1, [x1], #0x10 /* q1 := plaintext block */ mov x0, x9 /* x0 := enckey */ mov x3, x4 /* x3 := nrounds */ @@ -913,6 +922,7 @@ ENTRY(aesarmv8_ccm_enc1) #if _BYTE_ORDER == _LITTLE_ENDIAN rev32 v2.16b, v2.16b /* q2 
:= ctr (host-endian) */ #endif + _ALIGN_TEXT 1: ldr q3, [x1], #0x10 /* q3 := plaintext block */ add v2.4s, v2.4s, v5.4s /* increment ctr (32-bit) */ mov x0, x9 /* x0 := enckey */ @@ -972,6 +982,7 @@ ENTRY(aesarmv8_ccm_dec1) bl aesarmv8_enc1 /* q0 := pad; trash x0/x3/q16 */ b 2f + _ALIGN_TEXT 1: /* * Authenticate the last block and decrypt the next block * simultaneously. @@ -1031,6 +1042,7 @@ END(ctr32_inc) aesarmv8_enc1: ldr q16, [x0], #0x10 /* load round key */ b 2f + _ALIGN_TEXT 1: /* q0 := MixColumns(q0) */ aesmc v0.16b, v0.16b 2: subs x3, x3, #1 @@ -1056,6 +1068,7 @@ END(aesarmv8_enc1) aesarmv8_enc2: ldr q16, [x0], #0x10 /* load round key */ b 2f + _ALIGN_TEXT 1: /* q[i] := MixColumns(q[i]) */ aesmc v0.16b, v0.16b aesmc v1.16b, v1.16b @@ -1085,6 +1098,7 @@ END(aesarmv8_enc2) aesarmv8_enc8: ldr q16, [x0], #0x10 /* load round key */ b 2f +
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:53:23 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S src/sys/crypto/aes/arch/x86: aes_ni_64.S src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Align critical-path loops in AES and ChaCha. To generate a diff of this commit: cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_32.S cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_ni_64.S cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:52:11 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: PIC for aes_neon_32.S. Without this, tests/sys/crypto/aes/t_aes fails to start on armv7 because of R_ARM_ABS32 relocations in a nonwritable text segment for a PIE -- which atf quietly ignores in the final report! Yikes. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.2 --- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1 Mon Jun 29 23:57:56 2020 +++ src/sys/crypto/aes/arch/arm/aes_neon_32.S Mon Jul 27 20:52:10 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_neon_32.S,v 1.1 2020/06/29 23:57:56 riastradh Exp $ */ +/* $NetBSD: aes_neon_32.S,v 1.2 2020/07/27 20:52:10 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -30,8 +30,14 @@ .fpu neon + .text + .p2align 2 +.Lconstants_addr: + .long .Lconstants - . + .section .rodata .p2align 4 +.Lconstants: .type inv,_ASM_TYPE_OBJECT inv: @@ -239,7 +245,7 @@ ENTRY(aes_neon_enc1) * r3: rmod4 * r4: mc_forward * r5: mc_backward - * r6,r7,r8,r10,r11: temporaries + * r6,r7,r8,r10,r11,r12: temporaries * q0={d0-d1}: x/ak/A * q1={d2-d3}: 0x0f0f... * q2={d4-d5}: lo/k/j/io @@ -258,23 +264,30 @@ ENTRY(aes_neon_enc1) * q15={d30-d31}: A2_B/sr[rmod4] */ + /* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */ + ldr r12, .Lconstants_addr + adr r11, .Lconstants_addr + vld1.64 {d28-d29}, [r0 :128]! 
/* q14 = *rk++ */ movw r3, #0 vmov.i8 q1, #0x0f + /* r12 := .Lconstants */ + add r12, r12, r11 + /* (q4, q5) := (iptlo, ipthi) */ - ldr r6, =iptlo - ldr r7, =ipthi + add r6, r12, #(iptlo - .Lconstants) + add r7, r12, #(ipthi - .Lconstants) vld1.64 {d8-d9}, [r6 :128] vld1.64 {d10-d11}, [r7 :128] /* load the rest of the constants */ - ldr r4, =sb1_0 - ldr r5, =sb1_1 - ldr r6, =sb2_0 - ldr r7, =sb2_1 - ldr r8, =inv - ldr r10, =inva + add r4, r12, #(sb1_0 - .Lconstants) + add r5, r12, #(sb1_1 - .Lconstants) + add r6, r12, #(sb2_0 - .Lconstants) + add r7, r12, #(sb2_1 - .Lconstants) + add r8, r12, #(inv - .Lconstants) + add r10, r12, #(inva - .Lconstants) vld1.64 {d12-d13}, [r4 :128] /* q6 = sb1[0] */ vld1.64 {d14-d15}, [r5 :128] /* q7 = sb1[1] */ vld1.64 {d16-d17}, [r6 :128] /* q8 = sb2[0] */ @@ -283,8 +296,8 @@ ENTRY(aes_neon_enc1) vld1.64 {d22-d23}, [r10 :128] /* q11 = inva */ /* (r4, r5) := (_forward[0], _backward[0]) */ - ldr r4, =mc_forward - ldr r5, =mc_backward + add r4, r12, #(mc_forward - .Lconstants) + add r5, r12, #(mc_backward - .Lconstants) /* (q2, q3) := (lo, hi) */ vshr.u8 q3, q0, #4 @@ -392,9 +405,9 @@ ENTRY(aes_neon_enc1) bne 1b /* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */ - ldr r8, =sr - ldr r6, =sbo_0 - ldr r7, =sbo_1 + add r8, r12, #(sr - .Lconstants) + add r6, r12, #(sbo_0 - .Lconstants) + add r7, r12, #(sbo_1 - .Lconstants) add r8, r8, r3, lsl #4 vld1.64 {d12-d13}, [r6 :128] vld1.64 {d14-d15}, [r7 :128] @@ -469,23 +482,30 @@ ENTRY(aes_neon_dec1) * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)] */ + /* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */ + ldr r12, .Lconstants_addr + adr r11, .Lconstants_addr + vld1.64 {d28-d29}, [r0 :128]! 
/* q14 = *rk++ */ rsb r3, r1, #0 /* r3 := ~(x - 1) = -x */ vmov.i8 q1, #0x0f and r3, r3, #3 /* r3 := 3 & ~(x - 1) */ + /* r12 := .Lconstants */ + add r12, r12, r11 + /* (q4, q5) := (diptlo, dipthi) */ - ldr r6, =diptlo - ldr r7, =dipthi + add r6, r12, #(diptlo - .Lconstants) + add r7, r12, #(dipthi - .Lconstants) vld1.64 {d8-d9}, [r6 :128] vld1.64 {d10-d11}, [r7 :128] /* load the rest of the constants */ - ldr r4, =dsbb_0 - ldr r5, =dsbb_1 - ldr r6, =inv - ldr r7, =inva - ldr r8, =.Lmc_forward_3 + add r4, r12, #(dsbb_0 - .Lconstants) + add r5, r12, #(dsbb_1 - .Lconstants) + add r6, r12, #(inv - .Lconstants) + add r7, r12, #(inva - .Lconstants) + add r8, r12, #(.Lmc_forward_3 - .Lconstants) vld1.64 {d12-d13}, [r4 :128] /* q6 := dsbb[0] */ vld1.64 {d14-d15}, [r5 :128] /* q7 := dsbb[1] */ vld1.64 {d20-d21}, [r6 :128] /* q10 := inv */ @@ -504,8 +524,8 @@ ENTRY(aes_neon_dec1) vtbl.8 d7, {d10-d11}, d7 /* load dsb9 */ - ldr r4, =dsb9_0 - ldr r5, =dsb9_1 + add r4, r12, #(dsb9_0 - .Lconstants) + add r5, r12, #(dsb9_1 - .Lconstants) vld1.64 {d8-d9}, [r4 :128] /* q4 := dsb9[0] */ vld1.64 {d10-d11}, [r5 :128] /* q5 := dsb9[1] */ @@ -516,7 +536,7 @@ ENTRY(aes_neon_dec1) b 2f 1: /* load dsbd */ - ldr r4, =dsbd_0 + add r4, r12, #(dsbd_0 - .Lconstants) vld1.64 {d16-d17}, [r4 :128]! /* q8 := dsbd[0] */ vld1.64 {d18-d19}, [r4 :128] /* q9 := dsbd[1] */ @@ -543,7 +563,7
CVS commit: src/sys/crypto/aes/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:52:11 UTC 2020 Modified Files: src/sys/crypto/aes/arch/arm: aes_neon_32.S Log Message: PIC for aes_neon_32.S. Without this, tests/sys/crypto/aes/t_aes fails to start on armv7 because of R_ARM_ABS32 relocations in a nonwritable text segment for a PIE -- which atf quietly ignores in the final report! Yikes. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_32.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:50:25 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Use <machine/asm.h> rather than copying things from it here. Vestige from userland build on netbsd-9 during development. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.2 --- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.1 Sat Jul 25 22:51:57 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Mon Jul 27 20:50:25 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon_64.S,v 1.1 2020/07/25 22:51:57 riastradh Exp $ */ +/* $NetBSD: chacha_neon_64.S,v 1.2 2020/07/27 20:50:25 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -26,23 +26,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -.macro adrl reg, addr - adrp \reg, \addr - add \reg, \reg, #:lo12:\addr -.endm - -#define _ALIGN_TEXT \ - .p2align 4 - -#define ENTRY(x) \ - .text; \ - _ALIGN_TEXT; \ - .global x; \ - .type x,@function; \ -x: - -#define END(x) \ - .size x, . - x +#include <machine/asm.h> #define ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \ STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r); \
CVS commit: src/sys/crypto/chacha/arch/arm
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:50:25 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon_64.S Log Message: Use <machine/asm.h> rather than copying things from it here. Vestige from userland build on netbsd-9 during development. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:49:10 UTC 2020 Modified Files: src/sys/crypto/chacha: chacha_impl.c Log Message: Simplify ChaCha selection and allow it to be used much earlier. This way we can use it for cprng_fast early on. ChaCha is easy because there's no data formats that must be preserved from call to call but vary from implementation to implementation -- we could even make it a sysctl knob to dynamically select it with negligible cost. (In contrast, different AES implementations use different expanded key formats which must be preserved from aes_setenckey to aes_enc, for example, which means a considerably greater burden on dynamic selection that's not really worth it.) To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/chacha_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/chacha_impl.c diff -u src/sys/crypto/chacha/chacha_impl.c:1.2 src/sys/crypto/chacha/chacha_impl.c:1.3 --- src/sys/crypto/chacha/chacha_impl.c:1.2 Mon Jul 27 20:45:15 2020 +++ src/sys/crypto/chacha/chacha_impl.c Mon Jul 27 20:49:10 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_impl.c,v 1.2 2020/07/27 20:45:15 riastradh Exp $ */ +/* $NetBSD: chacha_impl.c,v 1.3 2020/07/27 20:49:10 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -38,17 +38,14 @@ #include "chacha.h" #include "chacha_ref.h" -static const struct chacha_impl *chacha_md_impl __read_mostly; -static const struct chacha_impl *chacha_impl __read_mostly; +static const struct chacha_impl *chacha_md_impl __read_mostly; +static const struct chacha_impl *chacha_impl __read_mostly = _ref_impl; static int sysctl_kern_crypto_chacha_selected(SYSCTLFN_ARGS) { struct sysctlnode node; - KASSERTMSG(chacha_impl != NULL, - "sysctl ran before ChaCha implementation was selected"); - node = *rnode; node.sysctl_data = __UNCONST(chacha_impl->ci_name); node.sysctl_size = strlen(chacha_impl->ci_name) + 1; @@ -79,8 +76,6 @@ static int chacha_select(void) { - KASSERT(chacha_impl == NULL); - if (chacha_md_impl) { if (chacha_selftest(chacha_md_impl)) aprint_error("chacha: self-test failed: %s\n", @@ -88,15 +83,6 @@ chacha_select(void) else chacha_impl = chacha_md_impl; } - if (chacha_impl == NULL) { - if (chacha_selftest(_ref_impl)) - aprint_error("chacha: self-test failed: %s\n", - chacha_ref_impl.ci_name); - else - chacha_impl = _ref_impl; - } - if (chacha_impl == NULL) - panic("ChaCha self-tests failed"); aprint_verbose("chacha: %s\n", chacha_impl->ci_name); return 0; @@ -118,26 +104,11 @@ chacha_modcmd(modcmd_t cmd, void *opaque } } -static void -chacha_guarantee_selected(void) -{ -#if 0 - static once_t once; - int error; - - error = RUN_ONCE(, chacha_select); - KASSERT(error == 0); -#endif -} - void chacha_md_init(const struct chacha_impl *impl) { KASSERT(cold); - KASSERTMSG(chacha_impl == NULL, - "ChaCha implementation `%s' already chosen, can't offer `%s'", - chacha_impl->ci_name, impl->ci_name); KASSERTMSG(chacha_md_impl == NULL, "ChaCha implementation `%s' already offered, can't offer `%s'", chacha_md_impl->ci_name, impl->ci_name); @@ -153,7 +124,6 @@ chacha_core(uint8_t out[restrict static unsigned nr) { - chacha_guarantee_selected(); (*chacha_impl->ci_chacha_core)(out, in, k, c, nr); } @@ -165,7 +135,6 @@ hchacha(uint8_t out[restrict static 
HCHA unsigned nr) { - chacha_guarantee_selected(); (*chacha_impl->ci_hchacha)(out, in, k, c, nr); } @@ -176,7 +145,6 @@ chacha_stream(uint8_t *restrict s, size_ unsigned nr) { - chacha_guarantee_selected(); (*chacha_impl->ci_chacha_stream)(s, nbytes, blkno, nonce, key, nr); } @@ -187,7 +155,6 @@ chacha_stream_xor(uint8_t *c, const uint unsigned nr) { - chacha_guarantee_selected(); (*chacha_impl->ci_chacha_stream_xor)(c, p, nbytes, blkno, nonce, key, nr); } @@ -199,7 +166,6 @@ xchacha_stream(uint8_t *restrict s, size unsigned nr) { - chacha_guarantee_selected(); (*chacha_impl->ci_xchacha_stream)(s, nbytes, blkno, nonce, key, nr); } @@ -210,7 +176,6 @@ xchacha_stream_xor(uint8_t *c, const uin unsigned nr) { - chacha_guarantee_selected(); (*chacha_impl->ci_xchacha_stream_xor)(c, p, nbytes, blkno, nonce, key, nr); }
CVS commit: src/sys/crypto/chacha
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:49:10 UTC 2020 Modified Files: src/sys/crypto/chacha: chacha_impl.c Log Message: Simplify ChaCha selection and allow it to be used much earlier. This way we can use it for cprng_fast early on. ChaCha is easy because there's no data formats that must be preserved from call to call but vary from implementation to implementation -- we could even make it a sysctl knob to dynamically select it with negligible cost. (In contrast, different AES implementations use different expanded key formats which must be preserved from aes_setenckey to aes_enc, for example, which means a considerably greater burden on dynamic selection that's not really worth it.) To generate a diff of this commit: cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/chacha_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto/chacha/arch
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:48:18 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon.c src/sys/crypto/chacha/arch/x86: chacha_sse2.c Log Message: Reduce some duplication. Shouldn't substantively hurt performance -- the comparison that has been moved into the loop was essentially the former loop condition -- and may improve performance by reducing code size since there's only one inline call to chacha_permute instead of two. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/x86/chacha_sse2.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.2 --- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.1 Sat Jul 25 22:51:57 2020 +++ src/sys/crypto/chacha/arch/arm/chacha_neon.c Mon Jul 27 20:48:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_neon.c,v 1.1 2020/07/25 22:51:57 riastradh Exp $ */ +/* $NetBSD: chacha_neon.c,v 1.2 2020/07/27 20:48:18 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -168,7 +168,7 @@ chacha_stream_neon(uint8_t *restrict s, le32dec(nonce + 8) }; - for (; n >= 64; s += 64, n -= 64) { + for (; n; s += 64, n -= 64) { r0 = in0; r1 = in1; r2 = in2; @@ -178,32 +178,25 @@ chacha_stream_neon(uint8_t *restrict s, r1 = vhtole_u32(vaddq_u32(r1, in1)); r2 = vhtole_u32(vaddq_u32(r2, in2)); r3 = vhtole_u32(vaddq_u32(r3, in3)); + + if (n < 64) { +uint8_t buf[64] __aligned(16); + +vst1q_u32((uint32_t *)buf + 4*0, r0); +vst1q_u32((uint32_t *)buf + 4*1, r1); +vst1q_u32((uint32_t *)buf + 4*2, r2); +vst1q_u32((uint32_t *)buf + 4*3, r3); +memcpy(s, buf, n); + +break; + } + vst1q_u32((uint32_t *)s + 4*0, r0); vst1q_u32((uint32_t *)s + 4*1, r1); vst1q_u32((uint32_t *)s + 4*2, r2); vst1q_u32((uint32_t *)s + 4*3, r3); in3 = vaddq_u32(in3, blkno_inc); } - - if (n) { - uint8_t buf[64]; - - r0 = in0; - r1 = in1; - r2 = in2; - r3 = in3; - chacha_permute(&r0, &r1, &r2, &r3, nr); - r0 = vhtole_u32(vaddq_u32(r0, in0)); - r1 = vhtole_u32(vaddq_u32(r1, in1)); - r2 = vhtole_u32(vaddq_u32(r2, in2)); - r3 = vhtole_u32(vaddq_u32(r3, in3)); - vst1q_u32((uint32_t *)buf + 4*0, r0); - vst1q_u32((uint32_t *)buf + 4*1, r1); - vst1q_u32((uint32_t *)buf + 4*2, r2); - vst1q_u32((uint32_t *)buf + 4*3, r3); - - memcpy(s, buf, n); - } } } @@ -234,7 +227,7 @@ chacha_stream_xor_neon(uint8_t *s, const le32dec(nonce + 8) }; - for (; n >= 64; s += 64, p += 64, n -= 64) { + for (; n; s += 64, p += 64, n -= 64) { r0 = in0; r1 = in1; r2 = in2; @@ -244,6 +237,25 @@ chacha_stream_xor_neon(uint8_t *s, const r1 = vhtole_u32(vaddq_u32(r1, in1)); r2 = vhtole_u32(vaddq_u32(r2, in2)); r3 = vhtole_u32(vaddq_u32(r3, in3)); + + if (n < 64) { +uint8_t buf[64] __aligned(16); +unsigned i; + +vst1q_u32((uint32_t *)buf + 4*0, r0); +vst1q_u32((uint32_t *)buf + 4*1, r1); +vst1q_u32((uint32_t *)buf + 4*2, r2); +vst1q_u32((uint32_t *)buf + 4*3, r3); + +for (i = 0; i < n - n%4; i += 4) + le32enc(s + i, + le32dec(p + i) ^ le32dec(buf + i)); +for (; i < n; i++) + s[i] = p[i] ^ buf[i]; + +break; + } + r0 ^= 
vld1q_u32((const uint32_t *)p + 4*0); r1 ^= vld1q_u32((const uint32_t *)p + 4*1); r2 ^= vld1q_u32((const uint32_t *)p + 4*2); @@ -254,31 +266,6 @@ chacha_stream_xor_neon(uint8_t *s, const vst1q_u32((uint32_t *)s + 4*3, r3); in3 = vaddq_u32(in3, blkno_inc); } - - if (n) { - uint8_t buf[64]; - unsigned i; - - r0 = in0; - r1 = in1; - r2 = in2; - r3 = in3; - chacha_permute(&r0, &r1, &r2, &r3, nr); - r0 = vhtole_u32(vaddq_u32(r0, in0)); - r1 = vhtole_u32(vaddq_u32(r1, in1)); - r2 = vhtole_u32(vaddq_u32(r2, in2)); - r3 = vhtole_u32(vaddq_u32(r3, in3)); - vst1q_u32((uint32_t *)buf + 4*0, r0); - vst1q_u32((uint32_t *)buf + 4*1, r1); - vst1q_u32((uint32_t *)buf + 4*2, r2); - vst1q_u32((uint32_t *)buf + 4*3, r3); - - for (i = 0; i < n - n%4; i += 4) -le32enc(s + i, -le32dec(p + i) ^ le32dec(buf + i)); - for (; i < n; i++) -s[i] = p[i] ^ buf[i]; - } } } Index: src/sys/crypto/chacha/arch/x86/chacha_sse2.c diff -u src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.1 src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.2 --- src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.1 Sat Jul 25 22:49:20 2020 +++ src/sys/crypto/chacha/arch/x86/chacha_sse2.c Mon Jul 27 20:48:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_sse2.c,v 1.1 2020/07/25 22:49:20 riastradh Exp $ */ +/* $NetBSD: chacha_sse2.c,v 1.2 2020/07/27 20:48:18 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD
CVS commit: src/sys/crypto/chacha/arch
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:48:18 UTC 2020 Modified Files: src/sys/crypto/chacha/arch/arm: chacha_neon.c src/sys/crypto/chacha/arch/x86: chacha_sse2.c Log Message: Reduce some duplication. Shouldn't substantively hurt performance -- the comparison that has been moved into the loop was essentially the former loop condition -- and may improve performance by reducing code size since there's only one inline call to chacha_permute instead of two. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/x86/chacha_sse2.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
CVS commit: src/sys/crypto
Module Name:src Committed By: riastradh Date: Mon Jul 27 20:45:15 UTC 2020 Modified Files: src/sys/crypto/aes: aes_impl.c src/sys/crypto/chacha: chacha_impl.c Log Message: New sysctl subtree kern.crypto. kern.crypto.aes.selected (formerly hw.aes_impl) kern.crypto.chacha.selected (formerly hw.chacha_impl) XXX Should maybe deduplicate creation of kern.crypto. To generate a diff of this commit: cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/aes_impl.c cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/chacha_impl.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. Modified files: Index: src/sys/crypto/aes/aes_impl.c diff -u src/sys/crypto/aes/aes_impl.c:1.8 src/sys/crypto/aes/aes_impl.c:1.9 --- src/sys/crypto/aes/aes_impl.c:1.8 Sat Jul 25 22:42:03 2020 +++ src/sys/crypto/aes/aes_impl.c Mon Jul 27 20:45:15 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $ */ +/* $NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $"); +__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $"); #include #include @@ -48,7 +48,7 @@ static const struct aes_impl *aes_md_imp static const struct aes_impl *aes_impl __read_mostly; static int -sysctl_hw_aes_impl(SYSCTLFN_ARGS) +sysctl_kern_crypto_aes_selected(SYSCTLFN_ARGS) { struct sysctlnode node; @@ -61,14 +61,24 @@ sysctl_hw_aes_impl(SYSCTLFN_ARGS) return sysctl_lookup(SYSCTLFN_CALL(&node)); } -SYSCTL_SETUP(sysctl_hw_aes_setup, "sysctl hw.aes_impl setup") +SYSCTL_SETUP(sysctl_kern_crypto_aes_setup, "sysctl kern.crypto.aes setup") { + const struct sysctlnode *cnode; + const struct sysctlnode *aes_node; - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "aes_impl", + sysctl_createv(clog, 0, NULL, &cnode, 0, CTLTYPE_NODE, "crypto", + SYSCTL_DESCR("Kernel cryptography"), + NULL, 0, NULL, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &cnode, &aes_node, 0, CTLTYPE_NODE, "aes", + SYSCTL_DESCR("AES -- Advanced Encryption Standard"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &aes_node, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "selected", SYSCTL_DESCR("Selected AES implementation"), - sysctl_hw_aes_impl, 0, NULL, 0, - CTL_HW, CTL_CREATE, CTL_EOL); + sysctl_kern_crypto_aes_selected, 0, NULL, 0, + CTL_CREATE, CTL_EOL); } /* Index: src/sys/crypto/chacha/chacha_impl.c diff -u src/sys/crypto/chacha/chacha_impl.c:1.1 src/sys/crypto/chacha/chacha_impl.c:1.2 --- src/sys/crypto/chacha/chacha_impl.c:1.1 Sat Jul 25 22:46:34 2020 +++ src/sys/crypto/chacha/chacha_impl.c Mon Jul 27 20:45:15 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chacha_impl.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $ */ +/* $NetBSD: chacha_impl.c,v 1.2 2020/07/27 20:45:15 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. 
@@ -42,7 +42,7 @@ static const struct chacha_impl *chacha_ static const struct chacha_impl *chacha_impl __read_mostly; static int -sysctl_hw_chacha_impl(SYSCTLFN_ARGS) +sysctl_kern_crypto_chacha_selected(SYSCTLFN_ARGS) { struct sysctlnode node; @@ -55,14 +55,24 @@ sysctl_hw_chacha_impl(SYSCTLFN_ARGS) return sysctl_lookup(SYSCTLFN_CALL(&node)); } -SYSCTL_SETUP(sysctl_hw_chacha_setup, "sysctl hw.chacha_impl setup") +SYSCTL_SETUP(sysctl_kern_crypto_chacha_setup, "sysctl kern.crypto.chacha setup") { + const struct sysctlnode *cnode; + const struct sysctlnode *chacha_node; - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "chacha_impl", + sysctl_createv(clog, 0, NULL, &cnode, 0, CTLTYPE_NODE, "crypto", + SYSCTL_DESCR("Kernel cryptography"), + NULL, 0, NULL, 0, + CTL_KERN, CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &cnode, &chacha_node, 0, CTLTYPE_NODE, "chacha", + SYSCTL_DESCR("ChaCha"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + sysctl_createv(clog, 0, &chacha_node, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "selected", SYSCTL_DESCR("Selected ChaCha implementation"), - sysctl_hw_chacha_impl, 0, NULL, 0, - CTL_HW, CTL_CREATE, CTL_EOL); + sysctl_kern_crypto_chacha_selected, 0, NULL, 0, + CTL_CREATE, CTL_EOL); } static int