CVS commit: src/sys/crypto/arch/arm

2023-08-06 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug  7 01:14:19 UTC 2023

Modified Files:
src/sys/crypto/arch/arm: arm_neon.h

Log Message:
sys/crypto: aarch64: Catch up with builtin rename for GCC12

Kernel self tests successfully pass for aarch64{,eb}.

Same binary generated by GCC10 and GCC12 for:
---
#include 
#include "arm_neon.h"

uint32x4_t my_vshrq_n_u32(uint32x4_t v, uint8_t bits)
{
return vshrq_n_u32(v, bits);
}

uint8x16_t my_vshrq_n_u8(uint8x16_t v, uint8_t bits)
{
return vshrq_n_u8(v, bits);
}
---


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/arch/arm/arm_neon.h
diff -u src/sys/crypto/arch/arm/arm_neon.h:1.1 src/sys/crypto/arch/arm/arm_neon.h:1.2
--- src/sys/crypto/arch/arm/arm_neon.h:1.1	Mon Aug  7 01:07:36 2023
+++ src/sys/crypto/arch/arm/arm_neon.h	Mon Aug  7 01:14:19 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.1 2023/08/07 01:07:36 rin Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.2 2023/08/07 01:14:19 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -522,7 +522,11 @@ static __inline uint32x4_t
 vshrq_n_u32(uint32x4_t __v, uint8_t __bits)
 {
 #ifdef __aarch64__
+#  if __GNUC_PREREQ__(12, 0)
+	return __builtin_aarch64_lshrv4si_uus(__v, __bits);
+#  else
 	return (uint32x4_t)__builtin_aarch64_lshrv4si((int32x4_t)__v, __bits);
+#  endif
 #else
 	return (uint32x4_t)__builtin_neon_vshru_nv4si((int32x4_t)__v, __bits);
 #endif
@@ -538,7 +542,11 @@ static __inline uint8x16_t
 vshrq_n_u8(uint8x16_t __v, uint8_t __bits)
 {
 #ifdef __aarch64__
+#  if __GNUC_PREREQ__(12, 0)
+	return __builtin_aarch64_lshrv16qi_uus(__v, __bits);
+#  else
 	return (uint8x16_t)__builtin_aarch64_lshrv16qi((int8x16_t)__v, __bits);
+#  endif
 #else
 	return (uint8x16_t)__builtin_neon_vshru_nv16qi((int8x16_t)__v, __bits);
 #endif



CVS commit: src/sys/crypto/arch/arm

2023-08-06 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug  7 01:14:19 UTC 2023

Modified Files:
src/sys/crypto/arch/arm: arm_neon.h

Log Message:
sys/crypto: aarch64: Catch up with builtin rename for GCC12

Kernel self tests successfully pass for aarch64{,eb}.

Same binary generated by GCC10 and GCC12 for:
---
#include 
#include "arm_neon.h"

uint32x4_t my_vshrq_n_u32(uint32x4_t v, uint8_t bits)
{
return vshrq_n_u32(v, bits);
}

uint8x16_t my_vshrq_n_u8(uint8x16_t v, uint8_t bits)
{
return vshrq_n_u8(v, bits);
}
---


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2023-08-06 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug  7 01:07:36 UTC 2023

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_impl.h
src/sys/crypto/aes/arch/x86: aes_sse2_impl.h aes_ssse3_impl.h
src/sys/crypto/chacha/arch/arm: chacha_neon.c
src/sys/crypto/chacha/arch/x86: chacha_sse2.c
Added Files:
src/sys/crypto/arch/arm: arm_neon.h arm_neon_imm.h
src/sys/crypto/arch/x86: immintrin.h immintrin_ext.h
Removed Files:
src/sys/crypto/aes/arch/arm: arm_neon.h arm_neon_imm.h
src/sys/crypto/aes/arch/x86: immintrin.h immintrin_ext.h
src/sys/crypto/chacha/arch/arm: arm_neon.h arm_neon_imm.h
src/sys/crypto/chacha/arch/x86: immintrin.h

Log Message:
sys/crypto: Introduce arch/{arm,x86} to share common MD headers

Dedup between aes and chacha. No binary changes.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.12 -r0 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.2 -r0 src/sys/crypto/aes/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_sse2_impl.h
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h
cvs rdiff -u -r1.5 -r0 src/sys/crypto/aes/arch/x86/immintrin.h
cvs rdiff -u -r1.1 -r0 src/sys/crypto/aes/arch/x86/immintrin_ext.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/arm/arm_neon.h \
src/sys/crypto/arch/arm/arm_neon_imm.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/x86/immintrin.h \
src/sys/crypto/arch/x86/immintrin_ext.h
cvs rdiff -u -r1.7 -r0 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r1.2 -r0 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/chacha/arch/arm/chacha_neon.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/x86/chacha_sse2.c
cvs rdiff -u -r1.1 -r0 src/sys/crypto/chacha/arch/x86/immintrin.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_impl.h
diff -u src/sys/crypto/aes/arch/arm/aes_neon_impl.h:1.3 src/sys/crypto/aes/arch/arm/aes_neon_impl.h:1.4
--- src/sys/crypto/aes/arch/arm/aes_neon_impl.h:1.3	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_impl.h	Mon Aug  7 01:07:35 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_impl.h,v 1.3 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_impl.h,v 1.4 2023/08/07 01:07:35 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -31,8 +31,8 @@
 
 #include 
 
-#include "arm_neon.h"
-#include "arm_neon_imm.h"
+#include <crypto/arch/arm/arm_neon.h>
+#include <crypto/arch/arm/arm_neon_imm.h>
 
 #include 
 #include 

Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.h
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.h:1.2 src/sys/crypto/aes/arch/x86/aes_sse2_impl.h:1.3
--- src/sys/crypto/aes/arch/x86/aes_sse2_impl.h:1.2	Mon Jun 29 23:50:05 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.h	Mon Aug  7 01:07:36 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_impl.h,v 1.2 2020/06/29 23:50:05 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_impl.h,v 1.3 2023/08/07 01:07:36 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -33,8 +33,8 @@
 
 #include 
 #include 
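-#include <crypto/aes/arch/x86/immintrin.h>
-#include <crypto/aes/arch/x86/immintrin_ext.h>
+#include <crypto/arch/x86/immintrin.h>
+#include <crypto/arch/x86/immintrin_ext.h>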
 
 void aes_sse2_bitslice_Sbox(__m128i[static 4]);
 void aes_sse2_bitslice_invSbox(__m128i[static 4]);

Index: src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h
diff -u src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h:1.1 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h:1.2
--- src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h:1.1	Mon Jun 29 23:51:35 2020
+++ src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h	Mon Aug  7 01:07:36 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ssse3_impl.h,v 1.1 2020/06/29 23:51:35 riastradh Exp $	*/
+/*	$NetBSD: aes_ssse3_impl.h,v 1.2 2023/08/07 01:07:36 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -33,8 +33,8 @@
 
 #include 
 #include 
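-#include <crypto/aes/arch/x86/immintrin.h>
-#include <crypto/aes/arch/x86/immintrin_ext.h>
+#include <crypto/arch/x86/immintrin.h>
+#include <crypto/arch/x86/immintrin_ext.h>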
 
 __m128i aes_ssse3_enc1(const struct aesenc *, __m128i, unsigned);
 __m128i aes_ssse3_dec1(const struct aesdec *, __m128i, unsigned);

Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.8 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.9
--- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.8	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon.c	Mon Aug  7 01:07:36 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon.c,v 1.8 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon.c,v 1.9 2023/08/07 01:07:36 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -29,8 +29,8 @@
 #include 
 #include 
 
-#include "arm_neon.h"
-#include "arm_neon_imm.h"
+#include <crypto/arch/arm/arm_neon.h>
+#include <crypto/arch/arm/arm_neon_imm.h>
 #include "chacha_neon.h"
 
 /*

Index: src/sys/crypto/chacha/arch/x86/chacha_sse2.c
diff -u src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.2 src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.3
--- 

CVS commit: src/sys/crypto

2023-08-06 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug  7 01:07:36 UTC 2023

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_impl.h
src/sys/crypto/aes/arch/x86: aes_sse2_impl.h aes_ssse3_impl.h
src/sys/crypto/chacha/arch/arm: chacha_neon.c
src/sys/crypto/chacha/arch/x86: chacha_sse2.c
Added Files:
src/sys/crypto/arch/arm: arm_neon.h arm_neon_imm.h
src/sys/crypto/arch/x86: immintrin.h immintrin_ext.h
Removed Files:
src/sys/crypto/aes/arch/arm: arm_neon.h arm_neon_imm.h
src/sys/crypto/aes/arch/x86: immintrin.h immintrin_ext.h
src/sys/crypto/chacha/arch/arm: arm_neon.h arm_neon_imm.h
src/sys/crypto/chacha/arch/x86: immintrin.h

Log Message:
sys/crypto: Introduce arch/{arm,x86} to share common MD headers

Dedup between aes and chacha. No binary changes.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.12 -r0 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.2 -r0 src/sys/crypto/aes/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_sse2_impl.h
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.h
cvs rdiff -u -r1.5 -r0 src/sys/crypto/aes/arch/x86/immintrin.h
cvs rdiff -u -r1.1 -r0 src/sys/crypto/aes/arch/x86/immintrin_ext.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/arm/arm_neon.h \
src/sys/crypto/arch/arm/arm_neon_imm.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/arch/x86/immintrin.h \
src/sys/crypto/arch/x86/immintrin_ext.h
cvs rdiff -u -r1.7 -r0 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r1.2 -r0 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/chacha/arch/arm/chacha_neon.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/x86/chacha_sse2.c
cvs rdiff -u -r1.1 -r0 src/sys/crypto/chacha/arch/x86/immintrin.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2023-08-06 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug  7 00:58:35 UTC 2023

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h

Log Message:
sys/crypto/{aes,chacha}/arch/arm/arm_neon.h: Sync (whitespace fix)

No binary changes.


To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.11 src/sys/crypto/aes/arch/arm/arm_neon.h:1.12
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.11	Mon Sep  7 18:06:13 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Mon Aug  7 00:58:35 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.11 2020/09/07 18:06:13 jakllsch Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.12 2023/08/07 00:58:35 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -232,7 +232,7 @@ static __inline uint32_t
 vgetq_lane_u32(uint32x4_t __v, uint8_t __i)
 {
 #ifdef __aarch64__
-	return __v[__neon_laneq_index(__v,__i)];
+	return __v[__neon_laneq_index(__v, __i)];
 #else
 	return (uint32_t)__builtin_neon_vget_laneuv4si((int32x4_t)__v, __i);
 #endif



CVS commit: src/sys/crypto/aes/arch/arm

2023-08-06 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug  7 00:58:35 UTC 2023

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h

Log Message:
sys/crypto/{aes,chacha}/arch/arm/arm_neon.h: Sync (whitespace fix)

No binary changes.


To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/cprng_fast

2023-08-05 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Aug  5 11:39:18 UTC 2023

Modified Files:
src/sys/crypto/cprng_fast: cprng_fast.c

Log Message:
cprng_fast(9): Drop and retake percpu reference across cprng_strong.

cprng_strong may sleep on an adaptive lock (via entropy_extract),
which invalidates percpu(9) references.

Discovered by stumbling upon this panic in a test run:

panic: kernel diagnostic assertion "(cprng == percpu_getref(cprng_fast_percpu)) 
&& (percpu_putref(cprng_fast_percpu), true)" failed: file 
"/home/riastradh/netbsd/current/src/sys/rump/librump/rumpkern/../../../crypto/cprng_fast/cprng_fast.c",
 line 117

XXX pullup-10


To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 src/sys/crypto/cprng_fast/cprng_fast.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/cprng_fast/cprng_fast.c
diff -u src/sys/crypto/cprng_fast/cprng_fast.c:1.18 src/sys/crypto/cprng_fast/cprng_fast.c:1.19
--- src/sys/crypto/cprng_fast/cprng_fast.c:1.18	Thu Sep  1 18:32:25 2022
+++ src/sys/crypto/cprng_fast/cprng_fast.c	Sat Aug  5 11:39:18 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $	*/
+/*	$NetBSD: cprng_fast.c,v 1.19 2023/08/05 11:39:18 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include 
-__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.19 2023/08/05 11:39:18 riastradh Exp $");
 
 #include 
 #include 
@@ -58,7 +58,7 @@ struct cprng_fast {
 };
 
 static void	cprng_fast_init_cpu(void *, void *, struct cpu_info *);
-static void	cprng_fast_reseed(struct cprng_fast *);
+static void	cprng_fast_reseed(struct cprng_fast **, unsigned);
 
 static void	cprng_fast_seed(struct cprng_fast *, const void *);
 static void	cprng_fast_buf(struct cprng_fast *, void *, unsigned);
@@ -93,6 +93,7 @@ static int
 cprng_fast_get(struct cprng_fast **cprngp)
 {
 	struct cprng_fast *cprng;
+	unsigned epoch;
 	int s;
 
 	KASSERT(!cpu_intr_p());
@@ -101,9 +102,10 @@ cprng_fast_get(struct cprng_fast **cprng
 	*cprngp = cprng = percpu_getref(cprng_fast_percpu);
 	s = splsoftserial();
 
-	if (__predict_false(cprng->epoch != entropy_epoch())) {
+	epoch = entropy_epoch();
+	if (__predict_false(cprng->epoch != epoch)) {
 		splx(s);
-		cprng_fast_reseed(cprng);
+		cprng_fast_reseed(cprngp, epoch);
 		s = splsoftserial();
 	}
 
@@ -121,13 +123,25 @@ cprng_fast_put(struct cprng_fast *cprng,
 }
 
 static void
-cprng_fast_reseed(struct cprng_fast *cprng)
+cprng_fast_reseed(struct cprng_fast **cprngp, unsigned epoch)
 {
-	unsigned epoch = entropy_epoch();
+	struct cprng_fast *cprng;
 	uint8_t seed[CPRNG_FAST_SEED_BYTES];
 	int s;
 
+	/*
+	 * Drop the percpu(9) reference to extract a fresh seed from
+	 * the entropy pool.  cprng_strong may sleep on an adaptive
+	 * lock, which invalidates our percpu(9) reference.
+	 *
+	 * This may race with reseeding in another thread, which is no
+	 * big deal -- worst case, we rewind the entropy epoch here and
+	 * cause the next caller to reseed again, and in the end we
+	 * just reseed a couple more times than necessary.
+	 */
+	percpu_putref(cprng_fast_percpu);
 	cprng_strong(kern_cprng, seed, sizeof(seed), 0);
+	*cprngp = cprng = percpu_getref(cprng_fast_percpu);
 
 	s = splsoftserial();
 	cprng_fast_seed(cprng, seed);



CVS commit: src/sys/crypto/cprng_fast

2023-08-05 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Aug  5 11:39:18 UTC 2023

Modified Files:
src/sys/crypto/cprng_fast: cprng_fast.c

Log Message:
cprng_fast(9): Drop and retake percpu reference across cprng_strong.

cprng_strong may sleep on an adaptive lock (via entropy_extract),
which invalidates percpu(9) references.

Discovered by stumbling upon this panic in a test run:

panic: kernel diagnostic assertion "(cprng == percpu_getref(cprng_fast_percpu)) 
&& (percpu_putref(cprng_fast_percpu), true)" failed: file 
"/home/riastradh/netbsd/current/src/sys/rump/librump/rumpkern/../../../crypto/cprng_fast/cprng_fast.c",
 line 117

XXX pullup-10


To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 src/sys/crypto/cprng_fast/cprng_fast.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2022-11-05 Thread Jared D. McNeill
Module Name:src
Committed By:   jmcneill
Date:   Sat Nov  5 17:36:33 UTC 2022

Modified Files:
src/sys/crypto/aes: aes_impl.c
src/sys/crypto/chacha: chacha_impl.c

Log Message:
Make aes and chacha prints debug only.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/aes_impl.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/chacha_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2022-11-05 Thread Jared D. McNeill
Module Name:src
Committed By:   jmcneill
Date:   Sat Nov  5 17:36:33 UTC 2022

Modified Files:
src/sys/crypto/aes: aes_impl.c
src/sys/crypto/chacha: chacha_impl.c

Log Message:
Make aes and chacha prints debug only.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/aes_impl.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/chacha_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_impl.c
diff -u src/sys/crypto/aes/aes_impl.c:1.9 src/sys/crypto/aes/aes_impl.c:1.10
--- src/sys/crypto/aes/aes_impl.c:1.9	Mon Jul 27 20:45:15 2020
+++ src/sys/crypto/aes/aes_impl.c	Sat Nov  5 17:36:33 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $	*/
+/*	$NetBSD: aes_impl.c,v 1.10 2022/11/05 17:36:33 jmcneill Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.10 2022/11/05 17:36:33 jmcneill Exp $");
 
 #include 
 #include 
@@ -121,7 +121,7 @@ aes_select(void)
 	if (aes_impl == NULL)
 		panic("AES self-tests failed");
 
-	aprint_verbose("aes: %s\n", aes_impl->ai_name);
+	aprint_debug("aes: %s\n", aes_impl->ai_name);
 	return 0;
 }
 

Index: src/sys/crypto/chacha/chacha_impl.c
diff -u src/sys/crypto/chacha/chacha_impl.c:1.3 src/sys/crypto/chacha/chacha_impl.c:1.4
--- src/sys/crypto/chacha/chacha_impl.c:1.3	Mon Jul 27 20:49:10 2020
+++ src/sys/crypto/chacha/chacha_impl.c	Sat Nov  5 17:36:33 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_impl.c,v 1.3 2020/07/27 20:49:10 riastradh Exp $	*/
+/*	$NetBSD: chacha_impl.c,v 1.4 2022/11/05 17:36:33 jmcneill Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -84,7 +84,7 @@ chacha_select(void)
 			chacha_impl = chacha_md_impl;
 	}
 
-	aprint_verbose("chacha: %s\n", chacha_impl->ci_name);
+	aprint_debug("chacha: %s\n", chacha_impl->ci_name);
 	return 0;
 }
 



CVS commit: src/sys/crypto/cprng_fast

2022-09-01 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep  1 18:32:25 UTC 2022

Modified Files:
src/sys/crypto/cprng_fast: cprng_fast.c

Log Message:
cprng_fast(9): Assert not in pserialize read section.

This may sleep to take the global entropy lock in case it needs to be
reseeded.  If that happens we can't be in a pserialize read section.


To generate a diff of this commit:
cvs rdiff -u -r1.17 -r1.18 src/sys/crypto/cprng_fast/cprng_fast.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/cprng_fast/cprng_fast.c
diff -u src/sys/crypto/cprng_fast/cprng_fast.c:1.17 src/sys/crypto/cprng_fast/cprng_fast.c:1.18
--- src/sys/crypto/cprng_fast/cprng_fast.c:1.17	Wed Jun  1 15:44:37 2022
+++ src/sys/crypto/cprng_fast/cprng_fast.c	Thu Sep  1 18:32:25 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $	*/
+/*	$NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include 
-__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.18 2022/09/01 18:32:25 riastradh Exp $");
 
 #include 
 #include 
@@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -95,6 +96,7 @@ cprng_fast_get(struct cprng_fast **cprng
 	int s;
 
 	KASSERT(!cpu_intr_p());
+	KASSERT(pserialize_not_in_read_section());
 
 	*cprngp = cprng = percpu_getref(cprng_fast_percpu);
 	s = splsoftserial();



CVS commit: src/sys/crypto/cprng_fast

2022-09-01 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep  1 18:32:25 UTC 2022

Modified Files:
src/sys/crypto/cprng_fast: cprng_fast.c

Log Message:
cprng_fast(9): Assert not in pserialize read section.

This may sleep to take the global entropy lock in case it needs to be
reseeded.  If that happens we can't be in a pserialize read section.


To generate a diff of this commit:
cvs rdiff -u -r1.17 -r1.18 src/sys/crypto/cprng_fast/cprng_fast.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2022-06-26 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Jun 26 17:52:54 UTC 2022

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_subr.c

Log Message:
arm/aes_neon: Fix formatting of self-test failure message.

Discovered by code inspection.  Remarkably, a combination of errors
made this fail to be a stack buffer overrun.  Verified by booting
with ARMv8.0-AES disabled and with the self-test artificially made to
fail.


To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.7 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.8
--- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.7	Sun Aug  9 02:48:38 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c	Sun Jun 26 17:52:54 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_subr.c,v 1.8 2022/06/26 17:52:54 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.8 2022/06/26 17:52:54 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -183,11 +183,11 @@ aes_neon_xts_update_selftest(void)
 	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
 		storeblock(t, aes_neon_xts_update(loadblock(cases[i].in)));
 		if (memcmp(t, cases[i].out, 16)) {
-			char buf[33];
+			char buf[3*16 + 1];
 			unsigned j;
 
 			for (j = 0; j < 16; j++) {
-snprintf(buf + 2*j, sizeof(buf) - 2*j,
+snprintf(buf + 3*j, sizeof(buf) - 3*j,
 " %02hhx", t[j]);
 			}
 			printf("%s %u: %s\n", __func__, i, buf);



CVS commit: src/sys/crypto/aes/arch/arm

2022-06-26 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Jun 26 17:52:54 UTC 2022

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_subr.c

Log Message:
arm/aes_neon: Fix formatting of self-test failure message.

Discovered by code inspection.  Remarkably, a combination of errors
made this fail to be a stack buffer overrun.  Verified by booting
with ARMv8.0-AES disabled and with the self-test artificially made to
fail.


To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/cprng_fast

2022-06-01 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Wed Jun  1 15:44:37 UTC 2022

Modified Files:
src/sys/crypto/cprng_fast: cprng_fast.c

Log Message:
cprng(9): cprng_fast is no longer used from interrupt context.

Rip out logic to defer reseeding to softint.


To generate a diff of this commit:
cvs rdiff -u -r1.16 -r1.17 src/sys/crypto/cprng_fast/cprng_fast.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/cprng_fast/cprng_fast.c
diff -u src/sys/crypto/cprng_fast/cprng_fast.c:1.16 src/sys/crypto/cprng_fast/cprng_fast.c:1.17
--- src/sys/crypto/cprng_fast/cprng_fast.c:1.16	Tue Jul 28 20:15:07 2020
+++ src/sys/crypto/cprng_fast/cprng_fast.c	Wed Jun  1 15:44:37 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $	*/
+/*	$NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include 
-__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.16 2020/07/28 20:15:07 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.17 2022/06/01 15:44:37 riastradh Exp $");
 
 #include 
 #include 
@@ -39,7 +39,6 @@ __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -58,8 +57,7 @@ struct cprng_fast {
 };
 
 static void	cprng_fast_init_cpu(void *, void *, struct cpu_info *);
-static void	cprng_fast_schedule_reseed(struct cprng_fast *);
-static void	cprng_fast_intr(void *);
+static void	cprng_fast_reseed(struct cprng_fast *);
 
 static void	cprng_fast_seed(struct cprng_fast *, const void *);
 static void	cprng_fast_buf(struct cprng_fast *, void *, unsigned);
@@ -68,7 +66,6 @@ static void	cprng_fast_buf_short(void *,
 static void	cprng_fast_buf_long(void *, size_t);
 
 static percpu_t	*cprng_fast_percpu	__read_mostly;
-static void	*cprng_fast_softint	__read_mostly;
 
 void
 cprng_fast_init(void)
@@ -76,20 +73,14 @@ cprng_fast_init(void)
 
 	cprng_fast_percpu = percpu_create(sizeof(struct cprng_fast),
 	cprng_fast_init_cpu, NULL, NULL);
-	cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE,
-	&cprng_fast_intr, NULL);
 }
 
 static void
 cprng_fast_init_cpu(void *p, void *arg __unused, struct cpu_info *ci)
 {
 	struct cprng_fast *const cprng = p;
-	uint8_t seed[CPRNG_FAST_SEED_BYTES];
 
-	cprng->epoch = entropy_epoch();
-	cprng_strong(kern_cprng, seed, sizeof seed, 0);
-	cprng_fast_seed(cprng, seed);
-	(void)explicit_memset(seed, 0, sizeof seed);
+	cprng->epoch = 0;
 
 	cprng->reseed_evcnt = kmem_alloc(sizeof(*cprng->reseed_evcnt),
 	KM_SLEEP);
@@ -103,11 +94,16 @@ cprng_fast_get(struct cprng_fast **cprng
 	struct cprng_fast *cprng;
 	int s;
 
+	KASSERT(!cpu_intr_p());
+
 	*cprngp = cprng = percpu_getref(cprng_fast_percpu);
-	s = splvm();
+	s = splsoftserial();
 
-	if (__predict_false(cprng->epoch != entropy_epoch()))
-		cprng_fast_schedule_reseed(cprng);
+	if (__predict_false(cprng->epoch != entropy_epoch())) {
+		splx(s);
+		cprng_fast_reseed(cprng);
+		s = splsoftserial();
+	}
 
 	return s;
 }
@@ -123,29 +119,19 @@ cprng_fast_put(struct cprng_fast *cprng,
 }
 
 static void
-cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused)
-{
-
-	softint_schedule(cprng_fast_softint);
-}
-
-static void
-cprng_fast_intr(void *cookie __unused)
+cprng_fast_reseed(struct cprng_fast *cprng)
 {
 	unsigned epoch = entropy_epoch();
-	struct cprng_fast *cprng;
 	uint8_t seed[CPRNG_FAST_SEED_BYTES];
 	int s;
 
 	cprng_strong(kern_cprng, seed, sizeof(seed), 0);
 
-	cprng = percpu_getref(cprng_fast_percpu);
-	s = splvm();
+	s = splsoftserial();
 	cprng_fast_seed(cprng, seed);
 	cprng->epoch = epoch;
 	cprng->reseed_evcnt->ev_count++;
 	splx(s);
-	percpu_putref(cprng_fast_percpu);
 
 	explicit_memset(seed, 0, sizeof(seed));
 }



CVS commit: src/sys/crypto/cprng_fast

2022-06-01 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Wed Jun  1 15:44:37 UTC 2022

Modified Files:
src/sys/crypto/cprng_fast: cprng_fast.c

Log Message:
cprng(9): cprng_fast is no longer used from interrupt context.

Rip out logic to defer reseeding to softint.


To generate a diff of this commit:
cvs rdiff -u -r1.16 -r1.17 src/sys/crypto/cprng_fast/cprng_fast.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes

2021-12-04 Thread SAITOH Masanobu
Module Name:src
Committed By:   msaitoh
Date:   Sun Dec  5 04:48:35 UTC 2021

Modified Files:
src/sys/crypto/aes: aes_selftest.c

Log Message:
s/folllowing/following/


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/aes_selftest.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_selftest.c
diff -u src/sys/crypto/aes/aes_selftest.c:1.6 src/sys/crypto/aes/aes_selftest.c:1.7
--- src/sys/crypto/aes/aes_selftest.c:1.6	Tue Sep  8 22:48:24 2020
+++ src/sys/crypto/aes/aes_selftest.c	Sun Dec  5 04:48:35 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $	*/
+/*	$NetBSD: aes_selftest.c,v 1.7 2021/12/05 04:48:35 msaitoh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.7 2021/12/05 04:48:35 msaitoh Exp $");
 
 #ifdef _KERNEL
 
@@ -133,7 +133,7 @@ aes_selftest_encdec(const struct aes_imp
 	if (outbuf[17] != 0x1a)
 		return aes_selftest_fail(impl, outbuf + 17,
 		(const uint8_t[1]){0x1a}, 1,
-		"AES overrun folllowing");
+		"AES overrun following");
 
 	/* Success!  */
 	return 0;



CVS commit: src/sys/crypto/aes

2021-12-04 Thread SAITOH Masanobu
Module Name:src
Committed By:   msaitoh
Date:   Sun Dec  5 04:48:35 UTC 2021

Modified Files:
src/sys/crypto/aes: aes_selftest.c

Log Message:
s/folllowing/following/


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/aes_selftest.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2021-10-17 Thread Jared D. McNeill
Module Name:src
Committed By:   jmcneill
Date:   Sun Oct 17 14:45:45 UTC 2021

Modified Files:
src/sys/crypto/adiantum: adiantum.c
src/sys/crypto/aes: aes_ccm.c
src/sys/crypto/blake2: blake2s.c

Log Message:
Upgrade self-test passed messages from verbose to debug.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/adiantum/adiantum.c
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_ccm.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/blake2/blake2s.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/adiantum/adiantum.c
diff -u src/sys/crypto/adiantum/adiantum.c:1.6 src/sys/crypto/adiantum/adiantum.c:1.7
--- src/sys/crypto/adiantum/adiantum.c:1.6	Wed Apr 14 21:29:57 2021
+++ src/sys/crypto/adiantum/adiantum.c	Sun Oct 17 14:45:45 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $	*/
+/*	$NetBSD: adiantum.c,v 1.7 2021/10/17 14:45:45 jmcneill Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include 
-__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $");
+__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.7 2021/10/17 14:45:45 jmcneill Exp $");
 
 #include 
 #include 
@@ -1938,7 +1938,7 @@ adiantum_modcmd(modcmd_t cmd, void *opaq
 		result |= adiantum_selftest();
 		if (result)
 			panic("adiantum self-test failed");
-		aprint_verbose("adiantum: self-test passed\n");
+		aprint_debug("adiantum: self-test passed\n");
 		return 0;
 	}
 	case MODULE_CMD_FINI:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u src/sys/crypto/aes/aes_ccm.c:1.5 src/sys/crypto/aes/aes_ccm.c:1.6
--- src/sys/crypto/aes/aes_ccm.c:1.5	Mon Aug 10 06:27:29 2020
+++ src/sys/crypto/aes/aes_ccm.c	Sun Oct 17 14:45:45 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $	*/
+/*	$NetBSD: aes_ccm.c,v 1.6 2021/10/17 14:45:45 jmcneill Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.6 2021/10/17 14:45:45 jmcneill Exp $");
 
 #include 
 #include 
@@ -611,7 +611,7 @@ aes_ccm_modcmd(modcmd_t cmd, void *opaqu
 	case MODULE_CMD_INIT:
 		if (aes_ccm_selftest())
 			return EIO;
-		aprint_verbose("aes_ccm: self-test passed\n");
+		aprint_debug("aes_ccm: self-test passed\n");
 		return 0;
 	case MODULE_CMD_FINI:
 		return 0;

Index: src/sys/crypto/blake2/blake2s.c
diff -u src/sys/crypto/blake2/blake2s.c:1.1 src/sys/crypto/blake2/blake2s.c:1.2
--- src/sys/crypto/blake2/blake2s.c:1.1	Thu Aug 20 21:21:05 2020
+++ src/sys/crypto/blake2/blake2s.c	Sun Oct 17 14:45:45 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $	*/
+/*	$NetBSD: blake2s.c,v 1.2 2021/10/17 14:45:45 jmcneill Exp $	*/
 
 /*-
  * Copyright (c) 2015 Taylor R. Campbell
@@ -29,7 +29,7 @@
 #ifdef _KERNEL
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: blake2s.c,v 1.2 2021/10/17 14:45:45 jmcneill Exp $");
 
 #include 
 #include 
@@ -338,7 +338,7 @@ blake2s_modcmd(modcmd_t cmd, void *opaqu
 	case MODULE_CMD_INIT:
 		if (blake2s_selftest())
 			panic("blake2s: self-test failed");
-		aprint_verbose("blake2s: self-test passed\n");
+		aprint_debug("blake2s: self-test passed\n");
 		return 0;
 	case MODULE_CMD_FINI:
 		return 0;



CVS commit: src/sys/crypto

2021-10-17 Thread Jared D. McNeill
Module Name:src
Committed By:   jmcneill
Date:   Sun Oct 17 14:45:45 UTC 2021

Modified Files:
src/sys/crypto/adiantum: adiantum.c
src/sys/crypto/aes: aes_ccm.c
src/sys/crypto/blake2: blake2s.c

Log Message:
Upgrade self-test passed messages from verbose to debug.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/adiantum/adiantum.c
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_ccm.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/blake2/blake2s.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/camellia

2021-09-03 Thread David H. Gutteridge
Module Name:src
Committed By:   gutteridge
Date:   Sat Sep  4 00:33:10 UTC 2021

Modified Files:
src/sys/crypto/camellia: camellia-api.c camellia.c

Log Message:
Fix typos in comments and add missing KERNEL_RCSID


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/camellia/camellia-api.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/camellia/camellia.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/camellia/camellia-api.c
diff -u src/sys/crypto/camellia/camellia-api.c:1.1 src/sys/crypto/camellia/camellia-api.c:1.2
--- src/sys/crypto/camellia/camellia-api.c:1.1	Thu May  5 17:38:36 2011
+++ src/sys/crypto/camellia/camellia-api.c	Sat Sep  4 00:33:09 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: camellia-api.c,v 1.1 2011/05/05 17:38:36 drochner Exp $ */
+/* $NetBSD: camellia-api.c,v 1.2 2021/09/04 00:33:09 gutteridge Exp $ */
 
 /*
  *
@@ -28,6 +28,7 @@
  */
 
 #include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: camellia-api.c,v 1.2 2021/09/04 00:33:09 gutteridge Exp $");
 
 #include 
 #include 

Index: src/sys/crypto/camellia/camellia.c
diff -u src/sys/crypto/camellia/camellia.c:1.2 src/sys/crypto/camellia/camellia.c:1.3
--- src/sys/crypto/camellia/camellia.c:1.2	Wed Jan  1 15:18:57 2014
+++ src/sys/crypto/camellia/camellia.c	Sat Sep  4 00:33:09 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: camellia.c,v 1.2 2014/01/01 15:18:57 pgoyette Exp $ */
+/* $NetBSD: camellia.c,v 1.3 2021/09/04 00:33:09 gutteridge Exp $ */
 
 /* camellia.h ver 1.1.0
  *
@@ -33,6 +33,8 @@
  */
 
 #include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: camellia.c,v 1.3 2021/09/04 00:33:09 gutteridge Exp $");
+
 #include 
 #include 
 #include 
@@ -1007,7 +1009,7 @@ camellia_encrypt128(const uint32_t *subk
 void
 camellia_decrypt128(const uint32_t *subkey, uint32_t *io)
 {
-uint32_t il,ir,t0,t1;   /* temporary valiables */
+uint32_t il,ir,t0,t1;   /* temporary variables */
 
 /* pre whitening but absorb kw2*/
 io[0] ^= SUBL(24);
@@ -1077,7 +1079,7 @@ camellia_decrypt128(const uint32_t *subk
 void
 camellia_encrypt256(const uint32_t *subkey, uint32_t *io)
 {
-uint32_t il,ir,t0,t1;   /* temporary valiables */
+uint32_t il,ir,t0,t1;   /* temporary variables */
 
 /* pre whitening but absorb kw2*/
 io[0] ^= SUBL(0);
@@ -1160,7 +1162,7 @@ camellia_encrypt256(const uint32_t *subk
 void
 camellia_decrypt256(const uint32_t *subkey, uint32_t *io)
 {
-uint32_t il,ir,t0,t1;   /* temporary valiables */
+uint32_t il,ir,t0,t1;   /* temporary variables */
 
 /* pre whitening but absorb kw2*/
 io[0] ^= SUBL(32);



CVS commit: src/sys/crypto/camellia

2021-09-03 Thread David H. Gutteridge
Module Name:src
Committed By:   gutteridge
Date:   Sat Sep  4 00:33:10 UTC 2021

Modified Files:
src/sys/crypto/camellia: camellia-api.c camellia.c

Log Message:
Fix typos in comments and add missing KERNEL_RCSID


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/camellia/camellia-api.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/camellia/camellia.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/adiantum

2021-04-14 Thread Christos Zoulas
Module Name:src
Committed By:   christos
Date:   Wed Apr 14 21:29:57 UTC 2021

Modified Files:
src/sys/crypto/adiantum: adiantum.c

Log Message:
Use an enum instead of constant variables so that they work in CTASSERT.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/adiantum/adiantum.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/adiantum/adiantum.c
diff -u src/sys/crypto/adiantum/adiantum.c:1.5 src/sys/crypto/adiantum/adiantum.c:1.6
--- src/sys/crypto/adiantum/adiantum.c:1.5	Sun Jul 26 00:05:20 2020
+++ src/sys/crypto/adiantum/adiantum.c	Wed Apr 14 17:29:57 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: adiantum.c,v 1.5 2020/07/26 04:05:20 riastradh Exp $	*/
+/*	$NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.5 2020/07/26 04:05:20 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: adiantum.c,v 1.6 2021/04/14 21:29:57 christos Exp $");
 
 #include 
 #include 
@@ -390,10 +390,12 @@ static void
 nh(uint8_t h[static 32], const uint8_t *m, size_t mlen,
 const uint32_t k[static 268 /* u/w + 2s(r - 1) */])
 {
-	const unsigned w = 32;	 /* word size */
-	const unsigned s = 2;	 /* stride */
-	const unsigned r = 4;	 /* rounds */
-	const unsigned u = 8192; /* unit count (bits per msg unit) */
+	enum {
+	s = 2,	 /* stride */
+	r = 4,	 /* rounds */
+	w = 32,	 /* word size */
+	u = 8192	 /* unit count (bits per msg unit) */
+	};
 	uint64_t h0 = 0, h1 = 0, h2 = 0, h3 = 0;
 	unsigned i;
 



CVS commit: src/sys/crypto/adiantum

2021-04-14 Thread Christos Zoulas
Module Name:src
Committed By:   christos
Date:   Wed Apr 14 21:29:57 UTC 2021

Modified Files:
src/sys/crypto/adiantum: adiantum.c

Log Message:
Use an enum instead of constant variables so that they work in CTASSERT.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/adiantum/adiantum.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-11-21 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Sat Nov 21 08:09:21 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.c

Log Message:
Fix build with clang for earmv7hf; loadroundkey() is used only for __aarch64__.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon.c:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon.c:1.5	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.c	Sat Nov 21 08:09:21 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.6 2020/11/21 08:09:21 rin Exp $");
 
 #include 
 
@@ -196,11 +196,13 @@ inv	= VQ_N_U8(0x80,0x01,0x08,0x0D,0x0F,0
 inva	= VQ_N_U8(0x80,0x07,0x0B,0x0F,0x06,0x0A,0x04,0x01,
 	0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03);
 
+#ifdef __aarch64__
 static inline uint8x16_t
 loadroundkey(const void *rkp)
 {
 	return vld1q_u8(rkp);
 }
+#endif
 
 static inline void
 storeroundkey(void *rkp, uint8x16_t rk)



CVS commit: src/sys/crypto/aes/arch/arm

2020-11-21 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Sat Nov 21 08:09:21 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.c

Log Message:
Fix build with clang for earmv7hf; loadroundkey() is used only for __aarch64__.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2020-10-10 Thread Jared D. McNeill
Module Name:src
Committed By:   jmcneill
Date:   Sat Oct 10 08:24:10 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_impl.c
src/sys/crypto/chacha/arch/arm: chacha_neon_impl.c

Log Message:
Fix detection of NEON features. ID_AA64PFR0_EL1_ADV_SIMD_NONE means SIMD
is not available, and any other value means it is.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon_impl.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_impl.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon_impl.c:1.4 src/sys/crypto/aes/arch/arm/aes_neon_impl.c:1.5
--- src/sys/crypto/aes/arch/arm/aes_neon_impl.c:1.4	Sat Jul 25 22:36:06 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_impl.c	Sat Oct 10 08:24:10 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_impl.c,v 1.4 2020/07/25 22:36:06 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_impl.c,v 1.5 2020/10/10 08:24:10 jmcneill Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_impl.c,v 1.4 2020/07/25 22:36:06 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_impl.c,v 1.5 2020/10/10 08:24:10 jmcneill Exp $");
 
 #include 
 #include 
@@ -200,10 +200,10 @@ aes_neon_probe(void)
 		return -1;
 #endif
 	switch (__SHIFTOUT(id->ac_aa64pfr0, ID_AA64PFR0_EL1_ADVSIMD)) {
-	case ID_AA64PFR0_EL1_ADV_SIMD_IMPL:
-		break;
-	default:
+	case ID_AA64PFR0_EL1_ADV_SIMD_NONE:
 		return -1;
+	default:
+		break;
 	}
 #else
 #ifdef _KERNEL

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c:1.2
--- src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c:1.1	Sat Jul 25 22:51:57 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c	Sat Oct 10 08:24:10 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_impl.c,v 1.1 2020/07/25 22:51:57 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_impl.c,v 1.2 2020/10/10 08:24:10 jmcneill Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: chacha_neon_impl.c,v 1.1 2020/07/25 22:51:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: chacha_neon_impl.c,v 1.2 2020/10/10 08:24:10 jmcneill Exp $");
 
 #include "chacha_neon.h"
 
@@ -147,10 +147,10 @@ chacha_probe_neon(void)
 		return -1;
 #endif
 	switch (__SHIFTOUT(id->ac_aa64pfr0, ID_AA64PFR0_EL1_ADVSIMD)) {
-	case ID_AA64PFR0_EL1_ADV_SIMD_IMPL:
-		break;
-	default:
+	case ID_AA64PFR0_EL1_ADV_SIMD_NONE:
 		return -1;
+	default:
+		break;
 	}
 #else
 #ifdef _KERNEL



CVS commit: src/sys/crypto

2020-10-10 Thread Jared D. McNeill
Module Name:src
Committed By:   jmcneill
Date:   Sat Oct 10 08:24:10 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_impl.c
src/sys/crypto/chacha/arch/arm: chacha_neon_impl.c

Log Message:
Fix detection of NEON features. ID_AA64PFR0_EL1_ADV_SIMD_NONE means SIMD
is not available, and any other value means it is.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon_impl.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:31:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.11
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10	Thu Sep 10 11:30:28 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:31:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <arm/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $")
 
 	.fpu	neon
 
@@ -54,36 +54,26 @@ inva:
 	.byte	0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03
 END(inva)
 
-	.type	mc_forward,_ASM_TYPE_OBJECT
-mc_forward:
-	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 */
+	.type	mc,_ASM_TYPE_OBJECT
+mc:
+	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 forward */
 	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C
-
-	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 backward */
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
+	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 forward */
 	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00
-
-	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 */
+	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 backward */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
+	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 forward */
 	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04
-
+	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 backward */
+	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
 .Lmc_forward_3:
-	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 */
+	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 forward */
 	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08
-END(mc_forward)
-
-	.type	mc_backward,_ASM_TYPE_OBJECT
-mc_backward:
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 */
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
-
-	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 */
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
-
-	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 */
-	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
-
-	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 */
+	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 backward */
 	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02
-END(mc_backward)
+END(mc)
 
 	.type	sr,_ASM_TYPE_OBJECT
 sr:
@@ -210,8 +200,7 @@ ENTRY(aes_neon_enc1)
 
 	/*
 	 * r3: rmod4
-	 * r4: mc_forward
-	 * r5: mc_backward
+	 * r4: mc
 	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
@@ -225,8 +214,8 @@ ENTRY(aes_neon_enc1)
 	 * q9={d18-d19}: sb2[1]
 	 * q10={d20-d21}: inv
 	 * q11={d22-d23}: inva
-	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc_backward[rmod4]
-	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc_forward[rmod4]
+	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].backward
+	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].forward
 	 * q14={d28-d29}: rk/A2/A2_B_D
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
@@ -254,9 +243,8 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
 	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */
 
-	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	add	r4, ip, #(mc_forward - .Lconstants)
-	add	r5, ip, #(mc_backward - .Lconstants)
+	/* r4 := mc */
+	add	r4, ip, #(mc - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -291,13 +279,11 @@ ENTRY(aes_neon_enc1)
 	vtbl.8	d25, {q8}, d5
 	vtbl.8	d26, {q9}, d6
 	vtbl.8	d27, {q9}, d7
+	add	r6, r4, r3, lsl #5	/* r6 := &mc[rmod4] */
 	veor	q14, q12, q13
 
-	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
-	add	r6, r4, r3, lsl #4
-	add	r8, r5, r3, lsl #4
-	vld1.8	{q12}, [r6 :128]
-	vld1.8	{q13}, [r8 :128]
+	/* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */
+	vld1.8	{q12-q13}, [r6 :256]
 
 	/* q15 := A2_B = A2 + A(mcf) */
 	vtbl.8	d30, {q0}, d24
@@ -474,7 +460,7 @@ ENTRY(aes_neon_dec1)
 	add	r8, ip, #(.Lmc_forward_3 - .Lconstants)
 	vld1.8	{q6-q7}, [r4 :256]	/* q6 := dsbb[0], q7 := dsbb[1] */
 	vld1.8	{q10-q11}, [r6 :256]	/* q10 := inv, q11 := inva */
-	vld1.8	{q15}, [r8 :128]	/* q15 := mc_forward[3] */
+	vld1.8	{q15}, [r8 :128]	/* q15 := mc[3].forward */
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:31:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Gather mc_forward/backward so we can load 256 bits at once.


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:30:28 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Hoist dsbd/dsbe address calculation out of loop.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.10
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9	Thu Sep 10 11:30:08 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:30:28 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <arm/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.10 2020/09/10 11:30:28 riastradh Exp $")
 
 	.fpu	neon
 
@@ -431,6 +431,9 @@ ENTRY(aes_neon_dec1)
 
 	/*
 	 * r3: 3 & ~(nrounds - 1)
+	 * r4: dsbd
+	 * r5: dsbe
+	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -488,6 +491,10 @@ ENTRY(aes_neon_dec1)
 	add	r4, ip, #(dsb9 - .Lconstants)
 	vld1.8	{q4-q5}, [r4 :256]	/* q4 := dsb9[0], q5 := dsb9[1] */
 
+	/* r4 := dsbd, r5 := dsbe */
+	add	r4, ip, #(dsbd - .Lconstants)
+	add	r5, ip, #(dsbe - .Lconstants)
+
 	/* q0 := rk[0] + diptlo(lo) + dipthi(hi) */
 	veor	q0, q14, q2
 	veor	q0, q0, q3
@@ -496,7 +503,6 @@ ENTRY(aes_neon_dec1)
 
 	_ALIGN_TEXT
 1:	/* load dsbd */
-	add	r4, ip, #(dsbd - .Lconstants)
 	vld1.8	{q8-q9}, [r4 :256]	/* q8 := dsbd[0], q9 := dsbd[1] */
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
@@ -522,8 +528,7 @@ ENTRY(aes_neon_dec1)
 	veor	q0, q0, q13
 
 	/* load dsbe */
-	add	r4, ip, #(dsbe - .Lconstants)
-	vld1.8	{q8-q9}, [r4 :256]!	/* q8 := dsbe[0], q9 := dsbe[1] */
+	vld1.8	{q8-q9}, [r5 :256]	/* q8 := dsbe[0], q9 := dsbe[1] */
 
 	/* q0 := x(mc) + dsbb_0(io) + dsbb_1(jo) */
 	vtbl.8	d28, {q0}, d30



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:30:28 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Hoist dsbd/dsbe address calculation out of loop.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:30:08 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Tweak register usage.

- Call r12 by its usual name, ip.
- No need for r7 or r11=fp at the moment.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:30:08 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Tweak register usage.

- Call r12 by its usual name, ip.
- No need for r7 or r11=fp at the moment.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.9
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8	Thu Sep 10 11:29:43 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:30:08 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <arm/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.9 2020/09/10 11:30:08 riastradh Exp $")
 
 	.fpu	neon
 
@@ -205,14 +205,14 @@ ENTRY(aes_neon_enc1)
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
-	push	{r4, r5, r6, r7, r8, r10, r11, lr}
+	push	{r4, r5, r6, r8, r10, lr}
 	vpush	{d8-d15}
 
 	/*
 	 * r3: rmod4
 	 * r4: mc_forward
 	 * r5: mc_backward
-	 * r6,r7,r8,r10,r11,r12: temporaries
+	 * r6,r8,r10,ip: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -231,32 +231,32 @@ ENTRY(aes_neon_enc1)
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
 
-	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
-	ldr	r12, .Lconstants_addr
-	adr	r11, .Lconstants_addr
+	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
+	ldr	ip, .Lconstants_addr
+	adr	r10, .Lconstants_addr
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 	movw	r3, #0
 	vmov.i8	q1, #0x0f
 
-	/* r12 := .Lconstants */
-	add	r12, r12, r11
+	/* ip := .Lconstants */
+	add	ip, ip, r10
 
 	/* (q4, q5) := (iptlo, ipthi) */
-	add	r6, r12, #(ipt - .Lconstants)
+	add	r6, ip, #(ipt - .Lconstants)
 	vld1.8	{q4-q5}, [r6 :256]
 
 	/* load the rest of the constants */
-	add	r4, r12, #(sb1 - .Lconstants)
-	add	r6, r12, #(sb2 - .Lconstants)
-	add	r8, r12, #(.Linv_inva - .Lconstants)
+	add	r4, ip, #(sb1 - .Lconstants)
+	add	r6, ip, #(sb2 - .Lconstants)
+	add	r8, ip, #(.Linv_inva - .Lconstants)
 	vld1.8	{q6-q7}, [r4 :256]	/* q6 = sb1[0], q7 = sb1[1] */
 	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
 	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */
 
 	/* (r4, r5) := (&mc_forward[0], &mc_backward[0]) */
-	add	r4, r12, #(mc_forward - .Lconstants)
-	add	r5, r12, #(mc_backward - .Lconstants)
+	add	r4, ip, #(mc_forward - .Lconstants)
+	add	r5, ip, #(mc_backward - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -295,9 +295,9 @@ ENTRY(aes_neon_enc1)
 
 	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
 	add	r6, r4, r3, lsl #4
-	add	r7, r5, r3, lsl #4
+	add	r8, r5, r3, lsl #4
 	vld1.8	{q12}, [r6 :128]
-	vld1.8	{q13}, [r7 :128]
+	vld1.8	{q13}, [r8 :128]
 
 	/* q15 := A2_B = A2 + A(mcf) */
 	vtbl.8	d30, {q0}, d24
@@ -365,8 +365,8 @@ ENTRY(aes_neon_enc1)
 	bne	1b
 
 	/* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */
-	add	r8, r12, #(sr - .Lconstants)
-	add	r6, r12, #(sbo - .Lconstants)
+	add	r8, ip, #(sr - .Lconstants)
+	add	r6, ip, #(sbo - .Lconstants)
 	add	r8, r8, r3, lsl #4
 	vld1.8	{q6-q7}, [r6 :256]
 	vld1.8	{q15}, [r8 :128]
@@ -388,7 +388,7 @@ ENTRY(aes_neon_enc1)
 	vtbl.8	d1, {q2}, d31
 
 	vpop	{d8-d15}
-	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
+	pop	{r4, r5, r6, r8, r10, lr}
 #ifdef __SOFTFP__
 #ifdef __ARM_BIG_ENDIAN
 	vmov	r1, r0, d0
@@ -426,7 +426,7 @@ ENTRY(aes_neon_dec1)
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
-	push	{r4, r5, r6, r7, r8, r10, r11, lr}
+	push	{r4, r5, r6, r8, r10, lr}
 	vpush	{d8-d15}
 
 	/*
@@ -449,26 +449,26 @@ ENTRY(aes_neon_dec1)
 	 * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)]
 	 */
 
-	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
-	ldr	r12, .Lconstants_addr
-	adr	r11, .Lconstants_addr
+	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
+	ldr	ip, .Lconstants_addr
+	adr	r10, .Lconstants_addr
 
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 	rsb	r3, r1, #0		/* r3 := ~(x - 1) = -x */
 	vmov.i8	q1, #0x0f
 	and	r3, r3, #3		/* r3 := 3 & ~(x - 1) */
 
-	/* r12 := .Lconstants */
-	add	r12, r12, r11
+	/* ip := .Lconstants */
+	add	ip, ip, r10
 
 	/* (q4, q5) := (diptlo, dipthi) */
-	add	r6, r12, #(dipt - .Lconstants)
+	add	r6, ip, #(dipt - .Lconstants)
 	vld1.8	{q4-q5}, [r6 :256]
 
 	/* load the rest of the constants */
-	add	r4, r12, #(dsbb - .Lconstants)
-	add	r6, r12, #(.Linv_inva - .Lconstants)
-	add	r8, r12, #(.Lmc_forward_3 - .Lconstants)
[remainder of this diff is truncated in the archived message]

CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:29:43 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Write vtbl with {qN} rather than {d(2N)-d(2N+1)}.

Cosmetic; no functional change.


To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.8
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7	Thu Sep 10 11:29:02 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:29:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include <arm/asm.h>
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.8 2020/09/10 11:29:43 riastradh Exp $")
 
 	.fpu	neon
 
@@ -264,10 +264,10 @@ ENTRY(aes_neon_enc1)
 	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* (q2, q3) := (iptlo(lo), ipthi(hi)) */
-	vtbl.8	d4, {d8-d9}, d4
-	vtbl.8	d5, {d8-d9}, d5
-	vtbl.8	d6, {d10-d11}, d6
-	vtbl.8	d7, {d10-d11}, d7
+	vtbl.8	d4, {q4}, d4
+	vtbl.8	d5, {q4}, d5
+	vtbl.8	d6, {q5}, d6
+	vtbl.8	d7, {q5}, d7
 
 	/* q0 := rk[0] + iptlo(lo) + ipthi(hi) */
 	veor	q0, q14, q2
@@ -279,18 +279,18 @@ ENTRY(aes_neon_enc1)
 1:	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */
-	vtbl.8	d24, {d12-d13}, d4
-	vtbl.8	d25, {d12-d13}, d5
-	vtbl.8	d26, {d14-d15}, d6
-	vtbl.8	d27, {d14-d15}, d7
+	vtbl.8	d24, {q6}, d4
+	vtbl.8	d25, {q6}, d5
+	vtbl.8	d26, {q7}, d6
+	vtbl.8	d27, {q7}, d7
 	veor	q0, q14, q12
 	veor	q0, q0, q13
 
 	/* q14 := A2 = sb2_0[io] + sb2_1[jo] */
-	vtbl.8	d24, {d16-d17}, d4
-	vtbl.8	d25, {d16-d17}, d5
-	vtbl.8	d26, {d18-d19}, d6
-	vtbl.8	d27, {d18-d19}, d7
+	vtbl.8	d24, {q8}, d4
+	vtbl.8	d25, {q8}, d5
+	vtbl.8	d26, {q9}, d6
+	vtbl.8	d27, {q9}, d7
 	veor	q14, q12, q13
 
 	/* (q12, q13) := (mc_forward[rmod4], mc_backward[rmod4]) */
@@ -300,18 +300,18 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q13}, [r7 :128]
 
 	/* q15 := A2_B = A2 + A(mcf) */
-	vtbl.8	d30, {d0-d1}, d24
-	vtbl.8	d31, {d0-d1}, d25
+	vtbl.8	d30, {q0}, d24
+	vtbl.8	d31, {q0}, d25
 	veor	q15, q15, q14
 
 	/* q14 := A2_B_D = A2_B + A(mcb) */
-	vtbl.8	d28, {d0-d1}, d26
-	vtbl.8	d29, {d0-d1}, d27
+	vtbl.8	d28, {q0}, d26
+	vtbl.8	d29, {q0}, d27
 	veor	q14, q14, q15
 
 	/* q0 := x = A2_B_D + A2_B(mcf) */
-	vtbl.8	d0, {d30-d31}, d24
-	vtbl.8	d1, {d30-d31}, d25
+	vtbl.8	d0, {q15}, d24
+	vtbl.8	d1, {q15}, d25
 	veor	q0, q0, q14
 
 2:	/*
@@ -324,19 +324,19 @@ ENTRY(aes_neon_enc1)
 	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* q0 := a/k */
-	vtbl.8	d0, {d22-d23}, d4
-	vtbl.8	d1, {d22-d23}, d5
+	vtbl.8	d0, {q11}, d4
+	vtbl.8	d1, {q11}, d5
 
 	/* q2 := j = i + k */
 	veor	q2, q3, q2
 
 	/* q12 := ir = 1/i */
-	vtbl.8	d24, {d20-d21}, d6
-	vtbl.8	d25, {d20-d21}, d7
+	vtbl.8	d24, {q10}, d6
+	vtbl.8	d25, {q10}, d7
 
 	/* q13 := jr = 1/j */
-	vtbl.8	d26, {d20-d21}, d4
-	vtbl.8	d27, {d20-d21}, d5
+	vtbl.8	d26, {q10}, d4
+	vtbl.8	d27, {q10}, d5
 
 	/* q12 := iak = 1/i + a/k */
 	veor	q12, q12, q0
@@ -345,12 +345,12 @@ ENTRY(aes_neon_enc1)
 	veor	q13, q13, q0
 
 	/* q12 := iakr = 1/(1/i + a/k) */
-	vtbl.8	d24, {d20-d21}, d24
-	vtbl.8	d25, {d20-d21}, d25
+	vtbl.8	d24, {q10}, d24
+	vtbl.8	d25, {q10}, d25
 
 	/* q13 := jakr = 1/(1/j + a/k) */
-	vtbl.8	d26, {d20-d21}, d26
-	vtbl.8	d27, {d20-d21}, d27
+	vtbl.8	d26, {q10}, d26
+	vtbl.8	d27, {q10}, d27
 
 	/* q2 := io = j + 1/(1/i + a/k) */
 	veor	q2, q2, q12
@@ -374,18 +374,18 @@ ENTRY(aes_neon_enc1)
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* (q2, q3) := (sbo_0(io), sbo_1(jo)) */
-	vtbl.8	d4, {d12-d13}, d4
-	vtbl.8	d5, {d12-d13}, d5
-	vtbl.8	d6, {d14-d15}, d6
-	vtbl.8	d7, {d14-d15}, d7
+	vtbl.8	d4, {q6}, d4
+	vtbl.8	d5, {q6}, d5
+	vtbl.8	d6, {q7}, d6
+	vtbl.8	d7, {q7}, d7
 
 	/* q2 := x = rk[nr] + sbo_0(io) + sbo_1(jo) */
 	veor	q2, q2, q14
 	veor	q2, q2, q3
 
 	/* q0 := x(sr[rmod4]) */
-	vtbl.8	d0, {d4-d5}, d30
-	vtbl.8	d1, {d4-d5}, d31
+	vtbl.8	d0, {q2}, d30
+	vtbl.8	d1, {q2}, d31
 
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
@@ -479,10 +479,10 @@ ENTRY(aes_neon_dec1)
 	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */
 
 	/* (q2, q3) := (diptlo(lo), dipthi(hi)) */
-	vtbl.8	d4, {d8-d9}, d4
-	vtbl.8	d5, {d8-d9}, d5
-	vtbl.8	d6, {d10-d11}, d6
-	vtbl.8	d7, {d10-d11}, d7
+	vtbl.8	d4, {q4}, d4
+	vtbl.8	d5, {q4}, d5
+	vtbl.8	d6, {q5}, d6
+	vtbl.8	d7, {q5}, d7
 
 	/* load dsb9 */
 	add	r4, r12, #(dsb9 - .Lconstants)
@@ -502,22 +502,22 @@ ENTRY(aes_neon_dec1)
 	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* q0 := rk[i] + dsb9_0(io) + dsb9_1(jo) 

CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:29:43 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Write vtbl with {qN} rather than {d(2N)-d(2N+1)}.

Cosmetic; no functional change.


To generate a diff of this commit:
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:29:02 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Issue 256-bit loads rather than pairs of 128-bit loads.

Not sure why I didn't realize you could do this before!

Saves some temporary registers that can now be allocated to shave off
a few cycles.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-10 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Sep 10 11:29:02 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
aes neon: Issue 256-bit loads rather than pairs of 128-bit loads.

Not sure why I didn't realize you could do this before!

Saves some temporary registers that can now be allocated to shave off
a few cycles.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.7
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6	Sun Aug 16 18:02:03 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Thu Sep 10 11:29:02 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.7 2020/09/10 11:29:02 riastradh Exp $")
 
 	.fpu	neon
 
@@ -38,9 +38,10 @@ RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020
 	.long	.Lconstants - .
 
 	.section .rodata
-	.p2align 4
+	.p2align 5
 .Lconstants:
 
+.Linv_inva:	/* inv and inva must be consecutive */
 	.type	inv,_ASM_TYPE_OBJECT
 inv:
 	.byte	0x80,0x01,0x08,0x0D,0x0F,0x06,0x05,0x0E
@@ -99,125 +100,85 @@ sr:
 	.byte	0x08,0x05,0x02,0x0F,0x0C,0x09,0x06,0x03
 END(sr)
 
-	.type	iptlo,_ASM_TYPE_OBJECT
-iptlo:
-	.byte	0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2
+	.type	ipt,_ASM_TYPE_OBJECT
+ipt:
+	.byte	0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2	/* lo */
 	.byte	0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA
-END(iptlo)
-
-	.type	ipthi,_ASM_TYPE_OBJECT
-ipthi:
-	.byte	0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C
+	.byte	0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C /* hi */
 	.byte	0x81,0xCC,0xFD,0xB0,0xFC,0xB1,0x80,0xCD
-END(ipthi)
+END(ipt)
 
-	.type	sb1_0,_ASM_TYPE_OBJECT
-sb1_0:
-	.byte	0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1
+	.type	sb1,_ASM_TYPE_OBJECT
+sb1:
+	.byte	0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1 /* 0 */
 	.byte	0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5
-END(sb1_0)
-
-	.type	sb1_1,_ASM_TYPE_OBJECT
-sb1_1:
-	.byte	0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36
+	.byte	0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36 /* 1 */
 	.byte	0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B
-END(sb1_1)
+END(sb1)
 
-	.type	sb2_0,_ASM_TYPE_OBJECT
-sb2_0:
-	.byte	0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2
+	.type	sb2,_ASM_TYPE_OBJECT
+sb2:
+	.byte	0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2 /* 0 */
 	.byte	0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E
-END(sb2_0)
-
-	.type	sb2_1,_ASM_TYPE_OBJECT
-sb2_1:
-	.byte	0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69
+	.byte	0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69 /* 1 */
 	.byte	0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2
-END(sb2_1)
+END(sb2)
 
-	.type	sbo_0,_ASM_TYPE_OBJECT
-sbo_0:
-	.byte	0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0
+	.type	sbo,_ASM_TYPE_OBJECT
+sbo:
+	.byte	0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0 /* 0 */
 	.byte	0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15
-END(sbo_0)
-
-	.type	sbo_1,_ASM_TYPE_OBJECT
-sbo_1:
-	.byte	0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF
+	.byte	0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF /* 1 */
 	.byte	0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E
-END(sbo_1)
+END(sbo)
 
-	.type	diptlo,_ASM_TYPE_OBJECT
-diptlo:
-	.byte	0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F
+	.type	dipt,_ASM_TYPE_OBJECT
+dipt:
+	.byte	0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F	/* lo */
 	.byte	0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15
-END(diptlo)
-
-	.type	dipthi,_ASM_TYPE_OBJECT
-dipthi:
-	.byte	0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86
+	.byte	0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86	/* hi */
 	.byte	0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12
-END(dipthi)
+END(dipt)
 
-	.type	dsb9_0,_ASM_TYPE_OBJECT
-dsb9_0:
-	.byte	0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85
+	.type	dsb9,_ASM_TYPE_OBJECT
+dsb9:
+	.byte	0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85	/* 0 */
 	.byte	0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA
-END(dsb9_0)
-
-	.type	dsb9_1,_ASM_TYPE_OBJECT
-dsb9_1:
-	.byte	0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0
+	.byte	0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0	/* 1 */
 	.byte	0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72
-END(dsb9_1)
+END(dsb9)
 
-	.type	dsbd_0,_ASM_TYPE_OBJECT
-dsbd_0:
-	.byte	0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D
+	.type	dsbd,_ASM_TYPE_OBJECT
+dsbd:
+	.byte	0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D	/* 0 */
 	.byte	0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5
-END(dsbd_0)
-
-	.type	dsbd_1,_ASM_TYPE_OBJECT
-dsbd_1:
-	.byte	0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C
+	.byte	0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C	/* 1 */
 	.byte	0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29
-END(dsbd_1)
+END(dsbd)
 
-	.type	dsbb_0,_ASM_TYPE_OBJECT
-dsbb_0:
-	.byte	0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0

CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 23:58:09 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Reallocate registers to shave off unnecessary MOV.


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.15
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14	Tue Sep  8 23:57:43 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep  8 23:58:09 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -917,13 +917,12 @@ END(aesarmv8_cbcmac_update1)
 ENTRY(aesarmv8_ccm_enc1)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ld1	{v0.16b, v1.16b}, [x4]	/* q0 := auth, q2 := ctr (be) */
-	mov	v2.16b, v1.16b
+	ld1	{v0.16b-v1.16b}, [x4]	/* q0 := auth, q1 := ctr (be) */
 	adrl	x11, ctr32_inc		/* x11 := _inc */
 	ld1	{v5.4s}, [x11]		/* q5 := (0,0,0,1) (host-endian) */
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
-	rev32	v2.16b, v2.16b		/* q2 := ctr (host-endian) */
+	rev32	v2.16b, v1.16b		/* q2 := ctr (host-endian) */
 	_ALIGN_TEXT
 1:	ld1	{v3.16b}, [x1], #0x10	/* q3 := plaintext block */
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
@@ -937,9 +936,8 @@ ENTRY(aesarmv8_ccm_enc1)
 	subs	x10, x10, #0x10		/* count down bytes */
 	st1	{v3.16b}, [x2], #0x10	/* store ciphertext block */
 	b.ne	1b			/* repeat if more blocks */
-	rev32	v2.16b, v2.16b		/* q2 := ctr (big-endian) */
-	mov	v1.16b, v2.16b		/* store updated auth/ctr */
-	st1	{v0.16b-v1.16b}, [x4]
+	rev32	v1.16b, v2.16b		/* q1 := ctr (big-endian) */
+	st1	{v0.16b-v1.16b}, [x4]	/* store updated auth/ctr */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_ccm_enc1)



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 23:57:43 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Issue two 4-register ld/st, not four 2-register ld/st.


To generate a diff of this commit:
cvs rdiff -u -r1.13 -r1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.14
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13	Tue Sep  8 23:57:13 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep  8 23:57:43 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -693,10 +693,8 @@ ENTRY(aesarmv8_xts_enc8)
 	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	/* q31 := tweak[7] */
-	ld1	{v0.16b,v1.16b}, [x1], #0x20	/* q[i] := ptxt[i] */
-	ld1	{v2.16b,v3.16b}, [x1], #0x20
-	ld1	{v4.16b,v5.16b}, [x1], #0x20
-	ld1	{v6.16b,v7.16b}, [x1], #0x20
+	ld1	{v0.16b-v3.16b}, [x1], #0x40	/* q[i] := ptxt[i] */
+	ld1	{v4.16b-v7.16b}, [x1], #0x40
 	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
 	eor	v1.16b, v1.16b, v25.16b
 	eor	v2.16b, v2.16b, v26.16b
@@ -716,10 +714,8 @@ ENTRY(aesarmv8_xts_enc8)
 	eor	v5.16b, v5.16b, v29.16b
 	eor	v6.16b, v6.16b, v30.16b
 	eor	v7.16b, v7.16b, v31.16b
-	st1	{v0.16b,v1.16b}, [x2], #0x20	/* store ciphertext blocks */
-	st1	{v2.16b,v3.16b}, [x2], #0x20
-	st1	{v4.16b,v5.16b}, [x2], #0x20
-	st1	{v6.16b,v7.16b}, [x2], #0x20
+	st1	{v0.16b-v3.16b}, [x2], #0x40	/* store ciphertext blocks */
+	st1	{v4.16b-v7.16b}, [x2], #0x40
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	subs	x10, x10, #0x80		/* count down nbytes */
 	b.ne	1b			/* repeat if more block groups */



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 23:57:43 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Issue two 4-register ld/st, not four 2-register ld/st.


To generate a diff of this commit:
cvs rdiff -u -r1.13 -r1.14 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 23:57:13 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Adapt aes_armv8_64.S to big-endian.

Patch mainly from (and tested by) jakllsch@ with minor tweaks by me.


To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 23:58:09 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Reallocate registers to shave off unnecessary MOV.


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 23:57:13 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
aesarmv8: Adapt aes_armv8_64.S to big-endian.

Patch mainly from (and tested by) jakllsch@ with minor tweaks by me.


To generate a diff of this commit:
cvs rdiff -u -r1.12 -r1.13 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.13
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Tue Sep  8 23:57:13 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.13 2020/09/08 23:57:13 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -114,11 +114,11 @@ END(unshiftrows_rotword_3)
  *	Standard ABI calling convention.
  */
 ENTRY(aesarmv8_setenckey128)
-	ldr	q1, [x1]	/* q1 := master key */
+	ld1	{v1.16b}, [x1]	/* q1 := master key */
 
 	adrl	x4, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 table */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 table */
 
 	str	q1, [x0], #0x10	/* store master key as first round key */
 	mov	x2, #10		/* round count */
@@ -171,14 +171,14 @@ END(aesarmv8_setenckey128)
  *	Standard ABI calling convention.
  */
 ENTRY(aesarmv8_setenckey192)
-	ldr	q1, [x1], #0x10	/* q1 := master key[0:128) */
-	ldr	d2, [x1]	/* d2 := master key[128:192) */
+	ld1	{v1.16b}, [x1], #0x10	/* q1 := master key[0:128) */
+	ld1	{v2.8b}, [x1]	/* d2 := master key[128:192) */
 
 	adrl	x4, unshiftrows_rotword_1
 	adrl	x5, unshiftrows_rotword_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_1 */
-	ldr	q17, [x5]	/* q17 := unshiftrows_rotword_3 */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_1 */
+	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_rotword_3 */
 
 	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
 	mov	x2, #12		/* round count */
@@ -351,13 +351,13 @@ END(aesarmv8_setenckey192)
  */
 ENTRY(aesarmv8_setenckey256)
 	/* q1 := key[0:128), q2 := key[128:256) */
-	ldp	q1, q2, [x1], #0x20
+	ld1	{v1.16b-v2.16b}, [x1], #0x20
 
 	adrl	x4, unshiftrows_rotword_3
 	adrl	x5, unshiftrows_3
 	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
-	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 */
-	ldr	q17, [x5]	/* q17 := unshiftrows_3 */
+	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 */
+	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_3 */
 
 	/* store master key as first two round keys */
 	stp	q1, q2, [x0], #0x20
@@ -461,9 +461,9 @@ END(aesarmv8_enctodec)
 ENTRY(aesarmv8_enc)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := ptxt */
+	ld1	{v0.16b}, [x1]	/* q0 := ptxt */
 	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
-	str	q0, [x2]	/* store ctxt */
+	st1	{v0.16b}, [x2]	/* store ctxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_enc)
@@ -479,9 +479,9 @@ END(aesarmv8_enc)
 ENTRY(aesarmv8_dec)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q0, [x1]	/* q0 := ctxt */
+	ld1	{v0.16b}, [x1]	/* q0 := ctxt */
 	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
-	str	q0, [x2]	/* store ptxt */
+	st1	{v0.16b}, [x2]	/* store ptxt */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
 END(aesarmv8_dec)
@@ -503,17 +503,17 @@ ENTRY(aesarmv8_cbc_enc)
 	mov	fp, sp
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
-	ldr	q0, [x4]		/* q0 := chaining value */
+	ld1	{v0.16b}, [x4]		/* q0 := chaining value */
 	_ALIGN_TEXT
-1:	ldr	q1, [x1], #0x10		/* q1 := plaintext block */
+1:	ld1	{v1.16b}, [x1], #0x10	/* q1 := plaintext block */
 	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x5			/* x3 := nrounds */
 	bl	aesarmv8_enc1		/* q0 := ctxt; trash x0/x3/q16 */
 	subs	x10, x10, #0x10		/* count down nbytes */
-	str	q0, [x2], #0x10		/* store ciphertext block */
+	st1	{v0.16b}, [x2], #0x10	/* store ciphertext block */
 	b.ne	1b			/* repeat if x10 is nonzero */
-	str	q0, [x4]		/* store chaining value */
+	st1	{v0.16b}, [x4]		/* store chaining value */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 2:	ret
 END(aesarmv8_cbc_enc)
@@ -533,18 +533,21 @@ END(aesarmv8_cbc_enc)
 ENTRY(aesarmv8_cbc_dec1)
 	stp	fp, lr, [sp, #-16]!	/* push stack frame */
 	mov	fp, sp
-	ldr	q24, [x4]		/* q24 := iv */
+	ld1	{v24.16b}, [x4]		/* q24 := iv */
 	mov	x9, x0			/* x9 := enckey */
 	

CVS commit: src/sys/crypto/aes

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 22:48:24 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_selftest.c
src/sys/crypto/aes/arch/x86: aes_sse2_subr.c

Log Message:
aes(9): Fix edge case in bitsliced SSE2 AES-CBC decryption.

Make sure self-tests exercise this edge case.

Discovered by confusion over code inspection of jak's adaptation of
aes_armv8_64.S for big-endian.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_selftest.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_selftest.c
diff -u src/sys/crypto/aes/aes_selftest.c:1.5 src/sys/crypto/aes/aes_selftest.c:1.6
--- src/sys/crypto/aes/aes_selftest.c:1.5	Sat Jul 25 22:36:42 2020
+++ src/sys/crypto/aes/aes_selftest.c	Tue Sep  8 22:48:24 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $	*/
+/*	$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.5 2020/07/25 22:36:42 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_selftest.c,v 1.6 2020/09/08 22:48:24 riastradh Exp $");
 
 #ifdef _KERNEL
 
@@ -210,7 +210,7 @@ aes_selftest_encdec_cbc(const struct aes
 	uint8_t in[144];
 	uint8_t outbuf[146] = { [0] = 0x1a, [145] = 0x1a }, *out = outbuf + 1;
 	uint8_t iv0[16], iv[16];
-	unsigned i;
+	unsigned i, j;
 
 	for (i = 0; i < 32; i++)
 		key[i] = i;
@@ -237,21 +237,26 @@ aes_selftest_encdec_cbc(const struct aes
 			"AES-%u-CBC dec", aes_keybits[i]);
 
 		/* Try incrementally, with IV update.  */
-		memcpy(iv, iv0, 16);
-		impl->ai_cbc_enc(&enc, in, out, 16, iv, aes_nrounds[i]);
-		impl->ai_cbc_enc(&enc, in + 16, out + 16, 128, iv,
-		    aes_nrounds[i]);
-		if (memcmp(out, expected[i], 144))
-			return aes_selftest_fail(impl, out, expected[i], 144,
-			    "AES-%u-CBC enc incremental", aes_keybits[i]);
-
-		memcpy(iv, iv0, 16);
-		impl->ai_cbc_dec(&dec, out, out, 128, iv, aes_nrounds[i]);
-		impl->ai_cbc_dec(&dec, out + 128, out + 128, 16, iv,
-		    aes_nrounds[i]);
-		if (memcmp(out, in, 144))
-			return aes_selftest_fail(impl, out, in, 144,
-			    "AES-%u-CBC dec incremental", aes_keybits[i]);
+		for (j = 0; j < 144; j += 16) {
+			memcpy(iv, iv0, 16);
+			impl->ai_cbc_enc(&enc, in, out, j, iv, aes_nrounds[i]);
+			impl->ai_cbc_enc(&enc, in + j, out + j, 144 - j, iv,
+			    aes_nrounds[i]);
+			if (memcmp(out, expected[i], 144))
+				return aes_selftest_fail(impl, out,
+				    expected[i], 144, "AES-%u-CBC enc inc %u",
+				    aes_keybits[i], j);
+
+			memcpy(iv, iv0, 16);
+			impl->ai_cbc_dec(&dec, out, out, j, iv,
+			    aes_nrounds[i]);
+			impl->ai_cbc_dec(&dec, out + j, out + j, 144 - j, iv,
+			    aes_nrounds[i]);
+			if (memcmp(out, in, 144))
+				return aes_selftest_fail(impl, out,
+				    in, 144, "AES-%u-CBC dec inc %u",
+				    aes_keybits[i], j);
+		}
 	}
 
 	if (outbuf[0] != 0x1a)

Index: src/sys/crypto/aes/arch/x86/aes_sse2_subr.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.4
--- src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3	Sat Jul 25 22:29:56 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_subr.c	Tue Sep  8 22:48:24 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.4 2020/09/08 22:48:24 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -200,11 +200,13 @@ aes_sse2_cbc_dec(const struct aesdec *de
 		case 48:
 			w = _mm_loadu_epi8(in + nbytes - 32);
 			q[1] = aes_sse2_interleave_in(w);
-			/*FALLTHROUGH*/
-		case 32:
 			w = _mm_loadu_epi8(in + nbytes - 48);
 			q[0] = aes_sse2_interleave_in(w);
-			/*FALLTHROUGH*/
+			break;
+		case 32:
+			w = _mm_loadu_epi8(in + nbytes - 32);
+			q[0] = aes_sse2_interleave_in(w);
+			break;
 		case 16:
 			break;
 		}



CVS commit: src/sys/crypto/aes

2020-09-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Sep  8 22:48:24 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_selftest.c
src/sys/crypto/aes/arch/x86: aes_sse2_subr.c

Log Message:
aes(9): Fix edge case in bitsliced SSE2 AES-CBC decryption.

Make sure self-tests exercise this edge case.

Discovered by confusion over code inspection of jak's adaptation of
aes_armv8_64.S for big-endian.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/aes_selftest.c
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-09-08 Thread Jonathan A. Kollasch
Module Name:src
Committed By:   jakllsch
Date:   Tue Sep  8 17:17:32 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: files.chacha_arm

Log Message:
use correct condition


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/files.chacha_arm

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-09-08 Thread Jonathan A. Kollasch
Module Name:src
Committed By:   jakllsch
Date:   Tue Sep  8 17:17:32 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: files.chacha_arm

Log Message:
use correct condition


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/files.chacha_arm

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/files.chacha_arm
diff -u src/sys/crypto/chacha/arch/arm/files.chacha_arm:1.3 src/sys/crypto/chacha/arch/arm/files.chacha_arm:1.4
--- src/sys/crypto/chacha/arch/arm/files.chacha_arm:1.3	Tue Jul 28 20:08:48 2020
+++ src/sys/crypto/chacha/arch/arm/files.chacha_arm	Tue Sep  8 17:17:32 2020
@@ -1,9 +1,9 @@
-#	$NetBSD: files.chacha_arm,v 1.3 2020/07/28 20:08:48 riastradh Exp $
+#	$NetBSD: files.chacha_arm,v 1.4 2020/09/08 17:17:32 jakllsch Exp $
 
 ifdef aarch64
 makeoptions	chacha	"COPTS.chacha_neon.c"+="-march=armv8-a"
 else
-makeoptions	aes	"COPTS.chacha_neon.c"+="-mfloat-abi=softfp -mfpu=neon"
+makeoptions	chacha	"COPTS.chacha_neon.c"+="-mfloat-abi=softfp -mfpu=neon"
 endif
 
 file	crypto/chacha/arch/arm/chacha_neon.c	chacha & (cpu_cortex | aarch64)



CVS commit: src/sys/crypto

2020-09-07 Thread Jonathan A. Kollasch
Module Name:src
Committed By:   jakllsch
Date:   Mon Sep  7 18:06:13 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h

Log Message:
Fix vgetq_lane_u32 for aarch64eb with GCC

Fixes NEON AES on aarch64eb


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.10 src/sys/crypto/aes/arch/arm/arm_neon.h:1.11
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.10	Sun Aug  9 02:49:38 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Mon Sep  7 18:06:13 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.10 2020/08/09 02:49:38 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.11 2020/09/07 18:06:13 jakllsch Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -232,7 +232,7 @@ static __inline uint32_t
 vgetq_lane_u32(uint32x4_t __v, uint8_t __i)
 {
 #ifdef __aarch64__
-	return __v[__i];
+	return __v[__neon_laneq_index(__v,__i)];
 #else
 	return (uint32_t)__builtin_neon_vget_laneuv4si((int32x4_t)__v, __i);
 #endif

Index: src/sys/crypto/chacha/arch/arm/arm_neon.h
diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.6 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.7
--- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.6	Sun Aug  9 02:49:38 2020
+++ src/sys/crypto/chacha/arch/arm/arm_neon.h	Mon Sep  7 18:06:13 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.6 2020/08/09 02:49:38 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.7 2020/09/07 18:06:13 jakllsch Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -232,7 +232,7 @@ static __inline uint32_t
 vgetq_lane_u32(uint32x4_t __v, uint8_t __i)
 {
 #ifdef __aarch64__
-	return __v[__i];
+	return __v[__neon_laneq_index(__v, __i)];
 #else
 	return (uint32_t)__builtin_neon_vget_laneuv4si((int32x4_t)__v, __i);
 #endif



CVS commit: src/sys/crypto

2020-09-07 Thread Jonathan A. Kollasch
Module Name:src
Committed By:   jakllsch
Date:   Mon Sep  7 18:06:13 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h

Log Message:
Fix vgetq_lane_u32 for aarch64eb with GCC

Fixes NEON AES on aarch64eb


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-09-07 Thread Jonathan A. Kollasch
Module Name:src
Committed By:   jakllsch
Date:   Mon Sep  7 18:05:17 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Use a working macro to detect big endian aarch64.

Fixes aarch64eb NEON ChaCha.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-09-07 Thread Jonathan A. Kollasch
Module Name:src
Committed By:   jakllsch
Date:   Mon Sep  7 18:05:17 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Use a working macro to detect big endian aarch64.

Fixes aarch64eb NEON ChaCha.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.6 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.7
--- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.6	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S	Mon Sep  7 18:05:17 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_64.S,v 1.6 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_64.S,v 1.7 2020/09/07 18:05:17 jakllsch Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: chacha_neon_64.S,v 1.6 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: chacha_neon_64.S,v 1.7 2020/09/07 18:05:17 jakllsch Exp $")
 
 #define	ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \
 STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r);   \
@@ -130,12 +130,12 @@ STEP(STEP19,a0,b0,c0,d0,a1,b1,c1,d1,a2,b
 #define	STEP19(a,b,c,d, t, r)	/* nothing */
 #endif
 
-#if _BYTE_ORDER == _LITTLE_ENDIAN
-#define	HTOLE32(x)
-#define	LE32TOH(x)
-#elif _BYTE_ORDER == _BIG_ENDIAN
+#if defined(__AARCH64EB__)
 #define	HTOLE32(x)	rev32	x, x
 #define	LE32TOH(x)	rev32	x, x
+#else
+#define	LE32TOH(x)
+#define	HTOLE32(x)
 #endif
 
 /*



CVS commit: src/sys/crypto/chacha/arch/arm

2020-08-23 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug 23 16:39:06 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_32.S

Log Message:
Adjust sp, not fp, to allocate a 32-byte temporary.

Costs another couple MOV instructions, but we can't skimp on this --
there's no red zone below sp for interrupts on arm, so we can't touch
anything there.  So just use fp to save sp and then adjust sp itself,
rather than using fp as a temporary register to point just below sp.

Should fix PR port-arm/55598 -- previously the ChaCha self-test
failed 33/1 trials triggered by sysctl during running system;
with the patch it has failed 0/1 trials.

(Presumably it happened more often at boot time, leading to 5/26
failures in the test bed, because we just enabled interrupts and some
devices are starting to deliver interrupts.)


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_32.S
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.4
--- src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.3	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_32.S	Sun Aug 23 16:39:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: chacha_neon_32.S,v 1.4 2020/08/23 16:39:06 riastradh Exp $")
 
 	.fpu	neon
 
@@ -54,7 +54,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2
  */
 
 .macro	ROUNDLD	a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3
-	vld1.8		{\c2-\c3}, [fp, :256]
+	vld1.8		{\c2-\c3}, [sp, :256]
 .endm
 
 .macro	ROUND	a0,a1,a2,a3, b0,b1,b2,b3, c0,c1,c2,c3, d0,d1,d2,d3, c0l, d0l,d0h,d1l,d1h,d2l,d2h,d3l,d3h
@@ -80,7 +80,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2
 	vadd.u32	\c2, \c2, \d2
 	vadd.u32	\c3, \c3, \d3
 
-	vst1.8		{\c0-\c1}, [fp, :256]	/* free c0 and c1 as temps */
+	vst1.8		{\c0-\c1}, [sp, :256]	/* free c0 and c1 as temps */
 
 	veor		\c0, \b0, \c0
 	veor		\c1, \b1, \c1
@@ -118,7 +118,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2
 	vtbl.8		\d3l, {\d3l}, \c0l
 	vtbl.8		\d3h, {\d3h}, \c0l
 
-	vld1.8		{\c0-\c1}, [fp, :256]	/* restore c0 and c1 */
+	vld1.8		{\c0-\c1}, [sp, :256]	/* restore c0 and c1 */
 
 	/* c += d; b ^= c; b <<<= 7 */
 	vadd.u32	\c2, \c2, \d2
@@ -126,7 +126,7 @@ RCSID("$NetBSD: chacha_neon_32.S,v 1.3 2
 	vadd.u32	\c0, \c0, \d0
 	vadd.u32	\c1, \c1, \d1
 
-	vst1.8		{\c2-\c3}, [fp, :256]	/* free c2 and c3 as temps */
+	vst1.8		{\c2-\c3}, [sp, :256]	/* free c2 and c3 as temps */
 
 	veor		\c2, \b2, \c2
 	veor		\c3, \b3, \c3
@@ -160,17 +160,18 @@ ENTRY(chacha_stream256_neon)
 	/* save callee-saves registers */
 	push	{r4, r5, r6, r7, r8, r10, fp, lr}
 	vpush	{d8-d15}
+	mov	fp, sp
 
 	/* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */
 	ldr	r7, .Lconstants_addr
 	adr	r6, .Lconstants_addr
 
 	/* reserve space for two 128-bit/16-byte q registers */
-	sub	fp, sp, #0x20
-	bic	fp, fp, #0x1f	/* align */
+	sub	sp, sp, #0x20
+	bic	sp, sp, #0x1f	/* align */
 
 	/* get parameters */
-	add	ip, sp, #96
+	add	ip, fp, #96
 	add	r7, r7, r6	/* r7 := .Lconstants (= v0123) */
 	ldm	ip, {r4, r5}	/* r4 := const, r5 := nr */
 	ldm	r2, {r6, r8, r10}	/* (r6, r8, r10) := nonce[0:12) */
@@ -311,7 +312,7 @@ ENTRY(chacha_stream256_neon)
 	vadd.u32 q3, q3, q8
 	vadd.u32 q7, q7, q8
 
-	vld1.8	{q8-q9}, [fp, :256]	/* restore q8-q9 */
+	vld1.8	{q8-q9}, [sp, :256]	/* restore q8-q9 */
 
 	vst1.8	{q0-q1}, [r0]!
 	vld1.8	{q0}, [r3]	/* q0 := key[16:32) */
@@ -354,9 +355,10 @@ ENTRY(chacha_stream256_neon)
 	/* zero temporary space on the stack */
 	vmov.i32 q0, #0
 	vmov.i32 q1, #0
-	vst1.8	{q0-q1}, [fp, :256]
+	vst1.8	{q0-q1}, [sp, :256]
 
 	/* restore callee-saves registers and stack */
+	mov	sp, fp
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, fp, lr}
 	bx	lr
@@ -374,17 +376,18 @@ ENTRY(chacha_stream_xor256_neon)
 	/* save callee-saves registers */
 	push	{r4, r5, r6, r7, r8, r10, fp, lr}
 	vpush	{d8-d15}
+	mov	fp, sp
 
 	/* r7 := .Lconstants - .Lconstants_addr, r6 := .Lconstants_addr */
 	ldr	r7, .Lconstants_addr
 	adr	r6, .Lconstants_addr
 
 	/* reserve space for two 128-bit/16-byte q registers */
-	sub	fp, sp, #0x20
-	bic	fp, fp, #0x1f	/* align */
+	sub	sp, sp, #0x20
+	bic	sp, sp, #0x1f	/* align */
 
 	/* get parameters */
-	add	ip, sp, #96
+	add	ip, fp, #96
 	add	r7, r7, r6	/* r7 := .Lconstants (= v0123) */
 	ldm	ip, {r4, r5, ip}	/* r4 := key, r5 := const, ip := nr */
 	ldm	r3, {r6, r8, r10}	/* (r6, r8, r10) := nonce[0:12) */
@@ -475,7 +478,7 @@ 

CVS commit: src/sys/crypto/chacha/arch/arm

2020-08-23 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug 23 16:39:06 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_32.S

Log Message:
Adjust sp, not fp, to allocate a 32-byte temporary.

Costs another couple MOV instructions, but we can't skimp on this --
there's no red zone below sp for interrupts on arm, so we can't touch
anything there.  So just use fp to save sp and then adjust sp itself,
rather than using fp as a temporary register to point just below sp.

Should fix PR port-arm/55598 -- previously the ChaCha self-test
failed 33/10000 trials triggered by sysctl during running system;
with the patch it has failed 0/10000 trials.

(Presumably it happened more often at boot time, leading to 5/26
failures in the test bed, because we just enabled interrupts and some
devices are starting to deliver interrupts.)


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/blake2

2020-08-20 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Aug 20 21:21:05 UTC 2020

Added Files:
src/sys/crypto/blake2: blake2s.c blake2s.h files.blake2s

Log Message:
Import small BLAKE2s implementation.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/crypto/blake2/blake2s.c \
src/sys/crypto/blake2/blake2s.h src/sys/crypto/blake2/files.blake2s

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Added files:

Index: src/sys/crypto/blake2/blake2s.c
diff -u /dev/null src/sys/crypto/blake2/blake2s.c:1.1
--- /dev/null	Thu Aug 20 21:21:05 2020
+++ src/sys/crypto/blake2/blake2s.c	Thu Aug 20 21:21:05 2020
@@ -0,0 +1,350 @@
+/*	$NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2015 Taylor R. Campbell
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: blake2s.c,v 1.1 2020/08/20 21:21:05 riastradh Exp $");
+
+#include 
+#include 
+
+#include 
+
+#else
+
+#define	_POSIX_C_SOURCE	200809L
+
+#include 
+#include 
+#include 
+
+#endif
+
+#include "blake2s.h"
+
+#include 
+
+static inline uint32_t
+rotr32(uint32_t x, unsigned c)
+{
+
+	return ((x >> c) | (x << (32 - c)));
+}
+
+#define	BLAKE2S_G(VA, VB, VC, VD, X, Y)	do  \
+{	  \
+	(VA) = (VA) + (VB) + (X);	  \
+	(VD) = rotr32((VD) ^ (VA), 16);	  \
+	(VC) = (VC) + (VD);		  \
+	(VB) = rotr32((VB) ^ (VC), 12);	  \
+	(VA) = (VA) + (VB) + (Y);	  \
+	(VD) = rotr32((VD) ^ (VA), 8);	  \
+	(VC) = (VC) + (VD);		  \
+	(VB) = rotr32((VB) ^ (VC), 7);	  \
+} while (0)
+
+static const uint32_t blake2s_iv[8] = {
+	0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU,
+	0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U,
+};
+
+static const uint8_t blake2s_sigma[10][16] = {
+	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+	{ 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+	{ 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
+	{  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
+	{  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
+	{  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
+	{ 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
+	{ 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
+	{  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
+	{ 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
+};
+
+static void
+blake2s_compress(uint32_t h[8], uint64_t c, uint32_t last,
+const uint8_t in[64])
+{
+	uint32_t v0,v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15;
+	uint32_t m[16];
+	unsigned i;
+
+	/* Load the variables: first 8 from state, next 8 from IV.  */
+	v0 = h[0];
+	v1 = h[1];
+	v2 = h[2];
+	v3 = h[3];
+	v4 = h[4];
+	v5 = h[5];
+	v6 = h[6];
+	v7 = h[7];
+	v8 = blake2s_iv[0];
+	v9 = blake2s_iv[1];
+	v10 = blake2s_iv[2];
+	v11 = blake2s_iv[3];
+	v12 = blake2s_iv[4];
+	v13 = blake2s_iv[5];
+	v14 = blake2s_iv[6];
+	v15 = blake2s_iv[7];
+
+	/* Incorporate the block counter and whether this is last.  */
+	v12 ^= c & 0xffffffffU;
+	v13 ^= c >> 32;
+	v14 ^= last;
+
+	/* Load the message block.  */
+	for (i = 0; i < 16; i++)
+		m[i] = le32dec(in + 4*i);
+
+	/* Transform the variables.  */
+	for (i = 0; i < 10; i++) {
+		const uint8_t *sigma = blake2s_sigma[i];
+
+		BLAKE2S_G(v0, v4,  v8, v12, m[sigma[ 0]], m[sigma[ 1]]);
+		BLAKE2S_G(v1, v5,  v9, v13, m[sigma[ 2]], m[sigma[ 3]]);
+		BLAKE2S_G(v2, v6, v10, v14, m[sigma[ 4]], m[sigma[ 5]]);
+		BLAKE2S_G(v3, v7, v11, v15, m[sigma[ 

CVS commit: src/sys/crypto/blake2

2020-08-20 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Thu Aug 20 21:21:05 UTC 2020

Added Files:
src/sys/crypto/blake2: blake2s.c blake2s.h files.blake2s

Log Message:
Import small BLAKE2s implementation.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/crypto/blake2/blake2s.c \
src/sys/crypto/blake2/blake2s.h src/sys/crypto/blake2/files.blake2s

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-08-16 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug 16 18:02:03 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S files.aesneon

Log Message:
Fix AES NEON code for big-endian softfp ARM.

...which is how the kernel runs.  Switch to using __SOFTFP__ for
consistency with how it gets exposed to C, although I'm not sure how
to get it defined automagically in the toolchain for .S files so
that's set manually in files.aesneon for now.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/files.aesneon

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.5	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Sun Aug 16 18:02:03 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: aes_neon_32.S,v 1.5 2020/08/08 14:47:01 riastradh Exp $")
+RCSID("$NetBSD: aes_neon_32.S,v 1.6 2020/08/16 18:02:03 riastradh Exp $")
 
 	.fpu	neon
 
@@ -228,15 +228,19 @@ END(dsbo_1)
  * aes_neon_enc1(const struct aesenc *enc@r0, uint8x16_t x@q0,
  * unsigned nrounds@r1)
  *
- *	With -mfloat-abi=soft(fp) (here spelled `#ifdef _KERNEL'):
+ *	With -mfloat-abi=soft(fp) (i.e., __SOFTFP__):
  *
  * uint8x16_t@(r0,r1,r2,r3)
  * aes_neon_enc1(const struct aesenc *enc@r0,
  * uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
  */
 ENTRY(aes_neon_enc1)
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	d0, r3, r2		/* d0 := x lo */
+#else
 	vmov	d0, r2, r3		/* d0 := x lo */
+#endif
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
@@ -434,10 +438,15 @@ ENTRY(aes_neon_enc1)
 
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	r1, r0, d0
+	vmov	r3, r2, d1
+#else
 	vmov	r0, r1, d0
 	vmov	r2, r3, d1
 #endif
+#endif
 	bx	lr
 END(aes_neon_enc1)
 
@@ -457,8 +466,12 @@ END(aes_neon_enc1)
  * uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
  */
 ENTRY(aes_neon_dec1)
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	d0, r3, r2		/* d0 := x lo */
+#else
 	vmov	d0, r2, r3		/* d0 := x lo */
+#endif
 	vldr	d1, [sp]		/* d1 := x hi */
 	ldr	r1, [sp, #8]		/* r1 := nrounds */
 #endif
@@ -669,9 +682,14 @@ ENTRY(aes_neon_dec1)
 
 	vpop	{d8-d15}
 	pop	{r4, r5, r6, r7, r8, r10, r11, lr}
-#ifdef _KERNEL
+#ifdef __SOFTFP__
+#ifdef __ARM_BIG_ENDIAN
+	vmov	r1, r0, d0
+	vmov	r3, r2, d1
+#else
 	vmov	r0, r1, d0
 	vmov	r2, r3, d1
 #endif
+#endif
 	bx	lr
 END(aes_neon_dec1)

Index: src/sys/crypto/aes/arch/arm/files.aesneon
diff -u src/sys/crypto/aes/arch/arm/files.aesneon:1.3 src/sys/crypto/aes/arch/arm/files.aesneon:1.4
--- src/sys/crypto/aes/arch/arm/files.aesneon:1.3	Tue Jun 30 17:03:13 2020
+++ src/sys/crypto/aes/arch/arm/files.aesneon	Sun Aug 16 18:02:03 2020
@@ -1,4 +1,4 @@
-#	$NetBSD: files.aesneon,v 1.3 2020/06/30 17:03:13 riastradh Exp $
+#	$NetBSD: files.aesneon,v 1.4 2020/08/16 18:02:03 riastradh Exp $
 
 ifdef aarch64
 makeoptions	aes	"COPTS.aes_neon.c"+="-march=armv8-a"
@@ -8,6 +8,8 @@ makeoptions	aes	"COPTS.aes_neon.c"+="-mf
 makeoptions	aes	"COPTS.aes_neon_subr.c"+="-mfloat-abi=softfp -mfpu=neon"
 endif
 
+makeoptions	aes	"AOPTS.aes_neon_32.S"+="-D__SOFTFP__"
+
 file	crypto/aes/arch/arm/aes_neon.c		aes & (cpu_cortex | aarch64)
 file	crypto/aes/arch/arm/aes_neon_impl.c	aes & (cpu_cortex | aarch64)
 file	crypto/aes/arch/arm/aes_neon_subr.c	aes & (cpu_cortex | aarch64)



CVS commit: src/sys/crypto/aes/arch/arm

2020-08-16 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug 16 18:02:03 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S files.aesneon

Log Message:
Fix AES NEON code for big-endian softfp ARM.

...which is how the kernel runs.  Switch to using __SOFTFP__ for
consistency with how it gets exposed to C, although I'm not sure how
to get it defined automagically in the toolchain for .S files so
that's set manually in files.aesneon for now.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/files.aesneon

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes

2020-08-10 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug 10 06:27:29 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_ccm.c

Log Message:
Add hack to compile aes_ccm_tag() with -O0 for m68k for GCC8.

GCC 8 miscompiles aes_ccm_tag() for m68k with optimization level -O[12],
which results in failure in aes_ccm_selftest():

| aes_ccm_selftest: tag 0: 8 bytes @ 0x4d3e38
| 03 80 5f 08 22 6f cb fe  | .._."o..
| aes_ccm_selftest: verify 0 failed
| ...
| WARNING: module error: built-in module aes_ccm failed its MODULE_CMD_INIT, error 5

This is observed for amiga (A1200, 68060), mac68k (Quadra 840AV, 68040),
and luna68k (nono, 68030 emulator). However, it is not for sun3 (TME, 68020
emulator) and sun2 (TME, 68010 emulator). At the moment, it is unclear
whether this is due to differences b/w 68010-20 vs 68030-60, or something
wrong with TME.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_ccm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes

2020-08-10 Thread Rin Okuyama
Module Name:src
Committed By:   rin
Date:   Mon Aug 10 06:27:29 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_ccm.c

Log Message:
Add hack to compile aes_ccm_tag() with -O0 for m68k for GCC8.

GCC 8 miscompiles aes_ccm_tag() for m68k with optimization level -O[12],
which results in failure in aes_ccm_selftest():

| aes_ccm_selftest: tag 0: 8 bytes @ 0x4d3e38
| 03 80 5f 08 22 6f cb fe  | .._."o..
| aes_ccm_selftest: verify 0 failed
| ...
| WARNING: module error: built-in module aes_ccm failed its MODULE_CMD_INIT, error 5

This is observed for amiga (A1200, 68060), mac68k (Quadra 840AV, 68040),
and luna68k (nono, 68030 emulator). However, it is not for sun3 (TME, 68020
emulator) and sun2 (TME, 68010 emulator). At the moment, it is unclear
whether this is due to differences b/w 68010-20 vs 68030-60, or something
wrong with TME.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/aes_ccm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_ccm.c
diff -u src/sys/crypto/aes/aes_ccm.c:1.4 src/sys/crypto/aes/aes_ccm.c:1.5
--- src/sys/crypto/aes/aes_ccm.c:1.4	Mon Jul 27 20:44:30 2020
+++ src/sys/crypto/aes/aes_ccm.c	Mon Aug 10 06:27:29 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $	*/
+/*	$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.4 2020/07/27 20:44:30 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.5 2020/08/10 06:27:29 rin Exp $");
 
 #include 
 #include 
@@ -301,6 +301,9 @@ aes_ccm_dec(struct aes_ccm *C, const voi
 }
 
 void
+#if defined(__m68k__) && __GNUC_PREREQ__(8, 0)
+__attribute__((__optimize__("O0")))
+#endif
 aes_ccm_tag(struct aes_ccm *C, void *out)
 {
 	uint8_t *auth = C->authctr;



CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 02:49:38 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h

Log Message:
Fix some clang neon intrinsics.

Compile-tested only, with -Wno-nonportable-vector-initializers.  Need
to address -- and test -- this stuff properly but this is progress.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.9 src/sys/crypto/aes/arch/arm/arm_neon.h:1.10
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.9	Sun Aug  9 02:48:38 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Sun Aug  9 02:49:38 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.9 2020/08/09 02:48:38 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.10 2020/08/09 02:49:38 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -85,6 +85,8 @@ typedef __attribute__((neon_vector_type(
 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
 typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
 
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+
 typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
 
 typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
@@ -218,7 +220,7 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h
 	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);			  \
 	uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r,	  \
 	(int8x16_t)__hi_r, (__i), 48);  \
-	return __builtin_shufflevector(__r, __r,			  \
+	__builtin_shufflevector(__r, __r,  \
 	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);			  \
 })
 #endif	/* __LITTLE_ENDIAN */
@@ -326,19 +328,37 @@ vqtbl1q_u8(uint8x16_t __tab, uint8x16_t 
 	return (uint8x16_t)__out64;
 #endif
 #elif defined(__clang__)
-#ifdef __LITTLE_ENDIAN__
-	return (uint8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__tab,
-	(int8x16_t)__idx, 48);
-#else
-	uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo,
+#ifndef __LITTLE_ENDIAN__
+	__tab = __builtin_shufflevector(__tab, __tab,
 	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
-	uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi,
+	__idx = __builtin_shufflevector(__idx, __idx,
 	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
-	uint32x4_t __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab,
-	(int8x16_t)__idx, __i, 48);
-	return __builtin_shufflevector(__r, __r,
+#endif
+	uint8x16_t __r;
+#ifdef __aarch64__
+	__r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab, (int8x16_t)__idx, 48);
+#else
+	uint64x2_t __tab64 = (uint64x2_t)__tab;
+	uint8x8_t __tablo = (uint8x8_t)__tab64[0];
+	uint8x8_t __tabhi = (uint8x8_t)__tab64[1];
+	uint64x2_t __idx64, __out64;
+	int8x8_t __idxlo, __idxhi, __outlo, __outhi;
+
+	__idx64 = (uint64x2_t)__idx;
+	__idxlo = (int8x8_t)__idx64[0];
+	__idxhi = (int8x8_t)__idx64[1];
+	__outlo = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+	(int8x8_t)__tabhi, (int8x8_t)__idxlo, 16);
+	__outhi = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+	(int8x8_t)__tabhi, (int8x8_t)__idxhi, 16);
+	__out64 = (uint64x2_t) { (uint64_t)__outlo, (uint64_t)__outhi };
+	__r = (uint8x16_t)__out64;
+#endif
+#ifndef __LITTLE_ENDIAN__
+	__r = __builtin_shufflevector(__r, __r,
 	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
 #endif
+	return __r;
 #endif
 }
 
@@ -579,7 +599,7 @@ vsriq_n_u32(uint32x4_t __vins, uint32x4_
 	(int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins),	  \
 	(int32x4_t)(__vsh), (__bits), 34)
 #else
-#define	vsliq_n_s32(__vins, __vsh, __bits) (  \
+#define	vsriq_n_s32(__vins, __vsh, __bits) (  \
 {	  \
 	int32x4_t __tvins = (__vins);	  \
 	int32x4_t __tvsh = (__vsh);	  \

Index: src/sys/crypto/chacha/arch/arm/arm_neon.h
diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.6
--- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5	Sun Aug  9 02:48:38 2020
+++ src/sys/crypto/chacha/arch/arm/arm_neon.h	Sun Aug  9 02:49:38 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.5 2020/08/09 02:48:38 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.6 2020/08/09 02:49:38 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -85,6 +85,8 @@ typedef __attribute__((neon_vector_type(
 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
 typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
 
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+
 typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
 
 typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
@@ -218,7 +220,7 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h
 	

CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 02:49:38 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h

Log Message:
Fix some clang neon intrinsics.

Compile-tested only, with -Wno-nonportable-vector-initializers.  Need
to address -- and test -- this stuff properly but this is progress.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 02:48:38 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_subr.c arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h

Log Message:
Use vshlq_n_s32 rather than vsliq_n_s32 with zero destination.

Not sure why I reached for vsliq_n_s32 at first -- probably so I
wouldn't have to deal with a new intrinsic in arm_neon.h!


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.7
--- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6	Sun Aug  9 02:00:57 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c	Sun Aug  9 02:48:38 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.7 2020/08/09 02:48:38 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -151,7 +151,7 @@ aes_neon_xts_update(uint8x16_t t8)
 	t = vreinterpretq_s32_u8(t8);
 	mask = vcltq_s32(t, zero);		/* -1 if high bit set else 0 */
 	mask = vextq_u32(mask, mask, 3);	/* rotate quarters */
-	t_ = vsliq_n_s32(zero, t, 1);		/* shift */
+	t_ = vshlq_n_s32(t, 1);			/* shift */
 	t_ ^= carry & mask;
 
 	return vreinterpretq_u8_s32(t_);

Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.8 src/sys/crypto/aes/arch/arm/arm_neon.h:1.9
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.8	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h	Sun Aug  9 02:48:38 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.8 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.9 2020/08/09 02:48:38 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -466,6 +466,22 @@ vsetq_lane_u64(uint64_t __x, uint64x2_t 
 
 #if defined(__GNUC__) && !defined(__clang__)
 _INTRINSATTR
+static __inline int32x4_t
+vshlq_n_s32(int32x4_t __v, uint8_t __bits)
+{
+#ifdef __aarch64__
+	return (int32x4_t)__builtin_aarch64_ashlv4si(__v, __bits);
+#else
+	return (int32x4_t)__builtin_neon_vshl_nv4si(__v, __bits);
+#endif
+}
+#elif defined(__clang__)
+#define	vshlq_n_s32(__v, __bits)	  \
+	(int32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 34)
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
 static __inline uint32x4_t
 vshlq_n_u32(uint32x4_t __v, uint8_t __bits)
 {

Index: src/sys/crypto/chacha/arch/arm/arm_neon.h
diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.4 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5
--- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.4	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/chacha/arch/arm/arm_neon.h	Sun Aug  9 02:48:38 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.4 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.5 2020/08/09 02:48:38 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -466,6 +466,22 @@ vsetq_lane_u64(uint64_t __x, uint64x2_t 
 
 #if defined(__GNUC__) && !defined(__clang__)
 _INTRINSATTR
+static __inline int32x4_t
+vshlq_n_s32(int32x4_t __v, uint8_t __bits)
+{
+#ifdef __aarch64__
+	return (int32x4_t)__builtin_aarch64_ashlv4si(__v, __bits);
+#else
+	return (int32x4_t)__builtin_neon_vshl_nv4si(__v, __bits);
+#endif
+}
+#elif defined(__clang__)
+#define	vshlq_n_s32(__v, __bits)	  \
+	(int32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 34)
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
 static __inline uint32x4_t
 vshlq_n_u32(uint32x4_t __v, uint8_t __bits)
 {



CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 02:48:38 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_subr.c arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h

Log Message:
Use vshlq_n_s32 rather than vsliq_n_s32 with zero destination.

Not sure why I reached for vsliq_n_s32 at first -- probably so I
wouldn't have to deal with a new intrinsic in arm_neon.h!


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 02:00:57 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_subr.c

Log Message:
Nix outdated comment.

I implemented this parallelism a couple weeks ago.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 02:00:57 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_subr.c

Log Message:
Nix outdated comment.

I implemented this parallelism a couple weeks ago.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/arm/aes_neon_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_subr.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5 src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.6
--- src/sys/crypto/aes/arch/arm/aes_neon_subr.c:1.5	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_subr.c	Sun Aug  9 02:00:57 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.5 2020/08/08 14:47:01 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon_subr.c,v 1.6 2020/08/09 02:00:57 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -287,12 +287,6 @@ aes_neon_cbcmac_update1(const struct aes
 	storeblock(auth0, auth);
 }
 
-/*
- * XXX On aarch64, we have enough registers that we should be able to
- * pipeline two simultaneous vpaes computations in an `aes_neon_enc2'
- * function, which should substantially improve CCM throughput.
- */
-
 void
 aes_neon_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
 uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],



CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 01:59:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon_imm.h
src/sys/crypto/chacha/arch/arm: arm_neon_imm.h

Log Message:
Fix mistake in big-endian arm clang.

Swapped the two halves (only gcc does that, I think) and wrote j,i
backwards, oops.

(I don't have a big-endian arm clang build handy to test; hoping this
works.)


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/arm_neon_imm.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon_imm.h:1.1 src/sys/crypto/aes/arch/arm/arm_neon_imm.h:1.2
--- src/sys/crypto/aes/arch/arm/arm_neon_imm.h:1.1	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon_imm.h	Sun Aug  9 01:59:04 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon_imm.h,v 1.1 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: arm_neon_imm.h,v 1.2 2020/08/09 01:59:04 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
 #define	V_N_U8(a,b,c,d,e,f,g,h)		  \
 	{h,g,f,e,d,c,b,a}
 #define	VQ_N_U8(a,b,c,d,e,f,g,h, i,j,k,l,m,n,o,p)			  \
-	{h,g,f,e,d,c,b,a, p,o,n,m,l,k,i,j}
+	{p,o,n,m,l,k,j,i, h,g,f,e,d,c,b,a}
 #define	VQ_N_U32(a,b,c,d)		  \
 	{d,c, b,a}
 #endif

Index: src/sys/crypto/chacha/arch/arm/arm_neon_imm.h
diff -u src/sys/crypto/chacha/arch/arm/arm_neon_imm.h:1.1 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h:1.2
--- src/sys/crypto/chacha/arch/arm/arm_neon_imm.h:1.1	Sat Aug  8 14:47:01 2020
+++ src/sys/crypto/chacha/arch/arm/arm_neon_imm.h	Sun Aug  9 01:59:04 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon_imm.h,v 1.1 2020/08/08 14:47:01 riastradh Exp $	*/
+/*	$NetBSD: arm_neon_imm.h,v 1.2 2020/08/09 01:59:04 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
 #define	V_N_U8(a,b,c,d,e,f,g,h)		  \
 	{h,g,f,e,d,c,b,a}
 #define	VQ_N_U8(a,b,c,d,e,f,g,h, i,j,k,l,m,n,o,p)			  \
-	{h,g,f,e,d,c,b,a, p,o,n,m,l,k,i,j}
+	{p,o,n,m,l,k,j,i, h,g,f,e,d,c,b,a}
 #define	VQ_N_U32(a,b,c,d)		  \
 	{d,c, b,a}
 #endif



CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sun Aug  9 01:59:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon_imm.h
src/sys/crypto/chacha/arch/arm: arm_neon_imm.h

Log Message:
Fix mistake in big-endian arm clang.

Swapped the two halves (only gcc does that, I think) and wrote j,i
backwards, oops.

(I don't have a big-endian arm clang build handy to test; hoping this
works.)


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Aug  8 14:47:01 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon.c aes_neon_32.S
aes_neon_impl.h aes_neon_subr.c arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c
chacha_neon_32.S chacha_neon_64.S
Added Files:
src/sys/crypto/aes/arch/arm: arm_neon_imm.h
src/sys/crypto/chacha/arch/arm: arm_neon_imm.h

Log Message:
Fix ARM NEON implementations of AES and ChaCha on big-endian ARM.

New macros such as VQ_N_U32(a,b,c,d) for NEON vector initializers.
Needed because GCC and Clang disagree on the ordering of lanes,
depending on whether it's 64-bit big-endian, 32-bit big-endian, or
little-endian -- and, bizarrely, both of them disagree with the
architectural numbering of lanes.

Experimented with using

static const uint8_t x8[16] = {...};

uint8x16_t x = vld1q_u8(x8);

which doesn't require knowing anything about the ordering of lanes,
but this generates considerably worse code and apparently confuses
GCC into not recognizing the constant value of x8.

Fix some clang mistakes while here too.


To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon.c \
src/sys/crypto/aes/arch/arm/aes_neon_32.S \
src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/chacha/arch/arm/chacha_neon.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.11 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.12
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.11	Mon Jul 27 20:57:23 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Sat Aug  8 14:47:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -26,11 +26,9 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include 
-
 #include 
 
-RCSID("$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $")
+RCSID("$NetBSD: aes_armv8_64.S,v 1.12 2020/08/08 14:47:01 riastradh Exp $")
 
 	.arch_extension	aes
 
@@ -921,19 +919,13 @@ ENTRY(aesarmv8_ccm_enc1)
 	ld1	{v5.4s}, [x11]		/* q5 := (0,0,0,1) (host-endian) */
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
-#if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v2.16b, v2.16b		/* q2 := ctr (host-endian) */
-#endif
 	_ALIGN_TEXT
 1:	ldr	q3, [x1], #0x10		/* q3 := plaintext block */
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x5			/* x3 := nrounds */
-#if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v1.16b, v2.16b		/* q1 := ctr (big-endian) */
-#else
-	mov	v1.16b, v2.16b		/* q1 := ctr (big-endian) */
-#endif
 	eor	v0.16b, v0.16b, v3.16b	/* q0 := auth ^ ptxt */
 	bl	aesarmv8_enc2		/* q0 := auth', q1 := pad;
 	 * trash x0/x3/q16 */
@@ -941,9 +933,7 @@ ENTRY(aesarmv8_ccm_enc1)
 	subs	x10, x10, #0x10		/* count down bytes */
 	str	q3, [x2], #0x10		/* store ciphertext block */
 	b.ne	1b			/* repeat if more blocks */
-#if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v2.16b, v2.16b		/* q2 := ctr (big-endian) */
-#endif
 	stp	q0, q2, [x4]		/* store updated auth/ctr */
 	ldp	fp, lr, [sp], #16	/* pop stack frame */
 	ret
@@ -968,18 +958,12 @@ ENTRY(aesarmv8_ccm_dec1)
 	ld1	{v5.4s}, [x11]		/* q5 := (0,0,0,1) (host-endian) */
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
-#if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v2.16b, v2.16b		/* q2 := ctr (host-endian) */
-#endif
 
 	/* Decrypt the first block.  */
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
 	mov	x3, x5			/* x3 := nrounds */
-#if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v0.16b, v2.16b		/* q0 := ctr (big-endian) */
-#else
-	mov	v0.16b, v2.16b		/* q0 := ctr (big-endian) */
-#endif
 	ldr	q3, [x1], #0x10		/* q3 := ctxt */
 	bl	aesarmv8_enc1		/* q0 := pad; trash x0/x3/q16 */
 	b	2f
@@ -995,11 +979,7 @@ ENTRY(aesarmv8_ccm_dec1)
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x5			/* x3 := nrounds */
-#if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v0.16b, v2.16b		/* q0 := ctr (big-endian) */
-#else
-	mov	v0.16b, v2.16b		/* q0 := ctr 

CVS commit: src/sys/crypto

2020-08-08 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Sat Aug  8 14:47:01 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon.c aes_neon_32.S
aes_neon_impl.h aes_neon_subr.c arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c
chacha_neon_32.S chacha_neon_64.S
Added Files:
src/sys/crypto/aes/arch/arm: arm_neon_imm.h
src/sys/crypto/chacha/arch/arm: arm_neon_imm.h

Log Message:
Fix ARM NEON implementations of AES and ChaCha on big-endian ARM.

New macros such as VQ_N_U32(a,b,c,d) for NEON vector initializers.
Needed because GCC and Clang disagree on the ordering of lanes,
depending on whether it's 64-bit big-endian, 32-bit big-endian, or
little-endian -- and, bizarrely, both of them disagree with the
architectural numbering of lanes.

Experimented with using

static const uint8_t x8[16] = {...};

uint8x16_t x = vld1q_u8(x8);

which doesn't require knowing anything about the ordering of lanes,
but this generates considerably worse code and apparently confuses
GCC into not recognizing the constant value of x8.

Fix some clang mistakes while here too.


To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/arm/aes_neon.c \
src/sys/crypto/aes/arch/arm/aes_neon_32.S \
src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r0 -r1.1 src/sys/crypto/chacha/arch/arm/arm_neon_imm.h
cvs rdiff -u -r1.7 -r1.8 src/sys/crypto/chacha/arch/arm/chacha_neon.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-29 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Wed Jul 29 14:23:59 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_32.S

Log Message:
Issue three more swaps to save eight stores.

Reduces code size and yields a small (~2%) cgd throughput boost.

Remove duplicate comment while here.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-29 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Wed Jul 29 14:23:59 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_32.S

Log Message:
Issue three more swaps to save eight stores.

Reduces code size and yields a small (~2%) cgd throughput boost.

Remove duplicate comment while here.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_32.S
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.2
--- src/sys/crypto/chacha/arch/arm/chacha_neon_32.S:1.1	Tue Jul 28 20:08:48 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_32.S	Wed Jul 29 14:23:59 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_32.S,v 1.1 2020/07/28 20:08:48 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_32.S,v 1.2 2020/07/29 14:23:59 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: chacha_neon_32.S,v 1.1 2020/07/28 20:08:48 riastradh Exp $")
+RCSID("$NetBSD: chacha_neon_32.S,v 1.2 2020/07/29 14:23:59 riastradh Exp $")
 
 	.fpu	neon
 
@@ -305,21 +305,29 @@ ENTRY(chacha_stream256_neon)
 	 *	q7 = (x3[4], x3[5]; x3[6], x3[7])
 	 *
 	 * The first two rows to write out are q0 = x0[0:4) and q4 =
-	 * x0[4:8).  If we first swap q1 and q4, then once we've
-	 * written them out we free up consecutive registers q0-q1 for
-	 * store-multiple.
+	 * x0[4:8).  Swapping q1<->q4, q3<->q6, q9<->q12, and q11<->q14
+	 * enables us to issue all stores in consecutive pairs:
+	 *	x0 in q0-q1
+	 *	x1 in q8-q9
+	 *	x2 in q2-q3
+	 *	x3 in q10-q11
+	 *	x4 in q4-q5
+	 *	x5 in q12-q13
+	 *	x6 in q6-q7
+	 *	x7 in q14-q15
 	 */
 
 	vswp	q1, q4
+	vswp	q3, q6
 
 	vadd.u32 q0, q0, q9
 	vadd.u32 q4, q4, q9
 	vadd.u32 q2, q2, q9
-	vadd.u32 q3, q3, q9
+	vadd.u32 q6, q6, q9
 
 	vadd.u32 q1, q1, q8
 	vadd.u32 q5, q5, q8
-	vadd.u32 q6, q6, q8
+	vadd.u32 q3, q3, q8
 	vadd.u32 q7, q7, q8
 
 	vld1.32 {q8-q9}, [fp, :256]	/* restore q8-q9 */
@@ -349,14 +357,17 @@ ENTRY(chacha_stream256_neon)
 	vswp	d19, d22
 	vswp	d27, d30
 
+	vswp	q9, q12
+	vswp	q11, q14
+
 	vadd.u32 q8, q8, q0
-	vadd.u32 q9, q9, q0
+	vadd.u32 q12, q12, q0
 	vadd.u32 q10, q10, q0
-	vadd.u32 q11, q11, q0
+	vadd.u32 q14, q14, q0
 
-	vadd.u32 q12, q12, q1
+	vadd.u32 q9, q9, q1
 	vadd.u32 q13, q13, q1
-	vadd.u32 q14, q14, q1
+	vadd.u32 q11, q11, q1
 	vadd.u32 q15, q15, q1
 
 	LE32TOH(q8)
@@ -368,28 +379,18 @@ ENTRY(chacha_stream256_neon)
 	LE32TOH(q14)
 	LE32TOH(q15)
 
-	/* prepare to zero temporary space on stack */
-	vmov.i32 q0, #0
-	vmov.i32 q1, #0
-
-	/* vst1.32	{q0}, [r0]! */
-	/* vst1.32	{q1}, [r0]! */	/* (was q4 before vswp) */
-	vst1.32	{q8}, [r0]!
-	vst1.32	{q12}, [r0]!
-	vst1.32	{q2}, [r0]!
-	vst1.32	{q6}, [r0]!
-	vst1.32	{q10}, [r0]!
-	vst1.32	{q14}, [r0]!
-	vst1.32	{q4}, [r0]!	/* (was q1 before vswp) */
-	vst1.32	{q5}, [r0]!
-	vst1.32	{q9}, [r0]!
-	vst1.32	{q13}, [r0]!
-	vst1.32 {q3}, [r0]!
-	vst1.32 {q7}, [r0]!
-	vst1.32 {q11}, [r0]!
-	vst1.32 {q15}, [r0]
+	/* vst1.32	{q0-q1}, [r0]! */
+	vst1.32	{q8-q9}, [r0]!
+	vst1.32	{q2-q3}, [r0]!
+	vst1.32	{q10-q11}, [r0]!
+	vst1.32	{q4-q5}, [r0]!
+	vst1.32	{q12-q13}, [r0]!
+	vst1.32 {q6-q7}, [r0]!
+	vst1.32 {q14-q15}, [r0]
 
 	/* zero temporary space on the stack */
+	vmov.i32 q0, #0
+	vmov.i32 q1, #0
 	vst1.8	{q0-q1}, [fp, :256]
 
 	/* restore callee-saves registers and stack */
@@ -481,42 +482,8 @@ ENTRY(chacha_stream_xor256_neon)
 	 * in only 16 registers, compute p[i] ^ (y[i] + x[i]) for i in
 	 * {0,1,2,...,15}.  The twist is that the p[i] and the y[i] are
 	 * transposed from one another, and the x[i] are in general
-	 * registers and memory.  So we have:
-	 *
-	 *	q0 = (x0[0], x1[0]; x2[0], x3[0])
-	 *	q1 = (x0[1], x1[1]; x2[1], x3[1])
-	 *	q2 = (x0[2], x1[2]; x2[2], x3[2])
-	 *	q3 = (x0[3], x1[3]; x2[3], x3[3])
-	 *	...
-	 *	q15 = (x0[15], x1[15]; x2[15], x3[15])
-	 *
-	 * where xi[j] is the jth word of the ith 16-word block.  Zip
-	 * consecutive pairs with vzip.32, and you get:
-	 *
-	 *	q0 = (x0[0], x0[1]; x1[0], x1[1])
-	 *	q1 = (x2[0], x2[1]; x3[0], x3[1])
-	 *	q2 = (x0[2], x0[3]; x1[2], x1[3])
-	 *	q3 = (x2[2], x2[3]; x3[2], x3[3])
-	 *	...
-	 *	q15 = (x2[14], x2[15]; x3[14], x3[15])
-	 *
-	 * As 64-bit d registers, this is:
-	 *
-	 *	d0 = (x0[0], x0[1])	d1 = (x1[0], x1[1])
-	 *	d2 = (x2[0], x2[1])	d3 = (x3[0], x3[1])
-	 *	d4 = (x0[2], x0[3])	d5 = (x1[2], x1[3])
-	 *	d6 = (x2[2], x2[3])	d7 = (x3[2], x3[3])
-	 *	...
-	 *	d30 = (x2[14], x2[15])	d31 = (x3[14], x3[15])
-	 *
-	 * Swap d1<->d4, d3<->d6, ..., and you get:
-	 *
-	 *	q0 = (x0[0], x0[1]; x0[2], x0[3])
-	 *	q1 = (x2[0], x2[1]; x2[2], x2[3])
-	 *	q2 = (x1[0], x1[1]; x1[2], x1[3])
-	 *	q3 = (x3[0], x3[1]; x3[2], x3[3])
-	 *	...
-	 *	q15 = (x15[0], x15[1]; x15[2], x15[3])
+	 * registers and memory.  

CVS commit: src/sys/crypto/aes/arch/arm

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 20:11:09 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.c aes_neon_impl.h aes_neon_subr.c
arm_neon.h

Log Message:
Draft 2x vectorized neon vpaes for aarch64.

Gives a modest speed boost on rk3399 (Cortex-A53/A72), around 20% in
cgd tests, for parallelizable operations like CBC decryption; same
improvement should probably carry over to rpi4 CPU which lacks
ARMv8.0-AES.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon.c \
src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon.c
diff -u src/sys/crypto/aes/arch/arm/aes_neon.c:1.3 src/sys/crypto/aes/arch/arm/aes_neon.c:1.4
--- src/sys/crypto/aes/arch/arm/aes_neon.c:1.3	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon.c	Tue Jul 28 20:11:09 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.3 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_neon.c,v 1.4 2020/07/28 20:11:09 riastradh Exp $");
 
 #include 
 
@@ -589,6 +589,59 @@ aes_neon_enc1(const struct aesenc *enc, 
 	return vqtbl1q_u8(x, sr[rmod4]);
 }
 
+uint8x16x2_t
+aes_neon_enc2(const struct aesenc *enc, uint8x16x2_t x, unsigned nrounds)
+{
+	const uint32_t *rk32 = enc->aese_aes.aes_rk;
+	uint8x16_t inv_ = *(const volatile uint8x16_t *)&inv;
+	uint8x16_t inva_ = *(const volatile uint8x16_t *)&inva;
+	uint8x16_t sb1_0 = ((const volatile uint8x16_t *)sb1)[0];
+	uint8x16_t sb1_1 = ((const volatile uint8x16_t *)sb1)[1];
+	uint8x16_t sb2_0 = ((const volatile uint8x16_t *)sb2)[0];
+	uint8x16_t sb2_1 = ((const volatile uint8x16_t *)sb2)[1];
+	uint8x16_t x0 = x.val[0], x1 = x.val[1];
+	uint8x16_t io0, jo0, io1, jo1;
+	unsigned rmod4 = 0;
+
+	x0 = aes_schedule_transform(x0, ipt);
+	x1 = aes_schedule_transform(x1, ipt);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	for (;;) {
+		uint8x16_t A_0, A2_0, A2_B_0, A2_B_D_0;
+		uint8x16_t A_1, A2_1, A2_B_1, A2_B_D_1;
+
+		subbytes(&io0, &jo0, x0, inv_, inva_);
+		subbytes(&io1, &jo1, x1, inv_, inva_);
+
+		rk32 += 4;
+		rmod4 = (rmod4 + 1) % 4;
+		if (--nrounds == 0)
+			break;
+
+		A_0 = vqtbl1q_u8(sb1_0, io0) ^ vqtbl1q_u8(sb1_1, jo0);
+		A_1 = vqtbl1q_u8(sb1_0, io1) ^ vqtbl1q_u8(sb1_1, jo1);
+		A_0 ^= loadroundkey(rk32);
+		A_1 ^= loadroundkey(rk32);
+		A2_0 = vqtbl1q_u8(sb2_0, io0) ^ vqtbl1q_u8(sb2_1, jo0);
+		A2_1 = vqtbl1q_u8(sb2_0, io1) ^ vqtbl1q_u8(sb2_1, jo1);
+		A2_B_0 = A2_0 ^ vqtbl1q_u8(A_0, mc_forward[rmod4]);
+		A2_B_1 = A2_1 ^ vqtbl1q_u8(A_1, mc_forward[rmod4]);
+		A2_B_D_0 = A2_B_0 ^ vqtbl1q_u8(A_0, mc_backward[rmod4]);
+		A2_B_D_1 = A2_B_1 ^ vqtbl1q_u8(A_1, mc_backward[rmod4]);
+		x0 = A2_B_D_0 ^ vqtbl1q_u8(A2_B_0, mc_forward[rmod4]);
+		x1 = A2_B_D_1 ^ vqtbl1q_u8(A2_B_1, mc_forward[rmod4]);
+	}
+	x0 = vqtbl1q_u8(sbo[0], io0) ^ vqtbl1q_u8(sbo[1], jo0);
+	x1 = vqtbl1q_u8(sbo[0], io1) ^ vqtbl1q_u8(sbo[1], jo1);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	return (uint8x16x2_t) { .val = {
+		[0] = vqtbl1q_u8(x0, sr[rmod4]),
+		[1] = vqtbl1q_u8(x1, sr[rmod4]),
+	} };
+}
+
 uint8x16_t
 aes_neon_dec1(const struct aesdec *dec, uint8x16_t x, unsigned nrounds)
 {
@@ -628,4 +681,60 @@ aes_neon_dec1(const struct aesdec *dec, 
 	return vqtbl1q_u8(x, sr[i]);
 }
 
+uint8x16x2_t
+aes_neon_dec2(const struct aesdec *dec, uint8x16x2_t x, unsigned nrounds)
+{
+	const uint32_t *rk32 = dec->aesd_aes.aes_rk;
+	unsigned i = 3 & ~(nrounds - 1);
+	uint8x16_t inv_ = *(const volatile uint8x16_t *)&inv;
+	uint8x16_t inva_ = *(const volatile uint8x16_t *)&inva;
+	uint8x16_t x0 = x.val[0], x1 = x.val[1];
+	uint8x16_t io0, jo0, io1, jo1, mc;
+
+	x0 = aes_schedule_transform(x0, dipt);
+	x1 = aes_schedule_transform(x1, dipt);
+	x0 ^= loadroundkey(rk32);
+	x1 ^= loadroundkey(rk32);
+	rk32 += 4;
+
+	mc = mc_forward[3];
+	for (;;) {
+		subbytes(&io0, &jo0, x0, inv_, inva_);
+		subbytes(&io1, &jo1, x1, inv_, inva_);
+		if (--nrounds == 0)
+			break;
+
+		x0 = vqtbl1q_u8(dsb9[0], io0) ^ vqtbl1q_u8(dsb9[1], jo0);
+		x1 = vqtbl1q_u8(dsb9[0], io1) ^ vqtbl1q_u8(dsb9[1], jo1);
+		x0 ^= loadroundkey(rk32);
+		x1 ^= loadroundkey(rk32);
+		rk32 += 4;/* next round key */
+
+		x0 = vqtbl1q_u8(x0, mc);
+		x1 = vqtbl1q_u8(x1, mc);
+		x0 ^= vqtbl1q_u8(dsbd[0], io0) ^ vqtbl1q_u8(dsbd[1], jo0);
+		x1 ^= vqtbl1q_u8(dsbd[0], io1) ^ vqtbl1q_u8(dsbd[1], jo1);
+
+		x0 = vqtbl1q_u8(x0, mc);
+		x1 = vqtbl1q_u8(x1, mc);
+		x0 ^= vqtbl1q_u8(dsbb[0], io0) ^ vqtbl1q_u8(dsbb[1], jo0);
+		x1 ^= vqtbl1q_u8(dsbb[0], 

CVS commit: src/sys/crypto/aes/arch/arm

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 20:11:09 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon.c aes_neon_impl.h aes_neon_subr.c
arm_neon.h

Log Message:
Draft 2x vectorized neon vpaes for aarch64.

Gives a modest speed boost on rk3399 (Cortex-A53/A72), around 20% in
cgd tests, for parallelizable operations like CBC decryption; same
improvement should probably carry over to rpi4 CPU which lacks
ARMv8.0-AES.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon.c \
src/sys/crypto/aes/arch/arm/aes_neon_subr.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_impl.h
cvs rdiff -u -r1.6 -r1.7 src/sys/crypto/aes/arch/arm/arm_neon.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 20:05:33 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon.c

Log Message:
Fix big-endian build with appropriate casts around vrev32q_u8.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.5 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.6
--- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.5	Mon Jul 27 20:58:56 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon.c	Tue Jul 28 20:05:33 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon.c,v 1.5 2020/07/27 20:58:56 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon.c,v 1.6 2020/07/28 20:05:33 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -53,7 +53,7 @@ vhtole_u32(uint32x4_t x)
 #if _BYTE_ORDER == _LITTLE_ENDIAN
 	return x;
 #elif _BYTE_ORDER == _BIG_ENDIAN
-	return vrev32q_u8(x);
+	return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)));
 #endif
 }
 
@@ -63,7 +63,7 @@ vletoh_u32(uint32x4_t x)
 #if _BYTE_ORDER == _LITTLE_ENDIAN
 	return x;
 #elif _BYTE_ORDER == _BIG_ENDIAN
-	return vrev32q_u8(x);
+	return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)));
 #endif
 }
 



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 20:05:33 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon.c

Log Message:
Fix big-endian build with appropriate casts around vrev32q_u8.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/chacha_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 15:42:41 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Fix typo in comment.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.5
--- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.4	Mon Jul 27 20:57:23 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S	Tue Jul 28 15:42:41 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_64.S,v 1.5 2020/07/28 15:42:41 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,7 +28,7 @@
 
 #include 
 
-RCSID("$NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $")
+RCSID("$NetBSD: chacha_neon_64.S,v 1.5 2020/07/28 15:42:41 riastradh Exp $")
 
 #define	ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \
 STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r);   \
@@ -142,7 +142,7 @@ STEP(STEP19,a0,b0,c0,d0,a1,b1,c1,d1,a2,b
  * chacha_stream256_neon(uint8_t s[256]@x0,
  * uint32_t blkno@w1,
  * const uint8_t nonce[12]@x2,
- * const uint8_t key[12]@x3,
+ * const uint8_t key[32]@x3,
  * const uint8_t const[16]@x4,
  * unsigned nr@w5)
  */



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 15:42:41 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Fix typo in comment.


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/x86

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 14:01:35 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
Initialize authctr in both branches.

I guess I didn't test the unaligned case, weird.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.5 src/sys/crypto/aes/arch/x86/aes_via.c:1.6
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.5	Sat Jul 25 22:31:32 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Tue Jul 28 14:01:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -739,6 +739,7 @@ aesvia_ccm_enc1(const struct aesenc *enc
 		authctr = authctrbuf;
 		ccmenc_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmenc_aligned_evcnt.ev_count++;
 	}
 	c0 = le32dec(authctr0 + 16 + 4*0);
@@ -812,6 +813,7 @@ aesvia_ccm_dec1(const struct aesenc *enc
 		le32enc(authctr + 16 + 4*2, c2);
 		ccmdec_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmdec_aligned_evcnt.ev_count++;
 	}
 



CVS commit: src/sys/crypto/aes/arch/x86

2020-07-28 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Tue Jul 28 14:01:35 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
Initialize authctr in both branches.

I guess I didn't test the unaligned case, weird.


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



Re: CVS commit: src/sys/crypto/aes

2020-07-28 Thread Roy Marples

On 27/07/2020 21:44, Taylor R Campbell wrote:

Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:44:30 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_ccm.c aes_ccm.h

Log Message:
Gather auth[16] and ctr[16] into one authctr[32].

Should appease clang.


clang is still not appeased :/

--- aes_via.o ---
/home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:807:6: error: variable 
'authctr' is used uninitialized whenever 'if' condition is false 
[-Werror,-Wsometimes-uninitialized]

if ((uintptr_t)authctr0 & 0xf) {
^
/home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:820:10: note: 
uninitialized use occurs here

be32enc(authctr + 16 + 4*3, ++c3);
^~~
/home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:807:2: note: remove the 
'if' if its condition is always true

if ((uintptr_t)authctr0 & 0xf) {
^~~
/home/roy/src/hg/src/sys/crypto/aes/arch/x86/aes_via.c:796:18: note: initialize 
the variable 'authctr' to silence this warning

uint8_t *authctr;
^
 = NULL
1 error generated.
*** [aes_via.o] Error code 1


CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:58:56 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c

Log Message:
Note that VSRI seems to hurt here.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:58:56 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c

Log Message:
Note that VSRI seems to hurt here.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/chacha/arch/arm/chacha_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/arm_neon.h
diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.2 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.3
--- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.2	Mon Jul 27 20:58:06 2020
+++ src/sys/crypto/chacha/arch/arm/arm_neon.h	Mon Jul 27 20:58:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.2 2020/07/27 20:58:06 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.3 2020/07/27 20:58:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -529,6 +529,40 @@ vsliq_n_s32(int32x4_t __vins, int32x4_t 
 #endif	/* __LITTLE_ENDIAN__ */
 #endif
 
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
+static __inline uint32x4_t
+vsriq_n_u32(uint32x4_t __vins, uint32x4_t __vsh, uint8_t __bits)
+{
+#ifdef __aarch64__
+	return __builtin_aarch64_usri_nv4si_uuus(__vins, __vsh, __bits);
+#else
+	return (uint32x4_t)__builtin_neon_vsri_nv4si((int32x4_t)__vins,
+	(int32x4_t)__vsh, __bits);
+#endif
+}
+#elif defined(__clang__)
+#ifdef __LITTLE_ENDIAN__
+#define	vsriq_n_u32(__vins, __vsh, __bits)  \
+	(int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins),	  \
+	(int32x4_t)(__vsh), (__bits), 34)
+#else
+#define	vsliq_n_s32(__vins, __vsh, __bits) (  \
+{	  \
+	int32x4_t __tvins = (__vins);	  \
+	int32x4_t __tvsh = (__vsh);	  \
+	uint8_t __tbits = (__bits);	  \
+	int32x4_t __vins_r = __builtin_shufflevector(__tvins, __tvins,	  \
+	3,2,1,0);			  \
+	int32x4_t __vsh_r = __builtin_shufflevector(__tvsh, __tvsh,	  \
+	3,2,1,0);			  \
+	int32x4_t __r = __builtin_neon_vsriq_n_v(__tvins, __tvsh, __tbits,\
+	34);			  \
+	__builtin_shufflevector(__r, __r, 3,2,1,0);			  \
+})
+#endif
+#endif
+
 _INTRINSATTR
 static __inline void
 vst1q_u32(uint32_t *__p32, uint32x4_t __v)

Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.4 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.5
--- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.4	Mon Jul 27 20:58:06 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon.c	Mon Jul 27 20:58:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon.c,v 1.4 2020/07/27 20:58:06 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon.c,v 1.5 2020/07/27 20:58:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -36,7 +36,15 @@ static inline uint32x4_t
 vrolq_n_u32(uint32x4_t x, uint8_t n)
 {
 
+	/*
+	 * Tempting to use VSHL/VSRI instead of VSHL/VSHR/VORR, but in
+	 * practice it hurts performance at least on Cortex-A8.
+	 */
+#if 1
 	return vshlq_n_u32(x, n) | vshrq_n_u32(x, 32 - n);
+#else
+	return vsriq_n_u32(vshlq_n_u32(x, n), x, 32 - n);
+#endif
 }
 
 static inline uint32x4_t



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:58:07 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c

Log Message:
Take advantage of REV32 and TBL for 16-bit and 8-bit rotations.

However, disable use of (V)TBL on armv7/aarch32 for now, because for
some reason GCC spills things to the stack despite having plenty of
free registers, which hurts performance more than it helps at least
on ARM Cortex-A8.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/arm_neon.h
diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.1 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.2
--- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.1	Sat Jul 25 22:51:57 2020
+++ src/sys/crypto/chacha/arch/arm/arm_neon.h	Mon Jul 27 20:58:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm_neon.h,v 1.1 2020/07/25 22:51:57 riastradh Exp $	*/
+/*	$NetBSD: arm_neon.h,v 1.2 2020/07/27 20:58:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -39,6 +39,7 @@
 typedef __Int32x4_t int32x4_t;
 typedef __Int64x2_t int64x2_t;
 typedef __Int8x16_t int8x16_t;
+typedef __Uint16x8_t uint16x8_t;
 typedef __Uint32x4_t uint32x4_t;
 typedef __Uint64x2_t uint64x2_t;
 typedef __Uint8x16_t uint8x16_t;
@@ -46,6 +47,7 @@ typedef __Uint8x16_t uint8x16_t;
 typedef __simd128_int32_t int32x4_t;
 typedef __simd128_int64_t int64x2_t;
 typedef __simd128_int8_t int8x16_t;
+typedef __simd128_uint16_t uint16x8_t;
 typedef __simd128_uint32_t uint32x4_t;
 typedef __simd128_uint64_t uint64x2_t;
 typedef __simd128_uint8_t uint8x16_t;
@@ -70,9 +72,11 @@ typedef struct { uint8x8_t val[2]; } uin
 typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
 typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
 typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
+
 typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
 typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
+typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
 
 typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
 typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
@@ -330,6 +334,27 @@ vreinterpretq_s32_u8(uint8x16_t __v)
 }
 
 _INTRINSATTR
+static __inline uint16x8_t
+vreinterpretq_u16_u32(uint32x4_t __v)
+{
+	return (uint16x8_t)__v;
+}
+
+_INTRINSATTR
+static __inline uint32x4_t
+vreinterpretq_u32_u16(uint16x8_t __v)
+{
+	return (uint32x4_t)__v;
+}
+
+_INTRINSATTR
+static __inline uint32x4_t
+vreinterpretq_u32_u64(uint64x2_t __v)
+{
+	return (uint32x4_t)__v;
+}
+
+_INTRINSATTR
 static __inline uint32x4_t
 vreinterpretq_u32_u8(uint8x16_t __v)
 {
@@ -338,6 +363,13 @@ vreinterpretq_u32_u8(uint8x16_t __v)
 
 _INTRINSATTR
 static __inline uint64x2_t
+vreinterpretq_u64_u32(uint32x4_t __v)
+{
+	return (uint64x2_t)__v;
+}
+
+_INTRINSATTR
+static __inline uint64x2_t
 vreinterpretq_u64_u8(uint8x16_t __v)
 {
 	return (uint64x2_t)__v;
@@ -365,6 +397,17 @@ vreinterpretq_u8_u64(uint64x2_t __v)
 }
 
 _INTRINSATTR
+static __inline uint16x8_t
+vrev32q_u16(uint16x8_t __v)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return __builtin_shuffle(__v, (uint16x8_t) { 1,0, 3,2, 5,4, 7,6 });
+#elif defined(__clang__)
+	return __builtin_shufflevector(__v,  1,0, 3,2, 5,4, 7,6);
+#endif
+}
+
+_INTRINSATTR
 static __inline uint8x16_t
 vrev32q_u8(uint8x16_t __v)
 {
@@ -531,4 +574,58 @@ vst1q_u8(uint8_t *__p8, uint8x16_t __v)
 #endif
 }
 
+#ifndef __aarch64__		/* XXX */
+
+_INTRINSATTR
+static __inline uint8x8_t
+vtbl1_u8(uint8x8_t __tab, uint8x8_t __idx)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return (uint8x8_t)__builtin_neon_vtbl1v8qi((int8x8_t)__tab,
+	(int8x8_t)__idx);
+#elif defined(__clang__)
+	uint8x8_t __ret;
+#ifndef __LITTLE_ENDIAN__
+	__tab = __builtin_shufflevector(__tab, __tab, 7,6,5,4,3,2,1,0);
+	__idx = __builtin_shufflevector(__idx, __idx, 7,6,5,4,3,2,1,0);
+#endif
+	__ret = (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__tab,
+	(int8x8_t)__idx, 16);
+#ifndef __LITTLE_ENDIAN__
+	__ret = __builtin_shufflevector(__ret, __ret, 7,6,5,4,3,2,1,0);
+#endif
+	return __ret;
+#endif
+}
+
+_INTRINSATTR
+static __inline uint8x8_t
+vtbl2_u8(uint8x8x2_t __tab, uint8x8_t __idx)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	union {
+		uint8x8x2_t __u8x8x82;
+		__builtin_neon_ti __ti;
+	} __u = { __tab };
+	return (uint8x8_t)__builtin_neon_vtbl2v8qi(__u.__ti, (int8x8_t)__idx);
+#elif defined(__clang__)
+	uint8x8_t __ret;
+#ifndef __LITTLE_ENDIAN__
+	__tab.val[0] = __builtin_shufflevector(__tab.val[0], __tab.val[0],
+	7,6,5,4,3,2,1,0);
+	__tab.val[1] = __builtin_shufflevector(__tab.val[1], __tab.val[1],
+	

CVS commit: src/sys/crypto

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:57:23 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S
src/sys/crypto/aes/arch/x86: aes_ni_64.S
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Add RCSIDs to the AES and ChaCha .S sources.


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_ni_64.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.11
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10	Mon Jul 27 20:54:11 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Mon Jul 27 20:57:23 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.10 2020/07/27 20:54:11 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -30,6 +30,8 @@
 
 #include <aarch64/asm.h>
 
+RCSID("$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $")
+
 	.arch_extension	aes
 
 /*

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.3 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.4
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.3	Mon Jul 27 20:53:22 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Mon Jul 27 20:57:23 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.3 2020/07/27 20:53:22 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,6 +28,8 @@
 
 #include <arm/asm.h>
 
+RCSID("$NetBSD: aes_neon_32.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $")
+
 	.fpu	neon
 
 	.text

Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.5 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.6
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.5	Mon Jul 27 20:53:22 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Mon Jul 27 20:57:23 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.5 2020/07/27 20:53:22 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,6 +28,8 @@
 
 #include <machine/asm.h>
 
+RCSID("$NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $")
+
 /*
  * MOVDQA/MOVDQU are Move Double Quadword (Aligned/Unaligned), defined
  * to operate on integers; MOVAPS/MOVUPS are Move (Aligned/Unaligned)

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.4
--- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.3	Mon Jul 27 20:53:23 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S	Mon Jul 27 20:57:23 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_64.S,v 1.3 2020/07/27 20:53:23 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -28,6 +28,8 @@
 
 #include <aarch64/asm.h>
 
+RCSID("$NetBSD: chacha_neon_64.S,v 1.4 2020/07/27 20:57:23 riastradh Exp $")
+
 #define	ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \
 STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r);   \
 STEP(STEP1,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r);   \



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:58:07 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: arm_neon.h chacha_neon.c

Log Message:
Take advantage of REV32 and TBL for 16-bit and 8-bit rotations.

However, disable use of (V)TBL on armv7/aarch32 for now, because for
some reason GCC spills things to the stack despite having plenty of
free registers, which hurts performance more than it helps at least
on ARM Cortex-A8.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/arm_neon.h
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:57:23 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S
src/sys/crypto/aes/arch/x86: aes_ni_64.S
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Add RCSIDs to the AES and ChaCha .S sources.


To generate a diff of this commit:
cvs rdiff -u -r1.10 -r1.11 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_ni_64.S
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:54:12 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Issue aese/aesmc and aesd/aesimc in pairs.

Advised by the aarch64 optimization guide; increases cgd throughput
by about 10%.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:54:12 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S

Log Message:
Issue aese/aesmc and aesd/aesimc in pairs.

Advised by the aarch64 optimization guide; increases cgd throughput
by about 10%.


To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/aes_armv8_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.10
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9	Mon Jul 27 20:53:22 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Mon Jul 27 20:54:11 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.10 2020/07/27 20:54:11 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -1041,15 +1041,18 @@ END(ctr32_inc)
 	.type	aesarmv8_enc1,@function
 aesarmv8_enc1:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q0 := MixColumns(q0) */
+1:	/* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0)))) */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
 	aese	v0.16b, v16.16b
-	ldr	q16, [x0], #0x10		/* load next round key */
-	b.ne	1b
+	ldr	q16, [x0]		/* load last round key */
+	/* q0 := AddRoundKey_q16(q0) */
 	eor	v0.16b, v0.16b, v16.16b
 	ret
 END(aesarmv8_enc1)
@@ -1067,17 +1070,21 @@ END(aesarmv8_enc1)
 	.type	aesarmv8_enc2,@function
 aesarmv8_enc2:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := MixColumns(q[i]) */
+1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v16.16b
 	aesmc	v1.16b, v1.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
 	aese	v0.16b, v16.16b
 	aese	v1.16b, v16.16b
-	ldr	q16, [x0], #0x10		/* load next round key */
-	b.ne	1b
+	ldr	q16, [x0]		/* load last round key */
+	/* q[i] := AddRoundKey_q16(q[i]) */
 	eor	v0.16b, v0.16b, v16.16b
 	eor	v1.16b, v1.16b, v16.16b
 	ret
@@ -1097,18 +1104,28 @@ END(aesarmv8_enc2)
 	.type	aesarmv8_enc8,@function
 aesarmv8_enc8:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := MixColumns(q[i]) */
+1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
+	aese	v0.16b, v16.16b
 	aesmc	v0.16b, v0.16b
+	aese	v1.16b, v16.16b
 	aesmc	v1.16b, v1.16b
+	aese	v2.16b, v16.16b
 	aesmc	v2.16b, v2.16b
+	aese	v3.16b, v16.16b
 	aesmc	v3.16b, v3.16b
+	aese	v4.16b, v16.16b
 	aesmc	v4.16b, v4.16b
+	aese	v5.16b, v16.16b
 	aesmc	v5.16b, v5.16b
+	aese	v6.16b, v16.16b
 	aesmc	v6.16b, v6.16b
+	aese	v7.16b, v16.16b
 	aesmc	v7.16b, v7.16b
-2:	subs	x3, x3, #1
+	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
+	b.ne	1b
 	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
 	aese	v0.16b, v16.16b
 	aese	v1.16b, v16.16b
@@ -1118,9 +1135,9 @@ aesarmv8_enc8:
 	aese	v5.16b, v16.16b
 	aese	v6.16b, v16.16b
 	aese	v7.16b, v16.16b
-	ldr	q16, [x0], #0x10	/* load next round key */
-	b.ne	1b
-	eor	v0.16b, v0.16b, v16.16b	/* AddRoundKey */
+	ldr	q16, [x0]		/* load last round key */
+	/* q[i] := AddRoundKey_q16(q[i]) */
+	eor	v0.16b, v0.16b, v16.16b
 	eor	v1.16b, v1.16b, v16.16b
 	eor	v2.16b, v2.16b, v16.16b
 	eor	v3.16b, v3.16b, v16.16b
@@ -1144,15 +1161,19 @@ END(aesarmv8_enc8)
 	.type	aesarmv8_dec1,@function
 aesarmv8_dec1:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q0 := InMixColumns(q0) */
-	aesimc	v0.16b, v0.16b
-2:	subs	x3, x3, #1
-	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+1:	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
 	aesd	v0.16b, v16.16b
+	/* q0 := InMixColumns(q0) */
+	aesimc	v0.16b, v0.16b
 	ldr	q16, [x0], #0x10	/* load next round key */
+	subs	x3, x3, #1
 	b.ne	1b
+	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
+	aesd	v0.16b, v16.16b
+	ldr	q16, [x0]		/* load last round key */
+	/* q0 := AddRoundKey_q16(q0) */
 	eor	v0.16b, v0.16b, v16.16b
 	ret
 END(aesarmv8_dec1)
@@ -1171,18 +1192,29 @@ END(aesarmv8_dec1)
 	.type	aesarmv8_dec8,@function
 aesarmv8_dec8:
 	ldr	q16, [x0], #0x10	/* load round key */
-	b	2f
+	sub	x3, x3, #1
 	_ALIGN_TEXT
-1:	/* q[i] := InMixColumns(q[i]) */
+1:	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
+	aesd	v0.16b, v16.16b
+	/* q[i] := InMixColumns(q[i]) */
 	aesimc	v0.16b, v0.16b
+	aesd	v1.16b, v16.16b
 	aesimc	v1.16b, v1.16b
+	aesd	v2.16b, v16.16b
 	aesimc	v2.16b, v2.16b
+	aesd	v3.16b, v16.16b
 	aesimc	v3.16b, 

CVS commit: src/sys/crypto

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:53:23 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S
src/sys/crypto/aes/arch/x86: aes_ni_64.S
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Align critical-path loops in AES and ChaCha.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_ni_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.8 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.8	Sat Jul 25 22:33:04 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Mon Jul 27 20:53:22 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.8 2020/07/25 22:33:04 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -440,6 +440,7 @@ END(aesarmv8_setenckey256)
 ENTRY(aesarmv8_enctodec)
 	ldr	q0, [x0, x2, lsl #4]	/* load last round key */
 	b	2f
+	_ALIGN_TEXT
 1:	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
 2:	str	q0, [x1], #0x10	/* store round key */
 	subs	x2, x2, #1	/* count down round */
@@ -503,6 +504,7 @@ ENTRY(aesarmv8_cbc_enc)
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q0, [x4]		/* q0 := chaining value */
+	_ALIGN_TEXT
 1:	ldr	q1, [x1], #0x10		/* q1 := plaintext block */
 	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
 	mov	x0, x9			/* x0 := enckey */
@@ -539,6 +541,7 @@ ENTRY(aesarmv8_cbc_dec1)
 	ldr	q0, [x1, #-0x10]!	/* q0 := last ciphertext block */
 	str	q0, [x4]		/* update iv */
 	b	2f
+	_ALIGN_TEXT
 1:	ldr	q31, [x1, #-0x10]!	/* q31 := chaining value */
 	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
 	str	q0, [x2, #-0x10]!	/* store plaintext block */
@@ -576,6 +579,7 @@ ENTRY(aesarmv8_cbc_dec8)
 	ldp	q6, q7, [x1, #-0x20]!	/* q6, q7 := last ciphertext blocks */
 	str	q7, [x4]		/* update iv */
 	b	2f
+	_ALIGN_TEXT
 1:	ldp	q6, q7, [x1, #-0x20]!
 	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
 	stp	q0, q1, [x2, #-0x20]!
@@ -629,6 +633,7 @@ ENTRY(aesarmv8_xts_enc1)
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	ldr	q0, [x1], #0x10		/* q0 := ptxt */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x5			/* x3 := nrounds */
@@ -661,6 +666,7 @@ ENTRY(aesarmv8_xts_enc8)
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
@@ -729,6 +735,7 @@ ENTRY(aesarmv8_xts_dec1)
 	mov	x9, x0			/* x9 := deckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	ldr	q0, [x1], #0x10		/* q0 := ctxt */
 	mov	x0, x9			/* x0 := deckey */
 	mov	x3, x5			/* x3 := nrounds */
@@ -761,6 +768,7 @@ ENTRY(aesarmv8_xts_dec8)
 	mov	x9, x0			/* x9 := deckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
@@ -879,6 +887,7 @@ ENTRY(aesarmv8_cbcmac_update1)
 	ldr	q0, [x3]		/* q0 := initial authenticator */
 	mov	x9, x0			/* x9 := enckey */
 	mov	x5, x3			/* x5 := &auth (enc1 trashes x3) */
+	_ALIGN_TEXT
 1:	ldr	q1, [x1], #0x10		/* q1 := plaintext block */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x4			/* x3 := nrounds */
@@ -913,6 +922,7 @@ ENTRY(aesarmv8_ccm_enc1)
 #if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v2.16b, v2.16b		/* q2 := ctr (host-endian) */
 #endif
+	_ALIGN_TEXT
 1:	ldr	q3, [x1], #0x10		/* q3 := plaintext block */
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
 	mov	x0, x9			/* x0 := enckey */
@@ -972,6 +982,7 @@ ENTRY(aesarmv8_ccm_dec1)
 	bl	aesarmv8_enc1		/* q0 := pad; trash x0/x3/q16 */
 	b	2f
 
+	_ALIGN_TEXT
 1:	/*
 	 * Authenticate the last block and decrypt the next block
 	 * simultaneously.
@@ -1031,6 +1042,7 @@ END(ctr32_inc)
 aesarmv8_enc1:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	_ALIGN_TEXT
 1:	/* q0 := MixColumns(q0) */
 	aesmc	v0.16b, v0.16b
 2:	subs	x3, x3, #1
@@ -1056,6 +1068,7 @@ END(aesarmv8_enc1)
 aesarmv8_enc2:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	_ALIGN_TEXT
 1:	/* q[i] := MixColumns(q[i]) */
 	aesmc	v0.16b, v0.16b
 	aesmc	v1.16b, v1.16b
@@ -1085,6 +1098,7 @@ END(aesarmv8_enc2)
 aesarmv8_enc8:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	

CVS commit: src/sys/crypto

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:53:23 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S
src/sys/crypto/aes/arch/x86: aes_ni_64.S
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Align critical-path loops in AES and ChaCha.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_ni_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/aes/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:52:11 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
PIC for aes_neon_32.S.

Without this, tests/sys/crypto/aes/t_aes fails to start on armv7
because of R_ARM_ABS32 relocations in a nonwritable text segment for
a PIE -- which atf quietly ignores in the final report!  Yikes.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.2
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.1	Mon Jun 29 23:57:56 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Mon Jul 27 20:52:10 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.1 2020/06/29 23:57:56 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.2 2020/07/27 20:52:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -30,8 +30,14 @@
 
 	.fpu	neon
 
+	.text
+	.p2align 2
+.Lconstants_addr:
+	.long	.Lconstants - .
+
 	.section .rodata
 	.p2align 4
+.Lconstants:
 
 	.type	inv,_ASM_TYPE_OBJECT
 inv:
@@ -239,7 +245,7 @@ ENTRY(aes_neon_enc1)
 	 * r3: rmod4
 	 * r4: mc_forward
 	 * r5: mc_backward
-	 * r6,r7,r8,r10,r11: temporaries
+	 * r6,r7,r8,r10,r11,r12: temporaries
 	 * q0={d0-d1}: x/ak/A
 	 * q1={d2-d3}: 0x0f0f...
 	 * q2={d4-d5}: lo/k/j/io
@@ -258,23 +264,30 @@ ENTRY(aes_neon_enc1)
 	 * q15={d30-d31}: A2_B/sr[rmod4]
 	 */
 
+	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
+	ldr	r12, .Lconstants_addr
+	adr	r11, .Lconstants_addr
+
 	vld1.64	{d28-d29}, [r0 :128]!	/* q14 = *rk++ */
 	movw	r3, #0
 	vmov.i8	q1, #0x0f
 
+	/* r12 := .Lconstants */
+	add	r12, r12, r11
+
 	/* (q4, q5) := (iptlo, ipthi) */
-	ldr	r6, =iptlo
-	ldr	r7, =ipthi
+	add	r6, r12, #(iptlo - .Lconstants)
+	add	r7, r12, #(ipthi - .Lconstants)
 	vld1.64	{d8-d9}, [r6 :128]
 	vld1.64	{d10-d11}, [r7 :128]
 
 	/* load the rest of the constants */
-	ldr	r4, =sb1_0
-	ldr	r5, =sb1_1
-	ldr	r6, =sb2_0
-	ldr	r7, =sb2_1
-	ldr	r8, =inv
-	ldr	r10, =inva
+	add	r4, r12, #(sb1_0 - .Lconstants)
+	add	r5, r12, #(sb1_1 - .Lconstants)
+	add	r6, r12, #(sb2_0 - .Lconstants)
+	add	r7, r12, #(sb2_1 - .Lconstants)
+	add	r8, r12, #(inv - .Lconstants)
+	add	r10, r12, #(inva - .Lconstants)
 	vld1.64	{d12-d13}, [r4 :128]	/* q6 = sb1[0] */
 	vld1.64	{d14-d15}, [r5 :128]	/* q7 = sb1[1] */
 	vld1.64	{d16-d17}, [r6 :128]	/* q8 = sb2[0] */
@@ -283,8 +296,8 @@ ENTRY(aes_neon_enc1)
 	vld1.64	{d22-d23}, [r10 :128]	/* q11 = inva */
 
 	/* (r4, r5) := (_forward[0], _backward[0]) */
-	ldr	r4, =mc_forward
-	ldr	r5, =mc_backward
+	add	r4, r12, #(mc_forward - .Lconstants)
+	add	r5, r12, #(mc_backward - .Lconstants)
 
 	/* (q2, q3) := (lo, hi) */
 	vshr.u8	q3, q0, #4
@@ -392,9 +405,9 @@ ENTRY(aes_neon_enc1)
 	bne	1b
 
 	/* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */
-	ldr	r8, =sr
-	ldr	r6, =sbo_0
-	ldr	r7, =sbo_1
+	add	r8, r12, #(sr - .Lconstants)
+	add	r6, r12, #(sbo_0 - .Lconstants)
+	add	r7, r12, #(sbo_1 - .Lconstants)
 	add	r8, r8, r3, lsl #4
 	vld1.64	{d12-d13}, [r6 :128]
 	vld1.64	{d14-d15}, [r7 :128]
@@ -469,23 +482,30 @@ ENTRY(aes_neon_dec1)
 	 * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)]
 	 */
 
+	/* r12 := .Lconstants - .Lconstants_addr, r11 := .Lconstants_addr */
+	ldr	r12, .Lconstants_addr
+	adr	r11, .Lconstants_addr
+
 	vld1.64	{d28-d29}, [r0 :128]!	/* q14 = *rk++ */
 	rsb	r3, r1, #0		/* r3 := ~(x - 1) = -x */
 	vmov.i8	q1, #0x0f
 	and	r3, r3, #3		/* r3 := 3 & ~(x - 1) */
 
+	/* r12 := .Lconstants */
+	add	r12, r12, r11
+
 	/* (q4, q5) := (diptlo, dipthi) */
-	ldr	r6, =diptlo
-	ldr	r7, =dipthi
+	add	r6, r12, #(diptlo - .Lconstants)
+	add	r7, r12, #(dipthi - .Lconstants)
 	vld1.64	{d8-d9}, [r6 :128]
 	vld1.64	{d10-d11}, [r7 :128]
 
 	/* load the rest of the constants */
-	ldr	r4, =dsbb_0
-	ldr	r5, =dsbb_1
-	ldr	r6, =inv
-	ldr	r7, =inva
-	ldr	r8, =.Lmc_forward_3
+	add	r4, r12, #(dsbb_0 - .Lconstants)
+	add	r5, r12, #(dsbb_1 - .Lconstants)
+	add	r6, r12, #(inv - .Lconstants)
+	add	r7, r12, #(inva - .Lconstants)
+	add	r8, r12, #(.Lmc_forward_3 - .Lconstants)
 	vld1.64	{d12-d13}, [r4 :128]	/* q6 := dsbb[0] */
 	vld1.64	{d14-d15}, [r5 :128]	/* q7 := dsbb[1] */
 	vld1.64	{d20-d21}, [r6 :128]	/* q10 := inv */
@@ -504,8 +524,8 @@ ENTRY(aes_neon_dec1)
 	vtbl.8	d7, {d10-d11}, d7
 
 	/* load dsb9 */
-	ldr	r4, =dsb9_0
-	ldr	r5, =dsb9_1
+	add	r4, r12, #(dsb9_0 - .Lconstants)
+	add	r5, r12, #(dsb9_1 - .Lconstants)
 	vld1.64	{d8-d9}, [r4 :128]	/* q4 := dsb9[0] */
 	vld1.64	{d10-d11}, [r5 :128]	/* q5 := dsb9[1] */
 
@@ -516,7 +536,7 @@ ENTRY(aes_neon_dec1)
 	b	2f
 
 1:	/* load dsbd */
-	ldr	r4, =dsbd_0
+	add	r4, r12, #(dsbd_0 - .Lconstants)
 	vld1.64	{d16-d17}, [r4 :128]!	/* q8 := dsbd[0] */
 	vld1.64	{d18-d19}, [r4 :128]	/* q9 := dsbd[1] */
 
@@ -543,7 +563,7 

CVS commit: src/sys/crypto/aes/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:52:11 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/arm: aes_neon_32.S

Log Message:
PIC for aes_neon_32.S.

Without this, tests/sys/crypto/aes/t_aes fails to start on armv7
because of R_ARM_ABS32 relocations in a nonwritable text segment for
a PIE -- which atf quietly ignores in the final report!  Yikes.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/arm/aes_neon_32.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:50:25 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Use <aarch64/asm.h> rather than copying things from it here.

Vestige from userland build on netbsd-9 during development.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.2
--- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.1	Sat Jul 25 22:51:57 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S	Mon Jul 27 20:50:25 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_64.S,v 1.1 2020/07/25 22:51:57 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_64.S,v 1.2 2020/07/27 20:50:25 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -26,23 +26,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-.macro	adrl 	reg, addr
-	adrp	\reg, \addr
-	add	\reg, \reg, #:lo12:\addr
-.endm
-
-#define	_ALIGN_TEXT			  \
-	.p2align 4
-
-#define	ENTRY(x)			  \
-	.text;  \
-	_ALIGN_TEXT;			  \
-	.global	x;			  \
-	.type	x,@function;		  \
-x:
-
-#define	END(x)  \
-	.size x, . - x
+#include <aarch64/asm.h>
 
 #define	ROUND(a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r) \
 STEP(STEP0,a0,b0,c0,d0,a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,t0,t1,t2,t3, r);   \



CVS commit: src/sys/crypto/chacha/arch/arm

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:50:25 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Use <aarch64/asm.h> rather than copying things from it here.

Vestige from userland build on netbsd-9 during development.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:49:10 UTC 2020

Modified Files:
src/sys/crypto/chacha: chacha_impl.c

Log Message:
Simplify ChaCha selection and allow it to be used much earlier.

This way we can use it for cprng_fast early on.  ChaCha is easy
because there's no data formats that must be preserved from call to
call but vary from implementation to implementation -- we could even
make it a sysctl knob to dynamically select it with negligible cost.

(In contrast, different AES implementations use different expanded
key formats which must be preserved from aes_setenckey to aes_enc,
for example, which means a considerably greater burden on dynamic
selection that's not really worth it.)


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/chacha_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/chacha_impl.c
diff -u src/sys/crypto/chacha/chacha_impl.c:1.2 src/sys/crypto/chacha/chacha_impl.c:1.3
--- src/sys/crypto/chacha/chacha_impl.c:1.2	Mon Jul 27 20:45:15 2020
+++ src/sys/crypto/chacha/chacha_impl.c	Mon Jul 27 20:49:10 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_impl.c,v 1.2 2020/07/27 20:45:15 riastradh Exp $	*/
+/*	$NetBSD: chacha_impl.c,v 1.3 2020/07/27 20:49:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -38,17 +38,14 @@
 #include "chacha.h"
 #include "chacha_ref.h"
 
-static const struct chacha_impl	*chacha_md_impl	__read_mostly;
-static const struct chacha_impl	*chacha_impl	__read_mostly;
+static const struct chacha_impl	*chacha_md_impl __read_mostly;
+static const struct chacha_impl	*chacha_impl __read_mostly = &chacha_ref_impl;
 
 static int
 sysctl_kern_crypto_chacha_selected(SYSCTLFN_ARGS)
 {
 	struct sysctlnode node;
 
-	KASSERTMSG(chacha_impl != NULL,
-	"sysctl ran before ChaCha implementation was selected");
-
 	node = *rnode;
 	node.sysctl_data = __UNCONST(chacha_impl->ci_name);
 	node.sysctl_size = strlen(chacha_impl->ci_name) + 1;
@@ -79,8 +76,6 @@ static int
 chacha_select(void)
 {
 
-	KASSERT(chacha_impl == NULL);
-
 	if (chacha_md_impl) {
 		if (chacha_selftest(chacha_md_impl))
 			aprint_error("chacha: self-test failed: %s\n",
@@ -88,15 +83,6 @@ chacha_select(void)
 		else
 			chacha_impl = chacha_md_impl;
 	}
-	if (chacha_impl == NULL) {
-		if (chacha_selftest(&chacha_ref_impl))
-			aprint_error("chacha: self-test failed: %s\n",
-			chacha_ref_impl.ci_name);
-		else
-			chacha_impl = &chacha_ref_impl;
-	}
-	if (chacha_impl == NULL)
-		panic("ChaCha self-tests failed");
 
 	aprint_verbose("chacha: %s\n", chacha_impl->ci_name);
 	return 0;
@@ -118,26 +104,11 @@ chacha_modcmd(modcmd_t cmd, void *opaque
 	}
 }
 
-static void
-chacha_guarantee_selected(void)
-{
-#if 0
-	static once_t once;
-	int error;
-
-	error = RUN_ONCE(&once, chacha_select);
-	KASSERT(error == 0);
-#endif
-}
-
 void
 chacha_md_init(const struct chacha_impl *impl)
 {
 
 	KASSERT(cold);
-	KASSERTMSG(chacha_impl == NULL,
-	"ChaCha implementation `%s' already chosen, can't offer `%s'",
-	chacha_impl->ci_name, impl->ci_name);
 	KASSERTMSG(chacha_md_impl == NULL,
 	"ChaCha implementation `%s' already offered, can't offer `%s'",
 	chacha_md_impl->ci_name, impl->ci_name);
@@ -153,7 +124,6 @@ chacha_core(uint8_t out[restrict static 
 unsigned nr)
 {
 
-	chacha_guarantee_selected();
 	(*chacha_impl->ci_chacha_core)(out, in, k, c, nr);
 }
 
@@ -165,7 +135,6 @@ hchacha(uint8_t out[restrict static HCHA
 unsigned nr)
 {
 
-	chacha_guarantee_selected();
 	(*chacha_impl->ci_hchacha)(out, in, k, c, nr);
 }
 
@@ -176,7 +145,6 @@ chacha_stream(uint8_t *restrict s, size_
 unsigned nr)
 {
 
-	chacha_guarantee_selected();
 	(*chacha_impl->ci_chacha_stream)(s, nbytes, blkno, nonce, key, nr);
 }
 
@@ -187,7 +155,6 @@ chacha_stream_xor(uint8_t *c, const uint
 unsigned nr)
 {
 
-	chacha_guarantee_selected();
 	(*chacha_impl->ci_chacha_stream_xor)(c, p, nbytes, blkno, nonce, key,
 	nr);
 }
@@ -199,7 +166,6 @@ xchacha_stream(uint8_t *restrict s, size
 unsigned nr)
 {
 
-	chacha_guarantee_selected();
 	(*chacha_impl->ci_xchacha_stream)(s, nbytes, blkno, nonce, key, nr);
 }
 
@@ -210,7 +176,6 @@ xchacha_stream_xor(uint8_t *c, const uin
 unsigned nr)
 {
 
-	chacha_guarantee_selected();
 	(*chacha_impl->ci_xchacha_stream_xor)(c, p, nbytes, blkno, nonce, key,
 	nr);
 }



CVS commit: src/sys/crypto/chacha

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:49:10 UTC 2020

Modified Files:
src/sys/crypto/chacha: chacha_impl.c

Log Message:
Simplify ChaCha selection and allow it to be used much earlier.

This way we can use it for cprng_fast early on.  ChaCha is easy
because there are no data formats that must be preserved from call to
call but vary from implementation to implementation -- we could even
make it a sysctl knob to dynamically select it with negligible cost.

(In contrast, different AES implementations use different expanded
key formats which must be preserved from aes_setenckey to aes_enc,
for example, which means a considerably greater burden on dynamic
selection that's not really worth it.)


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/chacha_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto/chacha/arch

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:48:18 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon.c
src/sys/crypto/chacha/arch/x86: chacha_sse2.c

Log Message:
Reduce some duplication.

Shouldn't substantively hurt performance -- the comparison that has
been moved into the loop was essentially the former loop condition --
and may improve performance by reducing code size since there's only
one inline call to chacha_permute instead of two.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/x86/chacha_sse2.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/chacha/arch/arm/chacha_neon.c
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.1 src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.2
--- src/sys/crypto/chacha/arch/arm/chacha_neon.c:1.1	Sat Jul 25 22:51:57 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon.c	Mon Jul 27 20:48:18 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon.c,v 1.1 2020/07/25 22:51:57 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon.c,v 1.2 2020/07/27 20:48:18 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -168,7 +168,7 @@ chacha_stream_neon(uint8_t *restrict s, 
 			le32dec(nonce + 8)
 		};
 
-		for (; n >= 64; s += 64, n -= 64) {
+		for (; n; s += 64, n -= 64) {
 			r0 = in0;
 			r1 = in1;
 			r2 = in2;
@@ -178,32 +178,25 @@ chacha_stream_neon(uint8_t *restrict s, 
 			r1 = vhtole_u32(vaddq_u32(r1, in1));
 			r2 = vhtole_u32(vaddq_u32(r2, in2));
 			r3 = vhtole_u32(vaddq_u32(r3, in3));
+
+			if (n < 64) {
+uint8_t buf[64] __aligned(16);
+
+vst1q_u32((uint32_t *)buf + 4*0, r0);
+vst1q_u32((uint32_t *)buf + 4*1, r1);
+vst1q_u32((uint32_t *)buf + 4*2, r2);
+vst1q_u32((uint32_t *)buf + 4*3, r3);
+memcpy(s, buf, n);
+
+break;
+			}
+
 			vst1q_u32((uint32_t *)s + 4*0, r0);
 			vst1q_u32((uint32_t *)s + 4*1, r1);
 			vst1q_u32((uint32_t *)s + 4*2, r2);
 			vst1q_u32((uint32_t *)s + 4*3, r3);
 			in3 = vaddq_u32(in3, blkno_inc);
 		}
-
-		if (n) {
-			uint8_t buf[64];
-
-			r0 = in0;
-			r1 = in1;
-			r2 = in2;
-			r3 = in3;
-			chacha_permute(&r0, &r1, &r2, &r3, nr);
-			r0 = vhtole_u32(vaddq_u32(r0, in0));
-			r1 = vhtole_u32(vaddq_u32(r1, in1));
-			r2 = vhtole_u32(vaddq_u32(r2, in2));
-			r3 = vhtole_u32(vaddq_u32(r3, in3));
-			vst1q_u32((uint32_t *)buf + 4*0, r0);
-			vst1q_u32((uint32_t *)buf + 4*1, r1);
-			vst1q_u32((uint32_t *)buf + 4*2, r2);
-			vst1q_u32((uint32_t *)buf + 4*3, r3);
-
-			memcpy(s, buf, n);
-		}
 	}
 }
 
@@ -234,7 +227,7 @@ chacha_stream_xor_neon(uint8_t *s, const
 			le32dec(nonce + 8)
 		};
 
-		for (; n >= 64; s += 64, p += 64, n -= 64) {
+		for (; n; s += 64, p += 64, n -= 64) {
 			r0 = in0;
 			r1 = in1;
 			r2 = in2;
@@ -244,6 +237,25 @@ chacha_stream_xor_neon(uint8_t *s, const
 			r1 = vhtole_u32(vaddq_u32(r1, in1));
 			r2 = vhtole_u32(vaddq_u32(r2, in2));
 			r3 = vhtole_u32(vaddq_u32(r3, in3));
+
+			if (n < 64) {
+uint8_t buf[64] __aligned(16);
+unsigned i;
+
+vst1q_u32((uint32_t *)buf + 4*0, r0);
+vst1q_u32((uint32_t *)buf + 4*1, r1);
+vst1q_u32((uint32_t *)buf + 4*2, r2);
+vst1q_u32((uint32_t *)buf + 4*3, r3);
+
+for (i = 0; i < n - n%4; i += 4)
+	le32enc(s + i,
+	le32dec(p + i) ^ le32dec(buf + i));
+for (; i < n; i++)
+	s[i] = p[i] ^ buf[i];
+
+break;
+			}
+
 			r0 ^= vld1q_u32((const uint32_t *)p + 4*0);
 			r1 ^= vld1q_u32((const uint32_t *)p + 4*1);
 			r2 ^= vld1q_u32((const uint32_t *)p + 4*2);
@@ -254,31 +266,6 @@ chacha_stream_xor_neon(uint8_t *s, const
 			vst1q_u32((uint32_t *)s + 4*3, r3);
 			in3 = vaddq_u32(in3, blkno_inc);
 		}
-
-		if (n) {
-			uint8_t buf[64];
-			unsigned i;
-
-			r0 = in0;
-			r1 = in1;
-			r2 = in2;
-			r3 = in3;
-			chacha_permute(&r0, &r1, &r2, &r3, nr);
-			r0 = vhtole_u32(vaddq_u32(r0, in0));
-			r1 = vhtole_u32(vaddq_u32(r1, in1));
-			r2 = vhtole_u32(vaddq_u32(r2, in2));
-			r3 = vhtole_u32(vaddq_u32(r3, in3));
-			vst1q_u32((uint32_t *)buf + 4*0, r0);
-			vst1q_u32((uint32_t *)buf + 4*1, r1);
-			vst1q_u32((uint32_t *)buf + 4*2, r2);
-			vst1q_u32((uint32_t *)buf + 4*3, r3);
-
-			for (i = 0; i < n - n%4; i += 4)
-le32enc(s + i,
-le32dec(p + i) ^ le32dec(buf + i));
-			for (; i < n; i++)
-s[i] = p[i] ^ buf[i];
-		}
 	}
 }
 

Index: src/sys/crypto/chacha/arch/x86/chacha_sse2.c
diff -u src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.1 src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.2
--- src/sys/crypto/chacha/arch/x86/chacha_sse2.c:1.1	Sat Jul 25 22:49:20 2020
+++ src/sys/crypto/chacha/arch/x86/chacha_sse2.c	Mon Jul 27 20:48:18 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_sse2.c,v 1.1 2020/07/25 22:49:20 riastradh Exp $	*/
+/*	$NetBSD: chacha_sse2.c,v 1.2 2020/07/27 20:48:18 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD 

CVS commit: src/sys/crypto/chacha/arch

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:48:18 UTC 2020

Modified Files:
src/sys/crypto/chacha/arch/arm: chacha_neon.c
src/sys/crypto/chacha/arch/x86: chacha_sse2.c

Log Message:
Reduce some duplication.

Shouldn't substantively hurt performance -- the comparison that has
been moved into the loop was essentially the former loop condition --
and may improve performance by reducing code size since there's only
one inline call to chacha_permute instead of two.


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/arm/chacha_neon.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/arch/x86/chacha_sse2.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.



CVS commit: src/sys/crypto

2020-07-27 Thread Taylor R Campbell
Module Name:src
Committed By:   riastradh
Date:   Mon Jul 27 20:45:15 UTC 2020

Modified Files:
src/sys/crypto/aes: aes_impl.c
src/sys/crypto/chacha: chacha_impl.c

Log Message:
New sysctl subtree kern.crypto.

kern.crypto.aes.selected (formerly hw.aes_impl)
kern.crypto.chacha.selected (formerly hw.chacha_impl)

XXX Should maybe deduplicate creation of kern.crypto.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/aes_impl.c
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/chacha/chacha_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/aes_impl.c
diff -u src/sys/crypto/aes/aes_impl.c:1.8 src/sys/crypto/aes/aes_impl.c:1.9
--- src/sys/crypto/aes/aes_impl.c:1.8	Sat Jul 25 22:42:03 2020
+++ src/sys/crypto/aes/aes_impl.c	Mon Jul 27 20:45:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $	*/
+/*	$NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.8 2020/07/25 22:42:03 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_impl.c,v 1.9 2020/07/27 20:45:15 riastradh Exp $");
 
 #include 
 #include 
@@ -48,7 +48,7 @@ static const struct aes_impl	*aes_md_imp
 static const struct aes_impl	*aes_impl	__read_mostly;
 
 static int
-sysctl_hw_aes_impl(SYSCTLFN_ARGS)
+sysctl_kern_crypto_aes_selected(SYSCTLFN_ARGS)
 {
 	struct sysctlnode node;
 
@@ -61,14 +61,24 @@ sysctl_hw_aes_impl(SYSCTLFN_ARGS)
 	return sysctl_lookup(SYSCTLFN_CALL());
 }
 
-SYSCTL_SETUP(sysctl_hw_aes_setup, "sysctl hw.aes_impl setup")
+SYSCTL_SETUP(sysctl_kern_crypto_aes_setup, "sysctl kern.crypto.aes setup")
 {
+	const struct sysctlnode *cnode;
+	const struct sysctlnode *aes_node;
 
-	sysctl_createv(clog, 0, NULL, NULL,
-	CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "aes_impl",
+	sysctl_createv(clog, 0, NULL, &cnode, 0, CTLTYPE_NODE, "crypto",
+	SYSCTL_DESCR("Kernel cryptography"),
+	NULL, 0, NULL, 0,
+	CTL_KERN, CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &cnode, &aes_node, 0, CTLTYPE_NODE, "aes",
+	SYSCTL_DESCR("AES -- Advanced Encryption Standard"),
+	NULL, 0, NULL, 0,
+	CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &aes_node, NULL,
+	CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "selected",
 	SYSCTL_DESCR("Selected AES implementation"),
-	sysctl_hw_aes_impl, 0, NULL, 0,
-	CTL_HW, CTL_CREATE, CTL_EOL);
+	sysctl_kern_crypto_aes_selected, 0, NULL, 0,
+	CTL_CREATE, CTL_EOL);
 }
 
 /*

Index: src/sys/crypto/chacha/chacha_impl.c
diff -u src/sys/crypto/chacha/chacha_impl.c:1.1 src/sys/crypto/chacha/chacha_impl.c:1.2
--- src/sys/crypto/chacha/chacha_impl.c:1.1	Sat Jul 25 22:46:34 2020
+++ src/sys/crypto/chacha/chacha_impl.c	Mon Jul 27 20:45:15 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_impl.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $	*/
+/*	$NetBSD: chacha_impl.c,v 1.2 2020/07/27 20:45:15 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -42,7 +42,7 @@ static const struct chacha_impl	*chacha_
 static const struct chacha_impl	*chacha_impl	__read_mostly;
 
 static int
-sysctl_hw_chacha_impl(SYSCTLFN_ARGS)
+sysctl_kern_crypto_chacha_selected(SYSCTLFN_ARGS)
 {
 	struct sysctlnode node;
 
@@ -55,14 +55,24 @@ sysctl_hw_chacha_impl(SYSCTLFN_ARGS)
 	return sysctl_lookup(SYSCTLFN_CALL());
 }
 
-SYSCTL_SETUP(sysctl_hw_chacha_setup, "sysctl hw.chacha_impl setup")
+SYSCTL_SETUP(sysctl_kern_crypto_chacha_setup, "sysctl kern.crypto.chacha setup")
 {
+	const struct sysctlnode *cnode;
+	const struct sysctlnode *chacha_node;
 
-	sysctl_createv(clog, 0, NULL, NULL,
-	CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "chacha_impl",
+	sysctl_createv(clog, 0, NULL, &cnode, 0, CTLTYPE_NODE, "crypto",
+	SYSCTL_DESCR("Kernel cryptography"),
+	NULL, 0, NULL, 0,
+	CTL_KERN, CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &cnode, &chacha_node, 0, CTLTYPE_NODE, "chacha",
+	SYSCTL_DESCR("ChaCha"),
+	NULL, 0, NULL, 0,
+	CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &chacha_node, NULL,
+	CTLFLAG_PERMANENT|CTLFLAG_READONLY, CTLTYPE_STRING, "selected",
 	SYSCTL_DESCR("Selected ChaCha implementation"),
-	sysctl_hw_chacha_impl, 0, NULL, 0,
-	CTL_HW, CTL_CREATE, CTL_EOL);
+	sysctl_kern_crypto_chacha_selected, 0, NULL, 0,
+	CTL_CREATE, CTL_EOL);
 }
 
 static int



  1   2   3   >