CVS commit: src/sys/crypto/aes/arch/x86

2020-07-28 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Tue Jul 28 14:01:35 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
Initialize authctr in both branches.

I guess I didn't test the unaligned case, weird.
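
The shape of the bug, in miniature (a sketch of the pattern, not the
committed code): the routine substitutes an aligned bounce buffer when
the caller's buffer is misaligned, so the working pointer must be
assigned in both branches -- the aligned path previously left it
uninitialized.

	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;	/* must be set on every path */

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;	/* unaligned: use bounce buffer */
	} else {
		authctr = authctr0;	/* aligned: the missing assignment */
	}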


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.5 src/sys/crypto/aes/arch/x86/aes_via.c:1.6
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.5	Sat Jul 25 22:31:32 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Tue Jul 28 14:01:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.6 2020/07/28 14:01:35 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -739,6 +739,7 @@ aesvia_ccm_enc1(const struct aesenc *enc
 		authctr = authctrbuf;
 		ccmenc_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmenc_aligned_evcnt.ev_count++;
 	}
 	c0 = le32dec(authctr0 + 16 + 4*0);
@@ -812,6 +813,7 @@ aesvia_ccm_dec1(const struct aesenc *enc
 		le32enc(authctr + 16 + 4*2, c2);
 		ccmdec_unaligned_evcnt.ev_count++;
 	} else {
+		authctr = authctr0;
 		ccmdec_aligned_evcnt.ev_count++;
 	}
 



CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sat Jul 25 22:45:10 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: immintrin.h

Log Message:
Add some Intel intrinsics for ChaCha.

_mm_load1_ps
_mm_loadu_si128
_mm_movelh_ps
_mm_slli_epi32
_mm_storeu_si128
_mm_unpackhi_epi32
_mm_unpacklo_epi32
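
For a sense of how these are used: a ChaCha quarter-round rotates
32-bit lanes, which SSE2 spells as two shifts and an OR.  A minimal
sketch with the standard intrinsic names (rotl32 is a hypothetical
helper, not something this header defines):

	#include <emmintrin.h>

	/* Rotate each 32-bit lane of x left by n -- sketch only. */
	static inline __m128i
	rotl32(__m128i x, int n)
	{
		return _mm_or_si128(_mm_slli_epi32(x, n),
		    _mm_srli_epi32(x, 32 - n));
	}

The unaligned load/store intrinsics cover reading and writing the
state, and the unpack intrinsics are the usual building blocks for
transposing state words across parallel blocks.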


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/immintrin.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/immintrin.h
diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.4 src/sys/crypto/aes/arch/x86/immintrin.h:1.5
--- src/sys/crypto/aes/arch/x86/immintrin.h:1.4	Sat Jul 25 22:44:32 2020
+++ src/sys/crypto/aes/arch/x86/immintrin.h	Sat Jul 25 22:45:10 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $	*/
+/*	$NetBSD: immintrin.h,v 1.5 2020/07/25 22:45:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -103,6 +103,20 @@ _mm_add_epi32(__m128i __a, __m128i __b)
 #endif
 
 _INTRINSATTR
+static __inline __m128
+_mm_load1_ps(const float *__p)
+{
+	return __extension__ (__m128)(__v4sf) { *__p, *__p, *__p, *__p };
+}
+
+_INTRINSATTR
+static __inline __m128i
+_mm_loadu_si128(const __m128i_u *__p)
+{
+	return ((const struct { __m128i_u __v; } _PACKALIAS *)__p)->__v;
+}
+
+_INTRINSATTR
 static __inline __m128i
 _mm_loadu_si32(const void *__p)
 {
@@ -132,8 +146,18 @@ _mm_movehl_ps(__m128 __v0, __m128 __v1)
 #if defined(__GNUC__) && !defined(__clang__)
 	return (__m128)__builtin_ia32_movhlps((__v4sf)__v0, (__v4sf)__v1);
 #elif defined(__clang__)
-	return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1,
-	6, 7, 2, 3);
+	return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 6,7,2,3);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128
+_mm_movelh_ps(__m128 __v0, __m128 __v1)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return (__m128)__builtin_ia32_movlhps((__v4sf)__v0, (__v4sf)__v1);
+#elif defined(__clang__)
+	return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 0,1,4,5);
 #endif
 }
 
@@ -205,6 +229,13 @@ _mm_shuffle_epi8(__m128i __vtbl, __m128i
 
 _INTRINSATTR
 static __inline __m128i
+_mm_slli_epi32(__m128i __v, uint8_t __bits)
+{
+	return (__m128i)__builtin_ia32_pslldi128((__v4si)__v, (int)__bits);
+}
+
+_INTRINSATTR
+static __inline __m128i
 _mm_slli_epi64(__m128i __v, uint8_t __bits)
 {
 	return (__m128i)__builtin_ia32_psllqi128((__v2di)__v, (int)__bits);
@@ -245,6 +276,13 @@ _mm_srli_epi64(__m128i __v, uint8_t __bi
 
 _INTRINSATTR
 static __inline void
+_mm_storeu_si128(__m128i_u *__p, __m128i __v)
+{
+	((struct { __m128i_u __v; } _PACKALIAS *)__p)->__v = __v;
+}
+
+_INTRINSATTR
+static __inline void
 _mm_storeu_si32(void *__p, __m128i __v)
 {
 	((struct { int32_t __v; } _PACKALIAS *)__p)->__v = ((__v4si)__v)[0];
@@ -273,6 +311,32 @@ _mm_sub_epi64(__m128i __x, __m128i __y)
 
 _INTRINSATTR
 static __inline __m128i
+_mm_unpackhi_epi32(__m128i __lo, __m128i __hi)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return (__m128i)__builtin_ia32_punpckhdq128((__v4si)__lo,
+	(__v4si)__hi);
+#elif defined(__clang__)
+	return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
+	2,6,3,7);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128i
+_mm_unpacklo_epi32(__m128i __lo, __m128i __hi)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+	return (__m128i)__builtin_ia32_punpckldq128((__v4si)__lo,
+	(__v4si)__hi);
+#elif defined(__clang__)
+	return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
+	0,4,1,5);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128i
 _mm_unpacklo_epi64(__m128i __lo, __m128i __hi)
 {
 #if defined(__GNUC__) && !defined(__clang__)



CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sat Jul 25 22:44:32 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: immintrin.h

Log Message:
Fix target attribute on _mm_movehl_ps, fix clang _mm_unpacklo_epi64.

- _mm_movehl_ps is available in SSE2, no need for SSSE3.
- _mm_unpacklo_epi64 operates on v2di, not v4si; fix.
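
The second fix matters because __builtin_shufflevector indexes lanes
of the operand vector type, so the element width changes what the
indices mean.  Illustrative fragments (clang builtins, as in the
header; lo and hi stand for arbitrary __m128i values):

	/* punpcklqdq: take the low 64-bit lane of each operand. */
	__m128i r = (__m128i)__builtin_shufflevector(
	    (__v2di)lo, (__v2di)hi, 0, 2);

	/*
	 * The old 0,4,1,5 on __v4si interleaves 32-bit lanes instead,
	 * which is punpckldq (_mm_unpacklo_epi32), a different
	 * operation.
	 */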


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/immintrin.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/immintrin.h
diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.3 src/sys/crypto/aes/arch/x86/immintrin.h:1.4
--- src/sys/crypto/aes/arch/x86/immintrin.h:1.3	Sat Jul 25 22:31:04 2020
+++ src/sys/crypto/aes/arch/x86/immintrin.h	Sat Jul 25 22:44:32 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: immintrin.h,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
+/*	$NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -125,7 +125,7 @@ _mm_load_si128(const __m128i *__p)
 	return *__p;
 }
 
-_INTRINSATTR _SSSE3_ATTR
+_INTRINSATTR
 static __inline __m128
 _mm_movehl_ps(__m128 __v0, __m128 __v1)
 {
@@ -279,8 +279,8 @@ _mm_unpacklo_epi64(__m128i __lo, __m128i
 	return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)__lo,
 	(__v2di)__hi);
 #elif defined(__clang__)
-	return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
-	0, 4, 1, 5);
+	return (__m128i)__builtin_shufflevector((__v2di)__lo, (__v2di)__hi,
+	0,2);
 #endif
 }
 



CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sat Jul 25 22:31:32 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
Implement AES-CCM with VIA ACE.
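
For orientation, CCM interleaves CBC-MAC authentication with CTR
encryption; per 16-byte block the work is schematically as follows
(a sketch: aes_enc1, xor16, and ctr_inc32 are hypothetical stand-ins,
not functions from this file):

	/* One CCM-encrypt step per 16-byte block -- schematic only. */
	xor16(auth, auth, ptxt);	/* CBC-MAC: absorb plaintext */
	aes_enc1(key, auth, auth);	/* auth = E_k(auth ^ ptxt) */
	ctr_inc32(ctr);			/* 32-bit big-endian counter bump */
	aes_enc1(key, ctr, pad);	/* pad = E_k(ctr) */
	xor16(out, ptxt, pad);		/* ciphertext = ptxt ^ pad */

The committed code below performs the two block encryptions as a
single two-block REP XCRYPTECB call, which is why authctr[32] carries
the auth block and the counter block back to back.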


To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.4 src/sys/crypto/aes/arch/x86/aes_via.c:1.5
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.4	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Sat Jul 25 22:31:32 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.5 2020/07/25 22:31:32 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -674,6 +674,176 @@ aesvia_xts_dec(const struct aesdec *dec,
 	explicit_memset(t, 0, sizeof t);
 }
 
+static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "cbcmac aligned");
+EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
+static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "cbcmac unaligned");
+EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);
+
+static void
+aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
+{
+	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
+	uint8_t authbuf[16] __aligned(16);
+	uint8_t *auth = auth0;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	if ((uintptr_t)auth0 & 0xf) {
+		memcpy(authbuf, auth0, 16);
+		auth = authbuf;
+		cbcmac_unaligned_evcnt.ev_count++;
+	} else {
+		cbcmac_aligned_evcnt.ev_count++;
+	}
+
+	fpu_kern_enter();
+	aesvia_reload_keys();
+	for (; nbytes; nbytes -= 16, in += 16) {
+		xor128(auth, auth, in);
+		aesvia_encN(enc, auth, auth, 1, cw0);
+	}
+	fpu_kern_leave();
+
+	if ((uintptr_t)auth0 & 0xf) {
+		memcpy(auth0, authbuf, 16);
+		explicit_memset(authbuf, 0, sizeof authbuf);
+	}
+}
+
+static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmenc aligned");
+EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
+static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmenc unaligned");
+EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);
+
+static void
+aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
+uint32_t nrounds)
+{
+	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
+	uint8_t authctrbuf[32] __aligned(16);
+	uint8_t *authctr;
+	uint32_t c0, c1, c2, c3;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	if ((uintptr_t)authctr0 & 0xf) {
+		memcpy(authctrbuf, authctr0, 16);
+		authctr = authctrbuf;
+		ccmenc_unaligned_evcnt.ev_count++;
+	} else {
+		ccmenc_aligned_evcnt.ev_count++;
+	}
+	c0 = le32dec(authctr0 + 16 + 4*0);
+	c1 = le32dec(authctr0 + 16 + 4*1);
+	c2 = le32dec(authctr0 + 16 + 4*2);
+	c3 = be32dec(authctr0 + 16 + 4*3);
+
+	/*
+	 * In principle we could use REP XCRYPTCTR here, but that
+	 * doesn't help to compute the CBC-MAC step, and certain VIA
+	 * CPUs have some weird errata with REP XCRYPTCTR that make it
+	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
+	 * simultaneously compute the CBC-MAC step and the CTR step.
+	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
+	 * who knows...)
+	 */
+	fpu_kern_enter();
+	aesvia_reload_keys();
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		xor128(authctr, authctr, in);
+		le32enc(authctr + 16 + 4*0, c0);
+		le32enc(authctr + 16 + 4*1, c1);
+		le32enc(authctr + 16 + 4*2, c2);
+		be32enc(authctr + 16 + 4*3, ++c3);
+		aesvia_encN(enc, authctr, authctr, 2, cw0);
+		xor128(out, in, authctr + 16);
+	}
+	fpu_kern_leave();
+
+	if ((uintptr_t)authctr0 & 0xf) {
+		memcpy(authctr0, authctrbuf, 16);
+		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
+	}
+
+	le32enc(authctr0 + 16 + 4*0, c0);
+	le32enc(authctr0 + 16 + 4*1, c1);
+	le32enc(authctr0 + 16 + 4*2, c2);
+	be32enc(authctr0 + 16 + 4*3, c3);
+}
+
+static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmdec aligned");
+EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
+static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
+NULL, "aesvia", "ccmdec unaligned");
+EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);
+
+static void
+aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
+   

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sat Jul 25 22:31:04 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_ssse3.h aes_ssse3_impl.c
aes_ssse3_subr.c immintrin.h

Log Message:
Implement AES-CCM with SSSE3.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ssse3.h \
src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c \
src/sys/crypto/aes/arch/x86/immintrin.h
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_ssse3_impl.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_ssse3.h
diff -u src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.2 src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.3
--- src/sys/crypto/aes/arch/x86/aes_ssse3.h:1.2	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_ssse3.h	Sat Jul 25 22:31:04 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ssse3.h,v 1.2 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_ssse3.h,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -59,6 +59,12 @@ void aes_ssse3_xts_enc(const struct aese
 uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
 void aes_ssse3_xts_dec(const struct aesdec *, const uint8_t[static 16],
 uint8_t[static 16], size_t, uint8_t[static 16], uint32_t);
+void aes_ssse3_cbcmac_update1(const struct aesenc *, const uint8_t[static 16],
+size_t, uint8_t[static 16], uint32_t);
+void aes_ssse3_ccm_enc1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
+void aes_ssse3_ccm_dec1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
 
 int aes_ssse3_selftest(void);
 
Index: src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c
diff -u src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.2 src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.3
--- src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c:1.2	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/x86/aes_ssse3_subr.c	Sat Jul 25 22:31:04 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ssse3_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -208,6 +208,75 @@ aes_ssse3_xts_dec(const struct aesdec *d
 	storeblock(tweak, t);
 }
 
+void
+aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
+{
+	__m128i auth;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(auth0);
+	for (; nbytes; nbytes -= 16, in += 16)
+		auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds);
+	storeblock(auth0, auth);
+}
+
+void
+aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
+	const __m128i bs32 =
+	_mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
+	__m128i auth, ctr_be, ctr, ptxt;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(authctr);
+	ctr_be = loadblock(authctr + 16);
+	ctr = _mm_shuffle_epi8(ctr_be, bs32);
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		ptxt = loadblock(in);
+		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
+		ctr = _mm_add_epi32(ctr, ctr32_inc);
+		ctr_be = _mm_shuffle_epi8(ctr, bs32);
+		storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds));
+	}
+	storeblock(authctr, auth);
+	storeblock(authctr + 16, ctr_be);
+}
+
+void
+aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
+	const __m128i bs32 =
+	_mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
+	__m128i auth, ctr_be, ctr, ptxt;
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	auth = loadblock(authctr);
+	ctr_be = loadblock(authctr + 16);
+	ctr = _mm_shuffle_epi8(ctr_be, bs32);
+	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
+		ctr = _mm_add_epi32(ctr, ctr32_inc);
+		ctr_be = _mm_shuffle_epi8(ctr, bs32);
+		ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds);
+		storeblock(out, ptxt);
+		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
+	}
+	storeblock(authctr, auth);
+	storeblock(authctr + 16, ctr_be);
+}
+
 int
 aes_ssse3_selftest(void)
 {
Index: src/sys/crypto/aes/arch/x86/immintrin.h
diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.2 

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sat Jul 25 22:29:56 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_sse2.h aes_sse2_impl.c aes_sse2_subr.c

Log Message:
Implement AES-CCM with SSE2.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_sse2.h
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_sse2.h
diff -u src/sys/crypto/aes/arch/x86/aes_sse2.h:1.3 src/sys/crypto/aes/arch/x86/aes_sse2.h:1.4
--- src/sys/crypto/aes/arch/x86/aes_sse2.h:1.3	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2.h	Sat Jul 25 22:29:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2.h,v 1.3 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2.h,v 1.4 2020/07/25 22:29:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -58,6 +58,12 @@ void aes_sse2_xts_enc(const struct aesen
 uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
 void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16],
 uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_cbcmac_update1(const struct aesenc *, const uint8_t[static 16],
+size_t, uint8_t[static 16], uint32_t);
+void aes_sse2_ccm_enc1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
+void aes_sse2_ccm_dec1(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t, uint8_t[static 32], uint32_t);
 
 int aes_sse2_selftest(void);
 

Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.4 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.5
--- src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.4	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.c	Sat Jul 25 22:29:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_impl.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_impl.c,v 1.5 2020/07/25 22:29:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.4 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.5 2020/07/25 22:29:56 riastradh Exp $");
 
 #include 
 #include 
@@ -143,6 +143,39 @@ aes_sse2_xts_dec_impl(const struct aesde
 	fpu_kern_leave();
 }
 
+static void
+aes_sse2_cbcmac_update1_impl(const struct aesenc *enc,
+const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16],
+uint32_t nrounds)
+{
+
+	fpu_kern_enter();
+	aes_sse2_cbcmac_update1(enc, in, nbytes, auth, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aes_sse2_ccm_enc1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	fpu_kern_enter();
+	aes_sse2_ccm_enc1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aes_sse2_ccm_dec1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	fpu_kern_enter();
+	aes_sse2_ccm_dec1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
 static int
 aes_sse2_probe(void)
 {
@@ -182,4 +215,7 @@ struct aes_impl aes_sse2_impl = {
 	.ai_cbc_dec = aes_sse2_cbc_dec_impl,
 	.ai_xts_enc = aes_sse2_xts_enc_impl,
 	.ai_xts_dec = aes_sse2_xts_dec_impl,
+	.ai_cbcmac_update1 = aes_sse2_cbcmac_update1_impl,
+	.ai_ccm_enc1 = aes_sse2_ccm_enc1_impl,
+	.ai_ccm_dec1 = aes_sse2_ccm_dec1_impl,
 };

Index: src/sys/crypto/aes/arch/x86/aes_sse2_subr.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.2 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.3
--- src/sys/crypto/aes/arch/x86/aes_sse2_subr.c:1.2	Tue Jun 30 20:32:11 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_subr.c	Sat Jul 25 22:29:56 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_subr.c,v 1.3 2020/07/25 22:29:56 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -518,6 +518,180 @@ out:	/* Store the updated tweak.  */
 	explicit_memset(t, 0, sizeof t);
 }
 
+void
+aes_sse2_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
+size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
+{
+	uint64_t sk_exp[120];
+	__m128i q[4];
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	/* Expand 

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sat Jul 25 22:29:06 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_ni.c aes_ni.h aes_ni_64.S

Log Message:
Implement AES-CCM with x86 AES-NI.


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/crypto/aes/arch/x86/aes_ni.c \
src/sys/crypto/aes/arch/x86/aes_ni_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_ni.c
diff -u src/sys/crypto/aes/arch/x86/aes_ni.c:1.3 src/sys/crypto/aes/arch/x86/aes_ni.c:1.4
--- src/sys/crypto/aes/arch/x86/aes_ni.c:1.3	Sat Jul 25 22:12:57 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni.c	Sat Jul 25 22:29:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $	*/
+/*	$NetBSD: aes_ni.c,v 1.4 2020/07/25 22:29:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_ni.c,v 1.3 2020/07/25 22:12:57 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_ni.c,v 1.4 2020/07/25 22:29:06 riastradh Exp $");
 
 #ifdef _KERNEL
 #include 
@@ -204,6 +204,48 @@ aesni_xts_dec_impl(const struct aesdec *
 	fpu_kern_leave();
 }
 
+static void
+aesni_cbcmac_update1_impl(const struct aesenc *enc,
+const uint8_t in[static 16], size_t nbytes, uint8_t auth[static 16],
+uint32_t nrounds)
+{
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	fpu_kern_enter();
+	aesni_cbcmac_update1(enc, in, nbytes, auth, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aesni_ccm_enc1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	fpu_kern_enter();
+	aesni_ccm_enc1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
+static void
+aesni_ccm_dec1_impl(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
+uint32_t nrounds)
+{
+
+	KASSERT(nbytes);
+	KASSERT(nbytes % 16 == 0);
+
+	fpu_kern_enter();
+	aesni_ccm_dec1(enc, in, out, nbytes, authctr, nrounds);
+	fpu_kern_leave();
+}
+
 static int
 aesni_xts_update_selftest(void)
 {
@@ -273,4 +315,7 @@ struct aes_impl aes_ni_impl = {
 	.ai_cbc_dec = aesni_cbc_dec_impl,
 	.ai_xts_enc = aesni_xts_enc_impl,
 	.ai_xts_dec = aesni_xts_dec_impl,
+	.ai_cbcmac_update1 = aesni_cbcmac_update1_impl,
+	.ai_ccm_enc1 = aesni_ccm_enc1_impl,
+	.ai_ccm_dec1 = aesni_ccm_dec1_impl,
 };
Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.4
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3	Sat Jul 25 22:11:05 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Sat Jul 25 22:29:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.4 2020/07/25 22:29:06 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -951,6 +951,142 @@ ENTRY(aesni_xts_update)
 END(aesni_xts_update)
 
 /*
+ * aesni_cbcmac_update1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
+ * size_t nbytes@rdx, uint8_t auth[16] @rcx, uint32_t nrounds@r8d)
+ *
+ *	Update CBC-MAC.
+ *
+ *	nbytes must be a positive integral multiple of 16.
+ *
+ *	Standard ABI calling convention.
+ */
+ENTRY(aesni_cbcmac_update1)
+	movdqu	(%rcx),%xmm0		/* xmm0 := auth */
+	mov	%rdx,%r10		/* r10 := nbytes */
+	mov	%rcx,%rdx		/* rdx := auth */
+1:	pxor	(%rsi),%xmm0		/* xmm0 ^= plaintext block */
+	lea	0x10(%rsi),%rsi
+	mov	%r8d,%ecx		/* ecx := nrounds */
+	call	aesni_enc1		/* xmm0 := auth'; trash rax,rcx,xmm8 */
+	sub	$0x10,%r10
+	jnz	1b
+	movdqu	%xmm0,(%rdx)		/* store auth' */
+	ret
+END(aesni_cbcmac_update1)
+
+/*
+ * aesni_ccm_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
+ * uint8_t *out@rdx, size_t nbytes@rcx,
+ * uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
+ *
+ *	Update CCM encryption.
+ *
+ *	nbytes must be a positive integral multiple of 16.
+ *
+ *	Standard ABI calling convention.
+ */
+ENTRY(aesni_ccm_enc1)
+	mov	%rcx,%r10		/* r10 := nbytes */
+	movdqu	0x10(%r8),%xmm2		/* xmm2 := ctr (be) */
+	movdqa	bswap32(%rip),%xmm4	/* xmm4 := bswap32 table */
+	movdqa	ctr32_inc(%rip),%xmm5	/* xmm5 := (0,0,0,1) (le) */
+	movdqu	(%r8),%xmm0		/* xmm0 := auth */
+	pshufb	%xmm4,%xmm2		/* xmm2 := ctr (le) */
+1:	movdqu	(%rsi),%xmm3		/* xmm3 := plaintext block */
+	paddd	%xmm5,%xmm2		/* increment ctr (32-bit) */
+	lea	0x10(%rsi),%rsi
+	movdqa	%xmm2,%xmm1		/* xmm1 := ctr (le) */
+	mov	%r9d,%ecx		/* ecx := nrounds */
+	pshufb	%xmm4,%xmm1		/* xmm1 := ctr (be) */
+	pxor	%xmm3,%xmm0		/* xmm0 := auth ^ ptxt */
+	call	aesni_enc2		/* trash rax/rcx/xmm8 */
+	pxor	%xmm1,%xmm3		/* xmm3 := ciphertext block */
+	sub	

CVS commit: src/sys/crypto/aes/arch/x86

2020-07-25 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Sat Jul 25 22:11:05 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_ni_64.S

Log Message:
Invert some loops to save a jmp instruction on each iteration.

No semantic change intended.
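
The transformation is classic loop inversion: enter the loop at its
test once, so the steady state ends in a single conditional branch
rather than a conditional exit plus an unconditional jump back.  In
C-shaped pseudocode (a sketch; body() stands for the loop payload):

	/* Before: two branches per iteration (jz forward, jmp back). */
	for (;;) {
		n -= 16;
		if (n == 0)
			break;
		body();
	}

	/* After: jump into the middle once; one jnz per iteration. */
	goto test;
loop:	body();
test:	n -= 16;
	if (n != 0)
		goto loop;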


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/x86/aes_ni_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.3
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.2	Tue Jun 30 21:41:04 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Sat Jul 25 22:11:05 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.2 2020/06/30 21:41:04 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.3 2020/07/25 22:11:05 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -522,14 +522,14 @@ ENTRY(aesni_enctodec)
 	shl	$4,%edx		/* rdx := byte offset of last round key */
 	movdqa	(%rdi,%rdx),%xmm0	/* load last round key */
 	movdqa	%xmm0,(%rsi)	/* store last round key verbatim */
-1:	sub	$0x10,%rdx	/* advance to next round key */
-	lea	0x10(%rsi),%rsi
-	jz	2f		/* stop if this is the last one */
-	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
+	jmp	2f
+1:	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
 	aesimc	%xmm0,%xmm0	/* convert encryption to decryption */
 	movdqa	%xmm0,(%rsi)	/* store round key */
-	jmp	1b
-2:	movdqa	(%rdi),%xmm0	/* load first round key */
+2:	sub	$0x10,%rdx	/* advance to next round key */
+	lea	0x10(%rsi),%rsi
+	jnz	1b		/* repeat if more rounds */
+	movdqa	(%rdi),%xmm0	/* load first round key */
 	movdqa	%xmm0,(%rsi)	/* store first round key verbatim */
 	ret
 END(aesni_enctodec)
@@ -614,16 +614,16 @@ ENTRY(aesni_cbc_dec1)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	-0x10(%rsi,%r10),%xmm0	/* xmm0 := last ciphertext block */
 	movdqu	%xmm0,(%r8)		/* update iv */
-1:	mov	%r9d,%ecx		/* ecx := nrounds */
-	call	aesni_dec1		/* xmm0 := cv ^ ptxt */
-	sub	$0x10,%r10
-	jz	2f			/* first block if r10 is now zero */
-	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
+	jmp	2f
+1:	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
 	pxor	%xmm8,%xmm0		/* xmm0 := ptxt */
 	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
 	movdqa	%xmm8,%xmm0		/* move cv = ciphertext block */
-	jmp	1b
-2:	pxor	(%rsp),%xmm0		/* xmm0 := ptxt */
+2:	mov	%r9d,%ecx		/* ecx := nrounds */
+	call	aesni_dec1		/* xmm0 := cv ^ ptxt */
+	sub	$0x10,%r10
+	jnz	1b			/* repeat if more blocks */
+	pxor	(%rsp),%xmm0		/* xmm0 := ptxt */
 	movdqu	%xmm0,(%rdx)		/* store first plaintext block */
 	leave
 	ret
@@ -649,7 +649,11 @@ ENTRY(aesni_cbc_dec8)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := ciphertext block[n-1] */
 	movdqu	%xmm7,(%r8)		/* update iv */
-1:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
+	jmp	2f
+1:	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
+	pxor	%xmm7,%xmm0		/* xmm0 := ptxt[0] */
+	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
+2:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
 	movdqu	-0x30(%rsi,%r10),%xmm5	/* xmm5 := ciphertext block[n-3] */
 	movdqu	-0x40(%rsi,%r10),%xmm4	/* xmm4 := ciphertext block[n-4] */
 	movdqu	-0x50(%rsi,%r10),%xmm3	/* xmm3 := ciphertext block[n-5] */
@@ -680,12 +684,8 @@ ENTRY(aesni_cbc_dec8)
 	movdqu	%xmm2,-0x60(%rdx,%r10)
 	movdqu	%xmm1,-0x70(%rdx,%r10)
 	sub	$0x80,%r10
-	jz	2f			/* first block if r10 is now zero */
-	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
-	pxor	%xmm7,%xmm0		/* xmm0 := ptxt[0] */
-	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
-	jmp	1b
-2:	pxor	(%rsp),%xmm0		/* xmm0 := ptxt[0] */
+	jnz	1b			/* repeat if more blocks */
+	pxor	(%rsp),%xmm0		/* xmm0 := ptxt[0] */
 	movdqu	%xmm0,(%rdx)		/* store first plaintext block */
 	leave
 	ret
@@ -966,12 +966,12 @@ aesni_enc1:
 	shl	$4,%ecx		/* ecx := total byte size of round keys */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
+	jmp	2f
+1:	aesenc	%xmm8,%xmm0
+2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
 	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesenc	%xmm8,%xmm0
-	jmp	1b
-2:	aesenclast %xmm8,%xmm0
+	jnz	1b		/* repeat if more rounds */
+	aesenclast %xmm8,%xmm0
 	ret
 END(aesni_enc1)
 
@@ -999,10 +999,8 @@ aesni_enc8:
 	shl	$4,%ecx		/* ecx := total byte size of round keys */
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx		/* rcx := byte offset of round key from end */
-1:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
-	add	$0x10,%rcx
-	jz	2f		/* stop if this is the last one */
-	aesenc	%xmm8,%xmm0
+	jmp	2f
+1:	aesenc	%xmm8,%xmm0
 	aesenc	%xmm8,%xmm1
 	aesenc	%xmm8,%xmm2
 	aesenc	%xmm8,%xmm3
@@ -1010,8 +1008,10 @@ 

CVS commit: src/sys/crypto/aes/arch/x86

2020-06-29 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Mon Jun 29 23:50:05 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_sse2.h aes_sse2_impl.c aes_sse2_impl.h
files.aessse2
Added Files:
src/sys/crypto/aes/arch/x86: aes_sse2_subr.c

Log Message:
Split SSE2 logic into separate units.

Ensure that there are no paths into files compiled with -msse -msse2
at all except via fpu_kern_enter.

I didn't run into a practical problem with this, but let's not leave
a ticking time bomb for subsequent toolchain changes in case the mere
declaration of local __m128i variables causes trouble.
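
Concretely, the call discipline after the split (a sketch consistent
with the diff below): the _impl wrappers live in a unit compiled
without -msse2 and own the FPU bracket, and everything that touches
vector registers lives in the separately compiled aes_sse2_subr.c.

	static void
	aes_sse2_enc_impl(const struct aesenc *enc,
	    const uint8_t in[static 16], uint8_t out[static 16],
	    uint32_t nrounds)
	{

		fpu_kern_enter();
		/* aes_sse2_enc() is compiled with -msse2 */
		aes_sse2_enc(enc, in, out, nrounds);
		fpu_kern_leave();
	}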


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_sse2.h \
src/sys/crypto/aes/arch/x86/aes_sse2_impl.c \
src/sys/crypto/aes/arch/x86/aes_sse2_impl.h \
src/sys/crypto/aes/arch/x86/files.aessse2
cvs rdiff -u -r0 -r1.1 src/sys/crypto/aes/arch/x86/aes_sse2_subr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_sse2.h
diff -u src/sys/crypto/aes/arch/x86/aes_sse2.h:1.1 src/sys/crypto/aes/arch/x86/aes_sse2.h:1.2
--- src/sys/crypto/aes/arch/x86/aes_sse2.h:1.1	Mon Jun 29 23:47:54 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2.h	Mon Jun 29 23:50:05 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2.h,v 1.1 2020/06/29 23:47:54 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2.h,v 1.2 2020/06/29 23:50:05 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -31,6 +31,31 @@
 
 #include 
 
+/*
+ * These functions MUST NOT use any vector registers for parameters or
+ * results -- the caller is compiled with -mno-sse &c. in the kernel,
+ * and dynamically turns on the vector unit just before calling them.
+ * Internal subroutines that use the vector unit for parameters are
+ * declared in aes_sse2_impl.h instead.
+ */
+
+void aes_sse2_setkey(uint64_t[static 30], const void *, uint32_t);
+
+void aes_sse2_enc(const struct aesenc *, const uint8_t in[static 16],
+uint8_t[static 16], uint32_t);
+void aes_sse2_dec(const struct aesdec *, const uint8_t in[static 16],
+uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_enc(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_cbc_dec(const struct aesdec *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_enc(const struct aesenc *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+void aes_sse2_xts_dec(const struct aesdec *, const uint8_t[static 16],
+uint8_t[static 16], size_t nbytes, uint8_t[static 16], uint32_t);
+
+int aes_sse2_selftest(void);
+
 extern struct aes_impl aes_sse2_impl;
 
 #endif	/* _CRYPTO_AES_ARCH_X86_AES_SSE2_H */
Index: src/sys/crypto/aes/arch/x86/aes_sse2_impl.c
diff -u src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.1 src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.2
--- src/sys/crypto/aes/arch/x86/aes_sse2_impl.c:1.1	Mon Jun 29 23:47:54 2020
+++ src/sys/crypto/aes/arch/x86/aes_sse2_impl.c	Mon Jun 29 23:50:05 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $	*/
+/*	$NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,11 +27,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.1 2020/06/29 23:47:54 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_sse2_impl.c,v 1.2 2020/06/29 23:50:05 riastradh Exp $");
 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -41,532 +40,99 @@ __KERNEL_RCSID(1, "$NetBSD: aes_sse2_imp
 #include 
 #include 
 
-#include "aes_sse2_impl.h"
-
 static void
-aes_sse2_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
+aes_sse2_setenckey_impl(struct aesenc *enc, const uint8_t *key,
+uint32_t nrounds)
 {
-	size_t key_len;
-
-	switch (nrounds) {
-	case 10:
-		key_len = 16;
-		break;
-	case 12:
-		key_len = 24;
-		break;
-	case 14:
-		key_len = 32;
-		break;
-	default:
-		panic("invalid AES nrounds: %u", nrounds);
-	}
 
 	fpu_kern_enter();
-	aes_sse2_keysched(rk, key, key_len);
-	fpu_kern_leave();
-}
-
-static void
-aes_sse2_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
-{
-
 	aes_sse2_setkey(enc->aese_aes.aes_rk64, key, nrounds);
+	fpu_kern_leave();
 }
 
 static void
-aes_sse2_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
+aes_sse2_setdeckey_impl(struct aesdec *dec, const uint8_t *key,
+uint32_t nrounds)
 {
 
+	fpu_kern_enter();
 	/*
 	 * BearSSL computes InvMixColumns on the fly -- no need for
 	 * distinct decryption round keys.
 	 */
 	aes_sse2_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
+	fpu_kern_leave();
 }
 
 static void
-aes_sse2_enc(const struct aesenc *enc, const uint8_t in[static 16],
+aes_sse2_enc_impl(const 

CVS commit: src/sys/crypto/aes/arch/x86

2020-06-29 Thread Taylor R Campbell
Module Name:    src
Committed By:   riastradh
Date:   Mon Jun 29 23:41:35 UTC 2020

Modified Files:
src/sys/crypto/aes/arch/x86: aes_via.c

Log Message:
VIA AES: Batch AES-XTS computation into eight blocks at a time.

Experimental -- performance improvement is not clearly worth the
complexity.
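
The idea, schematically: REP XCRYPT takes a block count, so the loop
can precompute eight XTS tweaks, mask eight blocks, encrypt them with
one instruction, and unmask (a rough sketch, not the committed code;
xts_mult_x is a hypothetical GF(2^128) tweak-doubling helper, t the
current 16-byte tweak, t8 an aligned 128-byte scratch buffer):

	for (; nbytes >= 8*16; nbytes -= 8*16, in += 8*16, out += 8*16) {
		for (i = 0; i < 8; i++) {
			memcpy(t8 + 16*i, t, 16);	/* stash tweak i */
			xts_mult_x(t);			/* next tweak */
		}
		for (i = 0; i < 8; i++)
			xor128(out + 16*i, in + 16*i, t8 + 16*i);
		aesvia_encN(enc, out, out, 8, cw0);	/* one REP XCRYPTECB */
		for (i = 0; i < 8; i++)
			xor128(out + 16*i, out + 16*i, t8 + 16*i);
	}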


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/crypto/aes/arch/x86/aes_via.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/crypto/aes/arch/x86/aes_via.c
diff -u src/sys/crypto/aes/arch/x86/aes_via.c:1.1 src/sys/crypto/aes/arch/x86/aes_via.c:1.2
--- src/sys/crypto/aes/arch/x86/aes_via.c:1.1	Mon Jun 29 23:39:30 2020
+++ src/sys/crypto/aes/arch/x86/aes_via.c	Mon Jun 29 23:41:35 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_via.c,v 1.1 2020/06/29 23:39:30 riastradh Exp $	*/
+/*	$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.1 2020/06/29 23:39:30 riastradh Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.2 2020/06/29 23:41:35 riastradh Exp $");
 
 #include 
 #include 
@@ -119,8 +119,8 @@ aesvia_setdeckey(struct aesdec *dec, con
 }
 
 static inline void
-aesvia_enc1(const struct aesenc *enc, const uint8_t in[static 16],
-uint8_t out[static 16], uint32_t cw0)
+aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nblocks, uint32_t cw0)
 {
 	const uint32_t cw[4] __aligned(16) = {
 		[0] = (cw0
@@ -128,7 +128,6 @@ aesvia_enc1(const struct aesenc *enc, co
 		| C3_CRYPT_CWLO_ENCRYPT
 		| C3_CRYPT_CWLO_NORMAL),
 	};
-	size_t nblocks = 1;
 
 	KASSERT(((uintptr_t)enc & 0xf) == 0);
 	KASSERT(((uintptr_t)in & 0xf) == 0);
@@ -141,8 +140,8 @@ aesvia_enc1(const struct aesenc *enc, co
 }
 
 static inline void
-aesvia_dec1(const struct aesdec *dec, const uint8_t in[static 16],
-uint8_t out[static 16], uint32_t cw0)
+aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
+uint8_t out[static 16], size_t nblocks, uint32_t cw0)
 {
 	const uint32_t cw[4] __aligned(16) = {
 		[0] = (cw0
@@ -150,7 +149,6 @@ aesvia_dec1(const struct aesdec *dec, co
 		| C3_CRYPT_CWLO_DECRYPT
 		| C3_CRYPT_CWLO_NORMAL),
 	};
-	size_t nblocks = 1;
 
 	KASSERT(((uintptr_t)dec & 0xf) == 0);
 	KASSERT(((uintptr_t)in & 0xf) == 0);
@@ -180,7 +178,7 @@ aesvia_enc(const struct aesenc *enc, con
 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
 	((uintptr_t)in & 0xff0) != 0xff0) {
 		enc_aligned_evcnt.ev_count++;
-		aesvia_enc1(enc, in, out, cw0);
+		aesvia_encN(enc, in, out, 1, cw0);
 	} else {
 		enc_unaligned_evcnt.ev_count++;
 		/*
@@ -194,7 +192,7 @@ aesvia_enc(const struct aesenc *enc, con
 		uint8_t outbuf[16] __aligned(16);
 
 		memcpy(inbuf, in, 16);
-		aesvia_enc1(enc, inbuf, outbuf, cw0);
+		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
 		memcpy(out, outbuf, 16);
 
 		explicit_memset(inbuf, 0, sizeof inbuf);
@@ -221,7 +219,7 @@ aesvia_dec(const struct aesdec *dec, con
 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
 	((uintptr_t)in & 0xff0) != 0xff0) {
 		dec_aligned_evcnt.ev_count++;
-		aesvia_dec1(dec, in, out, cw0);
+		aesvia_decN(dec, in, out, 1, cw0);
 	} else {
 		dec_unaligned_evcnt.ev_count++;
 		/*
@@ -235,7 +233,7 @@ aesvia_dec(const struct aesdec *dec, con
 		uint8_t outbuf[16] __aligned(16);
 
 		memcpy(inbuf, in, 16);
-		aesvia_dec1(dec, inbuf, outbuf, cw0);
+		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
 		memcpy(out, outbuf, 16);
 
 		explicit_memset(inbuf, 0, sizeof inbuf);
@@ -245,7 +243,7 @@ aesvia_dec(const struct aesdec *dec, con
 }
 
 static inline void
-aesvia_cbc_enc1(const struct aesenc *enc, const uint8_t in[static 16],
+aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
 uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
 {
 	const uint32_t cw[4] __aligned(16) = {
@@ -274,7 +272,7 @@ aesvia_cbc_enc1(const struct aesenc *enc
 }
 
 static inline void
-aesvia_cbc_dec1(const struct aesdec *dec, const uint8_t in[static 16],
+aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
 uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
 uint32_t cw0)
 {
@@ -340,7 +338,7 @@ aesvia_cbc_enc(const struct aesenc *enc,
 	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
 		cbcenc_aligned_evcnt.ev_count++;
 		uint8_t *ivp = iv;
-		aesvia_cbc_enc1(enc, in, out, nbytes/16, &ivp, cw0);
+		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
 		memcpy(iv, ivp, 16);
 	} else {
 		cbcenc_unaligned_evcnt.ev_count++;
@@ -351,7 +349,7 @@ aesvia_cbc_enc(const struct aesenc *enc,
 		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 			memcpy(tmp, in, 16);
 			xor128(tmp, tmp, cv);
-			aesvia_enc1(enc, tmp, cv, cw0);
+			aesvia_encN(enc, tmp, cv, 1, cw0);
 			memcpy(out, cv, 16);
 		}