Module Name:	src
Committed By:	riastradh
Date:		Mon Jul 27 20:53:23 UTC 2020

Modified Files:
	src/sys/crypto/aes/arch/arm: aes_armv8_64.S aes_neon_32.S
	src/sys/crypto/aes/arch/x86: aes_ni_64.S
	src/sys/crypto/chacha/arch/arm: chacha_neon_64.S

Log Message:
Align critical-path loops in AES and ChaCha.


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/crypto/aes/arch/arm/aes_armv8_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/aes/arch/arm/aes_neon_32.S
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/aes_ni_64.S
cvs rdiff -u -r1.2 -r1.3 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
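
The pattern added throughout the diffs below is the same: an _ALIGN_TEXT
directive immediately before the label at the head of each critical-path
loop, so the backward-branch target starts on an aligned instruction-fetch
boundary.  In most of these routines the loop is entered through a forward
branch (b 2f / jmp 2f) that jumps past the loop head, so the alignment
padding is off the executed path; in the rest it is crossed only once on
entry.  A minimal AArch64 sketch of the idiom, assuming _ALIGN_TEXT expands
to an assembler alignment directive such as .align/.p2align per the
architecture's <machine/asm.h>; the routine example_loop and its register
use are hypothetical, not part of this commit:

	/* Hypothetical routine, for illustration only. */
ENTRY(example_loop)
	mov	x3, #10			/* x3 := iteration count */
	b	2f			/* enter the loop at its tail */
	_ALIGN_TEXT			/* pad so label 1 starts aligned */
1:	add	x0, x0, x1		/* hot loop body */
2:	subs	x3, x3, #1		/* count down */
	b.ne	1b			/* backward branch to the aligned head */
	ret
END(example_loop)

Because the padding sits between the forward branch and the loop head, the
alignment costs no executed instructions on each iteration; only the
backward-branch target moves onto the boundary.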
Modified files:

Index: src/sys/crypto/aes/arch/arm/aes_armv8_64.S
diff -u src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.8 src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.9
--- src/sys/crypto/aes/arch/arm/aes_armv8_64.S:1.8	Sat Jul 25 22:33:04 2020
+++ src/sys/crypto/aes/arch/arm/aes_armv8_64.S	Mon Jul 27 20:53:22 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_armv8_64.S,v 1.8 2020/07/25 22:33:04 riastradh Exp $	*/
+/*	$NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -440,6 +440,7 @@ END(aesarmv8_setenckey256)
 ENTRY(aesarmv8_enctodec)
 	ldr	q0, [x0, x2, lsl #4]	/* load last round key */
 	b	2f
+	_ALIGN_TEXT
 1:	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
 2:	str	q0, [x1], #0x10	/* store round key */
 	subs	x2, x2, #1	/* count down round */
@@ -503,6 +504,7 @@ ENTRY(aesarmv8_cbc_enc)
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q0, [x4]		/* q0 := chaining value */
+	_ALIGN_TEXT
 1:	ldr	q1, [x1], #0x10		/* q1 := plaintext block */
 	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
 	mov	x0, x9			/* x0 := enckey */
@@ -539,6 +541,7 @@ ENTRY(aesarmv8_cbc_dec1)
 	ldr	q0, [x1, #-0x10]!	/* q0 := last ciphertext block */
 	str	q0, [x4]		/* update iv */
 	b	2f
+	_ALIGN_TEXT
 1:	ldr	q31, [x1, #-0x10]!	/* q31 := chaining value */
 	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
 	str	q0, [x2, #-0x10]!	/* store plaintext block */
@@ -576,6 +579,7 @@ ENTRY(aesarmv8_cbc_dec8)
 	ldp	q6, q7, [x1, #-0x20]!	/* q6, q7 := last ciphertext blocks */
 	str	q7, [x4]		/* update iv */
 	b	2f
+	_ALIGN_TEXT
 1:	ldp	q6, q7, [x1, #-0x20]!
 	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
 	stp	q0, q1, [x2, #-0x20]!
@@ -629,6 +633,7 @@ ENTRY(aesarmv8_xts_enc1)
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	ldr	q0, [x1], #0x10		/* q0 := ptxt */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x5			/* x3 := nrounds */
@@ -661,6 +666,7 @@ ENTRY(aesarmv8_xts_enc8)
 	mov	x9, x0			/* x9 := enckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
@@ -729,6 +735,7 @@ ENTRY(aesarmv8_xts_dec1)
 	mov	x9, x0			/* x9 := deckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	ldr	q0, [x1], #0x10		/* q0 := ctxt */
 	mov	x0, x9			/* x0 := deckey */
 	mov	x3, x5			/* x3 := nrounds */
@@ -761,6 +768,7 @@ ENTRY(aesarmv8_xts_dec8)
 	mov	x9, x0			/* x9 := deckey */
 	mov	x10, x3			/* x10 := nbytes */
 	ldr	q31, [x4]		/* q31 := tweak */
+	_ALIGN_TEXT
 1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
 	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
 	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
@@ -879,6 +887,7 @@ ENTRY(aesarmv8_cbcmac_update1)
 	ldr	q0, [x3]		/* q0 := initial authenticator */
 	mov	x9, x0			/* x9 := enckey */
 	mov	x5, x3			/* x5 := &auth (enc1 trashes x3) */
+	_ALIGN_TEXT
 1:	ldr	q1, [x1], #0x10		/* q1 := plaintext block */
 	mov	x0, x9			/* x0 := enckey */
 	mov	x3, x4			/* x3 := nrounds */
@@ -913,6 +922,7 @@ ENTRY(aesarmv8_ccm_enc1)
 #if _BYTE_ORDER == _LITTLE_ENDIAN
 	rev32	v2.16b, v2.16b		/* q2 := ctr (host-endian) */
 #endif
+	_ALIGN_TEXT
 1:	ldr	q3, [x1], #0x10		/* q3 := plaintext block */
 	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
 	mov	x0, x9			/* x0 := enckey */
@@ -972,6 +982,7 @@ ENTRY(aesarmv8_ccm_dec1)
 	bl	aesarmv8_enc1		/* q0 := pad; trash x0/x3/q16 */
 	b	2f
 
+	_ALIGN_TEXT
 1:	/*
 	 * Authenticate the last block and decrypt the next block
 	 * simultaneously.
@@ -1031,6 +1042,7 @@ END(ctr32_inc)
 aesarmv8_enc1:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	_ALIGN_TEXT
 1:	/* q0 := MixColumns(q0) */
 	aesmc	v0.16b, v0.16b
 2:	subs	x3, x3, #1
@@ -1056,6 +1068,7 @@ END(aesarmv8_enc1)
 aesarmv8_enc2:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	_ALIGN_TEXT
 1:	/* q[i] := MixColumns(q[i]) */
 	aesmc	v0.16b, v0.16b
 	aesmc	v1.16b, v1.16b
@@ -1085,6 +1098,7 @@ END(aesarmv8_enc2)
 aesarmv8_enc8:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	_ALIGN_TEXT
 1:	/* q[i] := MixColumns(q[i]) */
 	aesmc	v0.16b, v0.16b
 	aesmc	v1.16b, v1.16b
@@ -1131,6 +1145,7 @@ END(aesarmv8_enc8)
 aesarmv8_dec1:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	_ALIGN_TEXT
 1:	/* q0 := InMixColumns(q0) */
 	aesimc	v0.16b, v0.16b
 2:	subs	x3, x3, #1
@@ -1157,6 +1172,7 @@ END(aesarmv8_dec1)
 aesarmv8_dec8:
 	ldr	q16, [x0], #0x10	/* load round key */
 	b	2f
+	_ALIGN_TEXT
 1:	/* q[i] := InMixColumns(q[i]) */
 	aesimc	v0.16b, v0.16b
 	aesimc	v1.16b, v1.16b

Index: src/sys/crypto/aes/arch/arm/aes_neon_32.S
diff -u src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.2 src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.3
--- src/sys/crypto/aes/arch/arm/aes_neon_32.S:1.2	Mon Jul 27 20:52:10 2020
+++ src/sys/crypto/aes/arch/arm/aes_neon_32.S	Mon Jul 27 20:53:22 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_neon_32.S,v 1.2 2020/07/27 20:52:10 riastradh Exp $	*/
+/*	$NetBSD: aes_neon_32.S,v 1.3 2020/07/27 20:53:22 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -316,6 +316,7 @@ ENTRY(aes_neon_enc1)
 
 	b	2f
 
+	_ALIGN_TEXT
 1:	vld1.64	{d28-d29}, [r0 :128]!	/* q14 = *rk++ */
 
 	/* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */
@@ -535,6 +536,7 @@ ENTRY(aes_neon_dec1)
 
 	b	2f
 
+	_ALIGN_TEXT
 1:	/* load dsbd */
 	add	r4, r12, #(dsbd_0 - .Lconstants)
 	vld1.64	{d16-d17}, [r4 :128]!	/* q8 := dsbd[0] */

Index: src/sys/crypto/aes/arch/x86/aes_ni_64.S
diff -u src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.4 src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.5
--- src/sys/crypto/aes/arch/x86/aes_ni_64.S:1.4	Sat Jul 25 22:29:06 2020
+++ src/sys/crypto/aes/arch/x86/aes_ni_64.S	Mon Jul 27 20:53:22 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: aes_ni_64.S,v 1.4 2020/07/25 22:29:06 riastradh Exp $	*/
+/*	$NetBSD: aes_ni_64.S,v 1.5 2020/07/27 20:53:22 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -523,6 +523,7 @@ ENTRY(aesni_enctodec)
 	movdqa	(%rdi,%rdx),%xmm0	/* load last round key */
 	movdqa	%xmm0,(%rsi)		/* store last round key verbatim */
 	jmp	2f
+	_ALIGN_TEXT
 1:	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
 	aesimc	%xmm0,%xmm0	/* convert encryption to decryption */
 	movdqa	%xmm0,(%rsi)		/* store round key */
@@ -580,6 +581,7 @@ ENTRY(aesni_cbc_enc)
 	jz	2f
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	(%r8),%xmm0		/* xmm0 := chaining value */
+	_ALIGN_TEXT
 1:	movdqu	(%rsi),%xmm1		/* xmm1 := plaintext block */
 	lea	0x10(%rsi),%rsi
 	pxor	%xmm1,%xmm0		/* xmm0 := cv ^ ptxt */
@@ -615,6 +617,7 @@ ENTRY(aesni_cbc_dec1)
 	movdqu	-0x10(%rsi,%r10),%xmm0	/* xmm0 := last ciphertext block */
 	movdqu	%xmm0,(%r8)		/* update iv */
 	jmp	2f
+	_ALIGN_TEXT
 1:	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
 	pxor	%xmm8,%xmm0		/* xmm0 := ptxt */
 	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
@@ -650,6 +653,7 @@ ENTRY(aesni_cbc_dec8)
 	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := ciphertext block[n-1] */
 	movdqu	%xmm7,(%r8)		/* update iv */
 	jmp	2f
+	_ALIGN_TEXT
 1:	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
 	pxor	%xmm7,%xmm0		/* xmm0 := ptxt[0] */
 	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
@@ -706,6 +710,7 @@ END(aesni_cbc_dec8)
 ENTRY(aesni_xts_enc1)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	(%r8),%xmm15		/* xmm15 := tweak */
+	_ALIGN_TEXT
 1:	movdqu	(%rsi),%xmm0		/* xmm0 := ptxt */
 	lea	0x10(%rsi),%rsi		/* advance rdi to next block */
 	pxor	%xmm15,%xmm0		/* xmm0 := ptxt ^ tweak */
@@ -738,6 +743,7 @@ ENTRY(aesni_xts_enc8)
 	sub	$0x10,%rsp
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	(%r8),%xmm15		/* xmm15 := tweak[0] */
+	_ALIGN_TEXT
 1:	movdqa	%xmm15,%xmm8		/* xmm8 := tweak[0] */
 	call	aesni_xts_mulx		/* xmm15 := tweak[1] */
 	movdqa	%xmm15,%xmm9		/* xmm9 := tweak[1] */
@@ -812,6 +818,7 @@ END(aesni_xts_enc8)
 ENTRY(aesni_xts_dec1)
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	(%r8),%xmm15		/* xmm15 := tweak */
+	_ALIGN_TEXT
 1:	movdqu	(%rsi),%xmm0		/* xmm0 := ctxt */
 	lea	0x10(%rsi),%rsi		/* advance rdi to next block */
 	pxor	%xmm15,%xmm0		/* xmm0 := ctxt ^ tweak */
@@ -844,6 +851,7 @@ ENTRY(aesni_xts_dec8)
 	sub	$0x10,%rsp
 	mov	%rcx,%r10		/* r10 := nbytes */
 	movdqu	(%r8),%xmm15		/* xmm15 := tweak[0] */
+	_ALIGN_TEXT
 1:	movdqa	%xmm15,%xmm8		/* xmm8 := tweak[0] */
 	call	aesni_xts_mulx		/* xmm15 := tweak[1] */
 	movdqa	%xmm15,%xmm9		/* xmm9 := tweak[1] */
@@ -964,6 +972,7 @@ ENTRY(aesni_cbcmac_update1)
 	movdqu	(%rcx),%xmm0		/* xmm0 := auth */
 	mov	%rdx,%r10		/* r10 := nbytes */
 	mov	%rcx,%rdx		/* rdx := &auth */
+	_ALIGN_TEXT
 1:	pxor	(%rsi),%xmm0		/* xmm0 ^= plaintext block */
 	lea	0x10(%rsi),%rsi
 	mov	%r8d,%ecx		/* ecx := nrounds */
@@ -992,6 +1001,7 @@ ENTRY(aesni_ccm_enc1)
 	movdqa	ctr32_inc(%rip),%xmm5	/* xmm5 := (0,0,0,1) (le) */
 	movdqu	(%r8),%xmm0		/* xmm0 := auth */
 	pshufb	%xmm4,%xmm2		/* xmm2 := ctr (le) */
+	_ALIGN_TEXT
 1:	movdqu	(%rsi),%xmm3		/* xmm3 := plaintext block */
 	paddd	%xmm5,%xmm2		/* increment ctr (32-bit) */
 	lea	0x10(%rsi),%rsi
@@ -1040,6 +1050,7 @@ ENTRY(aesni_ccm_dec1)
 	call	aesni_enc1	/* xmm0 := pad; trash rax/rcx/xmm8 */
 	jmp	2f
 
+	_ALIGN_TEXT
 1:	/*
 	 * Authenticate the last block and decrypt the next block
 	 * simultaneously.
@@ -1103,6 +1114,7 @@ aesni_enc1:
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx	/* rcx := byte offset of round key from end */
 	jmp	2f
+	_ALIGN_TEXT
 1:	aesenc	%xmm8,%xmm0
 2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
 	add	$0x10,%rcx
@@ -1130,6 +1142,7 @@ aesni_enc2:
 	pxor	%xmm8,%xmm0		/* xor in first round key */
 	pxor	%xmm8,%xmm1
 	jmp	2f
+	_ALIGN_TEXT
 1:	aesenc	%xmm8,%xmm0
 	aesenc	%xmm8,%xmm1
 2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
@@ -1165,6 +1178,7 @@ aesni_enc8:
 	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
 	neg	%rcx	/* rcx := byte offset of round key from end */
 	jmp	2f
+	_ALIGN_TEXT
 1:	aesenc	%xmm8,%xmm0
 	aesenc	%xmm8,%xmm1
 	aesenc	%xmm8,%xmm2
@@ -1204,6 +1218,7 @@ aesni_dec1:
 	lea	0x10(%rdi,%rcx),%rax	/* rax := pointer to round key */
 	neg	%rcx	/* rcx := byte offset of round key from end */
 	jmp	2f
+	_ALIGN_TEXT
 1:	aesdec	%xmm8,%xmm0
 2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
 	add	$0x10,%rcx
@@ -1237,6 +1252,7 @@ aesni_dec8:
 	lea	0x10(%rdi,%rcx),%rax	/* rax := pointer to round key */
 	neg	%rcx	/* rcx := byte offset of round key from end */
 	jmp	2f
+	_ALIGN_TEXT
 1:	aesdec	%xmm8,%xmm0
 	aesdec	%xmm8,%xmm1
 	aesdec	%xmm8,%xmm2

Index: src/sys/crypto/chacha/arch/arm/chacha_neon_64.S
diff -u src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.2 src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.3
--- src/sys/crypto/chacha/arch/arm/chacha_neon_64.S:1.2	Mon Jul 27 20:50:25 2020
+++ src/sys/crypto/chacha/arch/arm/chacha_neon_64.S	Mon Jul 27 20:53:23 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: chacha_neon_64.S,v 1.2 2020/07/27 20:50:25 riastradh Exp $	*/
+/*	$NetBSD: chacha_neon_64.S,v 1.3 2020/07/27 20:53:23 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -201,6 +201,7 @@ ENTRY(chacha_stream256_neon)
 	mov	w11, v14.s[0]
 	mov	w12, v15.s[0]
 
+	_ALIGN_TEXT
 1:	subs	w5, w5, #2
 	ROUND(v0,v1,v2,v3, v4,v5,v6,v7, v8,v9,v10,v11, v12,v13,v14,v15,
 	    v28,v29,v30,v31, v27)
@@ -339,6 +340,7 @@ ENTRY(chacha_stream_xor256_neon)
 	mov	w11, v14.s[0]
 	mov	w12, v15.s[0]
 
+	_ALIGN_TEXT
 1:	subs	w6, w6, #2
 	ROUND(v0,v1,v2,v3, v4,v5,v6,v7, v8,v9,v10,v11, v12,v13,v14,v15,
 	    v28,v29,v30,v31, v27)