So I have revived a diff from drahn@/patrick@ to add kernel support for the FPU/SIMD unit on armv7. With that diff, it is possible to use the "NEON" SIMD instructions, even though we're still using the softfloat ABI. And it turns out libcrypto has code that detects this and starts using the SIMD code paths for some of the assembly-optimized crypto functions.
Unfortunately, those code paths suffer from the same problem as some of the other armv7-specific assembly code in libcrypto: they assume that unaligned access is allowed. In my first diff, I left the SIMD code paths alone, in the hope that they would be all right. But here is a diff that disables them when __STRICT_ALIGNMENT is defined. This does raise the question of how viable our approach of not allowing unaligned access on armv7 really is. I think all the SoCs we support include NEON support, and for some of the crypto code it provides a significant performance boost. Thoughts? Index: lib/libcrypto/modes/gcm128.c =================================================================== RCS file: /cvs/src/lib/libcrypto/modes/gcm128.c,v retrieving revision 1.21 diff -u -p -r1.21 gcm128.c --- lib/libcrypto/modes/gcm128.c 9 Dec 2017 07:16:51 -0000 1.21 +++ lib/libcrypto/modes/gcm128.c 21 Jan 2018 12:46:16 -0000 @@ -661,7 +661,7 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const # endif # elif defined(__arm__) || defined(__arm) # include "arm_arch.h" -# if __ARM_ARCH__>=7 +# if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) # define GHASH_ASM_ARM # define GCM_FUNCREF_4BIT void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); Index: lib/libcrypto/modes/asm/ghash-armv4.pl =================================================================== RCS file: /cvs/src/lib/libcrypto/modes/asm/ghash-armv4.pl,v retrieving revision 1.2 diff -u -p -r1.2 ghash-armv4.pl --- lib/libcrypto/modes/asm/ghash-armv4.pl 4 Jan 2017 22:54:05 -0000 1.2 +++ lib/libcrypto/modes/asm/ghash-armv4.pl 21 Jan 2018 12:46:16 -0000 @@ -319,7 +319,7 @@ sub Dhi() { shift=~m|q([1]?[0-9])|?"d" sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } $code.=<<___; -#if __ARM_ARCH__>=7 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) .fpu neon .global gcm_gmult_neon Index: lib/libcrypto/sha/asm/sha512-armv4.pl =================================================================== RCS file: 
/cvs/src/lib/libcrypto/sha/asm/sha512-armv4.pl,v retrieving revision 1.2 diff -u -p -r1.2 sha512-armv4.pl --- lib/libcrypto/sha/asm/sha512-armv4.pl 7 Jan 2018 12:35:52 -0000 1.2 +++ lib/libcrypto/sha/asm/sha512-armv4.pl 21 Jan 2018 12:46:16 -0000 @@ -229,7 +229,7 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c sha512_block_data_order: sub r3,pc,#8 @ sha512_block_data_order add $len,$inp,$len,lsl#7 @ len to point at the end of inp -#if __ARM_ARCH__>=7 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#1 @@ -533,7 +533,7 @@ ___ } $code.=<<___; -#if __ARM_ARCH__>=7 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) .fpu neon .align 4