Module Name: src
Committed By: riastradh
Date: Sun Aug 9 02:49:38 UTC 2020
Modified Files:
src/sys/crypto/aes/arch/arm: arm_neon.h
src/sys/crypto/chacha/arch/arm: arm_neon.h
Log Message:
Fix some clang neon intrinsics.
Compile-tested only, with -Wno-nonportable-vector-initializers. Still need
to address -- and test -- this stuff properly, but this is progress.
To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/crypto/aes/arch/arm/arm_neon.h
cvs rdiff -u -r1.5 -r1.6 src/sys/crypto/chacha/arch/arm/arm_neon.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/crypto/aes/arch/arm/arm_neon.h
diff -u src/sys/crypto/aes/arch/arm/arm_neon.h:1.9 src/sys/crypto/aes/arch/arm/arm_neon.h:1.10
--- src/sys/crypto/aes/arch/arm/arm_neon.h:1.9 Sun Aug 9 02:48:38 2020
+++ src/sys/crypto/aes/arch/arm/arm_neon.h Sun Aug 9 02:49:38 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: arm_neon.h,v 1.9 2020/08/09 02:48:38 riastradh Exp $ */
+/* $NetBSD: arm_neon.h,v 1.10 2020/08/09 02:49:38 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -85,6 +85,8 @@ typedef __attribute__((neon_vector_type(
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
@@ -218,7 +220,7 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, \
(int8x16_t)__hi_r, (__i), 48); \
- return __builtin_shufflevector(__r, __r, \
+ __builtin_shufflevector(__r, __r, \
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
})
#endif /* __LITTLE_ENDIAN */
@@ -326,19 +328,37 @@ vqtbl1q_u8(uint8x16_t __tab, uint8x16_t
return (uint8x16_t)__out64;
#endif
#elif defined(__clang__)
-#ifdef __LITTLE_ENDIAN__
- return (uint8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, 48);
-#else
- uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo,
+#ifndef __LITTLE_ENDIAN__
+ __tab = __builtin_shufflevector(__tab, __tab,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi,
+ __idx = __builtin_shufflevector(__idx, __idx,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, __i, 48);
- return __builtin_shufflevector(__r, __r,
+#endif
+ uint8x16_t __r;
+#ifdef __aarch64__
+ __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab, (int8x16_t)__idx, 48);
+#else
+ uint64x2_t __tab64 = (uint64x2_t)__tab;
+ uint8x8_t __tablo = (uint8x8_t)__tab64[0];
+ uint8x8_t __tabhi = (uint8x8_t)__tab64[1];
+ uint64x2_t __idx64, __out64;
+ int8x8_t __idxlo, __idxhi, __outlo, __outhi;
+
+ __idx64 = (uint64x2_t)__idx;
+ __idxlo = (int8x8_t)__idx64[0];
+ __idxhi = (int8x8_t)__idx64[1];
+ __outlo = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxlo, 16);
+ __outhi = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxhi, 16);
+ __out64 = (uint64x2_t) { (uint64_t)__outlo, (uint64_t)__outhi };
+ __r = (uint8x16_t)__out64;
+#endif
+#ifndef __LITTLE_ENDIAN__
+ __r = __builtin_shufflevector(__r, __r,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
#endif
+ return __r;
#endif
}
@@ -579,7 +599,7 @@ vsriq_n_u32(uint32x4_t __vins, uint32x4_
(int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins), \
(int32x4_t)(__vsh), (__bits), 34)
#else
-#define vsliq_n_s32(__vins, __vsh, __bits) ( \
+#define vsriq_n_s32(__vins, __vsh, __bits) ( \
{ \
int32x4_t __tvins = (__vins); \
int32x4_t __tvsh = (__vsh); \
Index: src/sys/crypto/chacha/arch/arm/arm_neon.h
diff -u src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5 src/sys/crypto/chacha/arch/arm/arm_neon.h:1.6
--- src/sys/crypto/chacha/arch/arm/arm_neon.h:1.5 Sun Aug 9 02:48:38 2020
+++ src/sys/crypto/chacha/arch/arm/arm_neon.h Sun Aug 9 02:49:38 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: arm_neon.h,v 1.5 2020/08/09 02:48:38 riastradh Exp $ */
+/* $NetBSD: arm_neon.h,v 1.6 2020/08/09 02:49:38 riastradh Exp $ */
/*-
* Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -85,6 +85,8 @@ typedef __attribute__((neon_vector_type(
typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+
typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
typedef struct { uint8x8_t val[2]; } uint8x8x2_t;
@@ -218,7 +220,7 @@ vextq_u8(uint8x16_t __lo, uint8x16_t __h
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
uint8x16_t __r = __builtin_neon_vextq_v((int8x16_t)__lo_r, \
(int8x16_t)__hi_r, (__i), 48); \
- return __builtin_shufflevector(__r, __r, \
+ __builtin_shufflevector(__r, __r, \
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); \
})
#endif /* __LITTLE_ENDIAN */
@@ -326,19 +328,37 @@ vqtbl1q_u8(uint8x16_t __tab, uint8x16_t
return (uint8x16_t)__out64;
#endif
#elif defined(__clang__)
-#ifdef __LITTLE_ENDIAN__
- return (uint8x16_t)__builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, 48);
-#else
- uint32x4_t __lo_r = __builtin_shufflevector(__lo, __lo,
+#ifndef __LITTLE_ENDIAN__
+ __tab = __builtin_shufflevector(__tab, __tab,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __hi_r = __builtin_shufflevector(__hi, __hi,
+ __idx = __builtin_shufflevector(__idx, __idx,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
- uint32x4_t __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab,
- (int8x16_t)__idx, __i, 48);
- return __builtin_shufflevector(__r, __r,
+#endif
+ uint8x16_t __r;
+#ifdef __aarch64__
+ __r = __builtin_neon_vqtbl1q_v((int8x16_t)__tab, (int8x16_t)__idx, 48);
+#else
+ uint64x2_t __tab64 = (uint64x2_t)__tab;
+ uint8x8_t __tablo = (uint8x8_t)__tab64[0];
+ uint8x8_t __tabhi = (uint8x8_t)__tab64[1];
+ uint64x2_t __idx64, __out64;
+ int8x8_t __idxlo, __idxhi, __outlo, __outhi;
+
+ __idx64 = (uint64x2_t)__idx;
+ __idxlo = (int8x8_t)__idx64[0];
+ __idxhi = (int8x8_t)__idx64[1];
+ __outlo = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxlo, 16);
+ __outhi = (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__tablo,
+ (int8x8_t)__tabhi, (int8x8_t)__idxhi, 16);
+ __out64 = (uint64x2_t) { (uint64_t)__outlo, (uint64_t)__outhi };
+ __r = (uint8x16_t)__out64;
+#endif
+#ifndef __LITTLE_ENDIAN__
+ __r = __builtin_shufflevector(__r, __r,
15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
#endif
+ return __r;
#endif
}
@@ -579,7 +599,7 @@ vsriq_n_u32(uint32x4_t __vins, uint32x4_
(int32x4_t)__builtin_neon_vsriq_n_v((int32x4_t)(__vins), \
(int32x4_t)(__vsh), (__bits), 34)
#else
-#define vsliq_n_s32(__vins, __vsh, __bits) ( \
+#define vsriq_n_s32(__vins, __vsh, __bits) ( \
{ \
int32x4_t __tvins = (__vins); \
int32x4_t __tvsh = (__vsh); \