Hi Damjan,
There's a compile error when building the debug image with the code segment
below. The error message is pasted as follows.
>>static_always_inline u8x16
u8x16_word_shift_left (u8x16 x, const int n)
{
return vextq_u8 (u8x16_splat (0), x, 16 - n);
}
In function 'vextq_u8',
inlined from 'u8x16_word_shift_left' at
vpp/src/vppinfra/vector_neon.h:188:10:
/usr/lib/gcc/aarch64-linux-gnu/8/include/arm_neon.h:16646:3: error: lane index
must be a constant immediate
__AARCH64_LANE_CHECK (__a, __c);
^~~~~~~~~~~~~~~~~~~~
The root cause is that the third parameter of vextq_u8 (uint8x16_t __a,
uint8x16_t __b, __const int __c) requires a compile-time immediate value rather
than a const variable, because the intrinsic eventually maps down to the
instruction:
EXT Vd.16B,Vn.16B,Vm.16B,#n
where #n is the operand __c. Unfortunately there is no good way of expressing
that requirement in the C language, so "const int" is the best the compiler
team can do in the current implementation, but the __AARCH64_LANE_CHECK macro
is there to enforce that constraint.
This failure is observed when building the debug image, but not when building
the release image, because GCC can determine the immediate value of operand __c
at compile time through the inlined functions. If the __always_inline attribute
is removed, GCC is not able to figure out the immediate value and reports
errors. That's why the VPP release image is fine, while the debug image reports
errors.
#include "arm_neon.h"
/* Forced-inline wrapper: forwards n + 1 as the third argument of vextq_u8,
   which (per the discussion above) must be a compile-time immediate. */
static inline __attribute__ ((__always_inline__)) uint8x16_t shift_left
(uint8x16_t x, const int n)
{
return vextq_u8 (x, x, n+1);
}
/* Case 1: the wrapper is called with an integer literal. */
uint8x16_t shift_left2 (uint8x16_t x) { return shift_left (x, 1); }
/* Case 2: the wrapper is called with a file-scope const int variable
   instead of a literal. */
const int a = 3;
uint8x16_t shift_left3 (uint8x16_t x) { return shift_left (x, a); }
However, the above inline solution is not accepted by Clang. Clang reports an
error for the above code regardless of whether the function is inlined.
For the debug image, I tried adding the __always_inline attribute to function
u8x16_word_shift_left (), but it doesn't work. It seems the caller, and even
several upper-layer callers, of u8x16_word_shift_left would also need the
__always_inline attribute. I didn't manage to resolve the errors this way, as
it seems too many places would require such a code change.
One solution I have in mind is to make a wrapper for vextq_u8() with an array
of functions, _vextq_u8[]. Each entry of the function array calls vextq_u8
with a fixed immediate value. Below is some pseudo code. A complete patch is
attached at the end of the email.
Could you please share your thoughts on this solution and any suggestions on
this issue?
+static u8x16 \
+ u8x16_word_shift_left (u8x16 x, const int n) \
+{ return (u8x16) _vextq_u8 [16 - n] (vdupq_n_u8 (0), x); } \
static u8x16 \
_vextq_u8_0 (u8x16 a, u8x16 b) \
{ return (u8x16) vextq_u8 (a, b, 0); }
static u8x16 \
_vextq_u8_1 (u8x16 a, u8x16 b) \
{ return (u8x16) vextq_u8 (a, b, 1); }
....
static u8x16 \
_vextq_u8_15 (u8x16 a, u8x16 b) \
{ return (u8x16) vextq_u8 (a, b, 15); }
+static u8x16 (* _vextq_u8 [16]) (u8x16, u8x16) = { \
+ _vextq_u8_0, \
+ _vextq_u8_1, \
+ _vextq_u8_2, \
+ _vextq_u8_3, \
+ _vextq_u8_4, \
+ _vextq_u8_5, \
+ _vextq_u8_6, \
+ _vextq_u8_7, \
+ _vextq_u8_8, \
+ _vextq_u8_9, \
+ _vextq_u8_10, \
+ _vextq_u8_11, \
+ _vextq_u8_12, \
+ _vextq_u8_13, \
+ _vextq_u8_14, \
+ _vextq_u8_15, \
+}; \
Complete patch:
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index 3855f55ad..de77d0864 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -46,6 +46,81 @@ u8x16_compare_byte_mask (u8x16 v)
return (u32) (vgetq_lane_u64 (x64, 0) + (vgetq_lane_u64 (x64, 1) << 8));
}
+/* *INDENT-OFF* */
+#define foreach_neon_vec128i \
+ _(i,8,16,s8,0) _(i,8,16,s8,1) _(i,8,16,s8,2) _(i,8,16,s8,3) \
+ _(i,8,16,s8,4) _(i,8,16,s8,5) _(i,8,16,s8,6) _(i,8,16,s8,7) \
+ _(i,8,16,s8,8) _(i,8,16,s8,9) _(i,8,16,s8,10) _(i,8,16,s8,11) \
+ _(i,8,16,s8,12) _(i,8,16,s8,13) _(i,8,16,s8,14) _(i,8,16,s8,15) \
+ _(i,16,8,s16,0) _(i,16,8,s16,1) _(i,16,8,s16,2) _(i,16,8,s16,3) \
+ _(i,16,8,s16,4) _(i,16,8,s16,5) _(i,16,8,s16,6) _(i,16,8,s16,7) \
+ _(i,32,4,s32,0) _(i,32,4,s32,1) _(i,32,4,s32,2) _(i,32,4,s32,3) \
+ _(i,64,2,s64,0) _(i,64,2,s64,1)
+#define foreach_neon_vec128u \
+ _(u,8,16,u8,0) _(u,8,16,u8,1) _(u,8,16,u8,2) _(u,8,16,u8,3) \
+ _(u,8,16,u8,4) _(u,8,16,u8,5) _(u,8,16,u8,6) _(u,8,16,u8,7) \
+ _(u,8,16,u8,8) _(u,8,16,u8,9) _(u,8,16,u8,10) _(u,8,16,u8,11) \
+ _(u,8,16,u8,12) _(u,8,16,u8,13) _(u,8,16,u8,14) _(u,8,16,u8,15) \
+ _(u,16,8,u16,0) _(u,16,8,u16,1) _(u,16,8,u16,2) _(u,16,8,u16,3) \
+ _(u,16,8,u16,4) _(u,16,8,u16,5) _(u,16,8,u16,6) _(u,16,8,u16,7) \
+ _(u,32,4,u32,0) _(u,32,4,u32,1) _(u,32,4,u32,2) _(u,32,4,u32,3) \
+ _(u,64,2,u64,0) _(u,64,2,u64,1)
+
+#define _(t, s, c, i, n) \
+static t##s##x##c \
+_vextq_##i##_##n (t##s##x##c a, t##s##x##c b) \
+{ return (t##s##x##c) vextq_##i (a, b, n); }
+
+foreach_neon_vec128i foreach_neon_vec128u
+#undef _
+
+#define _(t, i) \
+static t##8x16 (* _vextq_##i##8 [16]) (t##8x16, t##8x16) = { \
+ _vextq_##i##8_0, \
+ _vextq_##i##8_1, \
+ _vextq_##i##8_2, \
+ _vextq_##i##8_3, \
+ _vextq_##i##8_4, \
+ _vextq_##i##8_5, \
+ _vextq_##i##8_6, \
+ _vextq_##i##8_7, \
+ _vextq_##i##8_8, \
+ _vextq_##i##8_9, \
+ _vextq_##i##8_10, \
+ _vextq_##i##8_11, \
+ _vextq_##i##8_12, \
+ _vextq_##i##8_13, \
+ _vextq_##i##8_14, \
+ _vextq_##i##8_15, \
+}; \
+static t##16x8 (* _vextq_##i##16 [8]) (t##16x8, t##16x8) = { \
+ _vextq_##i##16_0, \
+ _vextq_##i##16_1, \
+ _vextq_##i##16_2, \
+ _vextq_##i##16_3, \
+ _vextq_##i##16_4, \
+ _vextq_##i##16_5, \
+ _vextq_##i##16_6, \
+ _vextq_##i##16_7, \
+}; \
+static t##32x4 (* _vextq_##i##32 [4]) (t##32x4, t##32x4) = { \
+ _vextq_##i##32_0, \
+ _vextq_##i##32_1, \
+ _vextq_##i##32_2, \
+ _vextq_##i##32_3, \
+}; \
+static t##64x2 (* _vextq_##i##64 [2]) (t##64x2, t##64x2) = { \
+ (void *) _vextq_##i##64_0, \
+ (void *) _vextq_##i##64_1, \
+};
+_(i, s)
+_(u, u)
+#undef _
+#undef foreach_neon_vec128i
+#undef foreach_neon_vec128u
+/* *INDENT-ON* */
+
/* *INDENT-OFF* */
#define foreach_neon_vec128i \
_(i,8,16,s8) _(i,16,8,s16) _(i,32,4,s32) _(i,64,2,s64)
@@ -90,7 +165,17 @@ t##s##x##c##_is_greater (t##s##x##c a, t##s##x##c b)
\
\
static_always_inline t##s##x##c \
t##s##x##c##_blend (t##s##x##c dst, t##s##x##c src, u##s##x##c mask) \
-{ return (t##s##x##c) vbslq_##i (mask, src, dst); }
+{ return (t##s##x##c) vbslq_##i (mask, src, dst); } \
+\
+static_always_inline u##s##x##c \
+t##s##x##c##_word_shift_left (t##s##x##c x, const int n) \
+{ /*ASSERT ((c >= n) && (0 < n));*/ \
+ return (u##s##x##c) _vextq_##i [c - n] (t##s##x##c##_splat (0), x); } \
+\
+static_always_inline u##s##x##c \
+t##s##x##c##_word_shift_right (t##s##x##c x, const int n) \
+{ /*ASSERT ((0 <= n) && (c > n));*/ \
+ return (u##s##x##c) _vextq_##i [n] (x, t##s##x##c##_splat (0)); }
foreach_neon_vec128i foreach_neon_vec128u
Thanks.
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#15442): https://lists.fd.io/g/vpp-dev/message/15442
Mute This Topic: https://lists.fd.io/mt/71367647/21656
Group Owner: [email protected]
Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub [[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-