This should help: https://gerrit.fd.io/r/c/vpp/+/25243 <https://gerrit.fd.io/r/c/vpp/+/25243>
— Damjan > On 18 Feb 2020, at 09:38, Lijian Zhang <[email protected]> wrote: > > Hi Damjan, > There’s a compile error when building the debug image with the code segment below. > The error message is pasted as follows. > > >>static_always_inline u8x16 > u8x16_word_shift_left (u8x16 x, const int n) > { > return vextq_u8 (u8x16_splat (0), x, 16 - n); > } > > In function ‘vextq_u8’, > inlined from ‘u8x16_word_shift_left’ at > vpp/src/vppinfra/vector_neon.h:188:10: > /usr/lib/gcc/aarch64-linux-gnu/8/include/arm_neon.h:16646:3: error: lane > index must be a constant immediate > __AARCH64_LANE_CHECK (__a, __c); > ^~~~~~~~~~~~~~~~~~~~ > > The root cause is that the third parameter of vextq_u8 (uint8x16_t __a, > uint8x16_t __b, __const int __c) requires a compile-time immediate value > instead of a const variable, because the intrinsic eventually maps down to > the instruction: > EXT Vd.16B,Vn.16B,Vm.16B,#n > Where #n is the operand __c. Unfortunately there is no good way of expressing > that requirement in the C language, so “const int” is the best the compiler team > can do in the current implementation, but the __AARCH64_LANE_CHECK macro is there to > enforce that constraint. > > This failure is observed when building the debug image, but not when > building the release image, because GCC can determine the immediate value for operand > __c at compile time via the inlined functions. If the __always_inline > attribute is removed, GCC will not be able to figure out the immediate value, > and will report errors. That’s why the VPP release image is fine, while the debug > image reports errors. > #include "arm_neon.h" > static inline __attribute__ ((__always_inline__)) uint8x16_t shift_left > (uint8x16_t x, const int n) > { > return vextq_u8 (x, x, n+1); > } > uint8x16_t shift_left2 (uint8x16_t x) { return shift_left (x, 1); } > const int a = 3; > uint8x16_t shift_left3 (uint8x16_t x) { return shift_left (x, a); } > > However, the above inline solution is not accepted by Clang. 
Clang will > report an error with the above code regardless of whether the function is inlined or not. > > For the debug image, I tried to add the __always_inline attribute to function > u8x16_word_shift_left (), but it doesn’t work. It seems the callers, and even > several upper-layer callers, of function u8x16_word_shift_left would also need to be marked > with the __always_inline attribute. I didn’t manage to resolve the errors this > way, as it seems there are too many places that require such a code change. > > One solution I have in mind is making a wrapper for vextq_u8() with an array of > functions _vextq_u8[]. Each entry of the function array calls vextq_u8 > with an immediate value. Below is some pseudo code. A complete patch is > attached at the end of the email. > Could you please share your thoughts on this solution and advise on this issue? > > +static u8x16 \ > + u8x16_word_shift_left (u8x16 x, const int n) \ > +{ return (u8x16) _vextq_u8 [c - n] (vdupq_n_u8 (0), x); } \ > > static u8x16 \ > _vextq_u8_0 (u8x16 a, u8x16 b) \ > { return (u8x16) vextq_u8 (a, b, 0); } > static u8x16 \ > _vextq_u8_1 (u8x16 a, u8x16 b) \ > { return (u8x16) vextq_u8 (a, b, 1); } > …. 
> static u8x16 \ > _vextq_u8_16 (u8x16 a, u8x16 b) \ > { return (u8x16) vextq_u8 (a, b, 16); } > > +static u8x16 (* _vextq_u8 [16]) (u8x16, u8x16) = { \ > + _vextq_u8_0, \ > + _vextq_u8_1, \ > + _vextq_u8_2, \ > + _vextq_u8_3, \ > + _vextq_u8_4, \ > + _vextq_u8_5, \ > + _vextq_u8_6, \ > + _vextq_u8_7, \ > + _vextq_u8_8, \ > + _vextq_u8_9, \ > + _vextq_u8_10, \ > + _vextq_u8_11, \ > + _vextq_u8_12, \ > + _vextq_u8_13, \ > + _vextq_u8_14, \ > + _vextq_u8_15, \ > +}; \ > > > > Complete patch: > diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h > index 3855f55ad..de77d0864 100644 > --- a/src/vppinfra/vector_neon.h > +++ b/src/vppinfra/vector_neon.h > @@ -46,6 +46,81 @@ u8x16_compare_byte_mask (u8x16 v) > return (u32) (vgetq_lane_u64 (x64, 0) + (vgetq_lane_u64 (x64, 1) << 8)); > } > > +/* *INDENT-OFF* */ > +#define foreach_neon_vec128i \ > + _(i,8,16,s8,0) _(i,8,16,s8,1) _(i,8,16,s8,2) _(i,8,16,s8,3) \ > + _(i,8,16,s8,4) _(i,8,16,s8,5) _(i,8,16,s8,6) _(i,8,16,s8,7) \ > + _(i,8,16,s8,8) _(i,8,16,s8,9) _(i,8,16,s8,10) _(i,8,16,s8,11) \ > + _(i,8,16,s8,12) _(i,8,16,s8,13) _(i,8,16,s8,14) _(i,8,16,s8,15) \ > + _(i,16,8,s16,0) _(i,16,8,s16,1) _(i,16,8,s16,2) _(i,16,8,s16,3) \ > + _(i,16,8,s16,4) _(i,16,8,s16,5) _(i,16,8,s16,6) _(i,16,8,s16,7) \ > + _(i,32,4,s32,0) _(i,32,4,s32,1) _(i,32,4,s32,2) _(i,32,4,s32,3) \ > + _(i,64,2,s64,0) _(i,64,2,s64,1) > +#define foreach_neon_vec128u \ > + _(u,8,16,u8,0) _(u,8,16,u8,1) _(u,8,16,u8,2) _(u,8,16,u8,3) \ > + _(u,8,16,u8,4) _(u,8,16,u8,5) _(u,8,16,u8,6) _(u,8,16,u8,7) \ > + _(u,8,16,u8,8) _(u,8,16,u8,9) _(u,8,16,u8,10) _(u,8,16,u8,11) \ > + _(u,8,16,u8,12) _(u,8,16,u8,13) _(u,8,16,u8,14) _(u,8,16,u8,15) \ > + _(u,16,8,u16,0) _(u,16,8,u16,1) _(u,16,8,u16,2) _(u,16,8,u16,3) \ > + _(u,16,8,u16,4) _(u,16,8,u16,5) _(u,16,8,u16,6) _(u,16,8,u16,7) \ > + _(u,32,4,u32,0) _(u,32,4,u32,1) _(u,32,4,u32,2) _(u,32,4,u32,3) \ > + _(u,64,2,u64,0) _(u,64,2,u64,1) > + > +#define _(t, s, c, i, n) \ > +static t##s##x##c \ > 
+_vextq_##i##_##n (t##s##x##c a, t##s##x##c b) \ > +{ return (t##s##x##c) vextq_##i (a, b, n); } > + > +foreach_neon_vec128i foreach_neon_vec128u > +#undef _ > + > +#define _(t, i) \ > +static t##8x16 (* _vextq_##i##8 [16]) (t##8x16, t##8x16) = { \ > + _vextq_##i##8_0, \ > + _vextq_##i##8_1, \ > + _vextq_##i##8_2, \ > + _vextq_##i##8_3, \ > + _vextq_##i##8_4, \ > + _vextq_##i##8_5, \ > + _vextq_##i##8_6, \ > + _vextq_##i##8_7, \ > + _vextq_##i##8_8, \ > + _vextq_##i##8_9, \ > + _vextq_##i##8_10, \ > + _vextq_##i##8_11, \ > + _vextq_##i##8_12, \ > + _vextq_##i##8_13, \ > + _vextq_##i##8_14, \ > + _vextq_##i##8_15, \ > +}; \ > +static t##16x8 (* _vextq_##i##16 [8]) (t##16x8, t##16x8) = { \ > + _vextq_##i##16_0, \ > + _vextq_##i##16_1, \ > + _vextq_##i##16_2, \ > + _vextq_##i##16_3, \ > + _vextq_##i##16_4, \ > + _vextq_##i##16_5, \ > + _vextq_##i##16_6, \ > + _vextq_##i##16_7, \ > +}; \ > +static t##32x4 (* _vextq_##i##32 [4]) (t##32x4, t##32x4) = { \ > + _vextq_##i##32_0, \ > + _vextq_##i##32_1, \ > + _vextq_##i##32_2, \ > + _vextq_##i##32_3, \ > +}; \ > +static t##64x2 (* _vextq_##i##64 [2]) (t##64x2, t##64x2) = { \ > + (void *) _vextq_##i##64_0, \ > + (void *) _vextq_##i##64_1, \ > +}; > +_(i, s) > +_(u, u) > +#undef _ > +#undef foreach_neon_vec128i > +#undef foreach_neon_vec128u > +/* *INDENT-ON* */ > + > /* *INDENT-OFF* */ > #define foreach_neon_vec128i \ > _(i,8,16,s8) _(i,16,8,s16) _(i,32,4,s32) _(i,64,2,s64) > @@ -90,7 +165,17 @@ t##s##x##c##_is_greater (t##s##x##c a, t##s##x##c b) > \ > \ > static_always_inline t##s##x##c \ > t##s##x##c##_blend (t##s##x##c dst, t##s##x##c src, u##s##x##c mask) \ > -{ return (t##s##x##c) vbslq_##i (mask, src, dst); } > +{ return (t##s##x##c) vbslq_##i (mask, src, dst); } \ > +\ > +static_always_inline u##s##x##c \ > +t##s##x##c##_word_shift_left (t##s##x##c x, const int n) \ > +{ /*ASSERT ((c >= n) && (0 < n));*/ \ > + return (u##s##x##c) _vextq_##i [c - n] (t##s##x##c##_splat (0), x); } \ > +\ > +static_always_inline 
u##s##x##c \ > +t##s##x##c##_word_shift_right (t##s##x##c x, const int n) \ > +{ /*ASSERT ((0 <= n) && (c > n));*/ \ > + return (u##s##x##c) _vextq_##i [n] (x, t##s##x##c##_splat (0)); } > > foreach_neon_vec128i foreach_neon_vec128u > > Thanks. >
-=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#15450): https://lists.fd.io/g/vpp-dev/message/15450 Mute This Topic: https://lists.fd.io/mt/71367647/21656 Group Owner: [email protected] Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub [[email protected]] -=-=-=-=-=-=-=-=-=-=-=-
