This should help….

https://gerrit.fd.io/r/c/vpp/+/25243 <https://gerrit.fd.io/r/c/vpp/+/25243>

— 
Damjan

> On 18 Feb 2020, at 09:38, Lijian Zhang <[email protected]> wrote:
> 
> Hi Damjan,
> There’s a compile error when building the debug image with the code segment below.
> The error message is pasted below it.
>
> >>static_always_inline u8x16
>    u8x16_word_shift_left (u8x16 x, const int n)
>    {
>      return vextq_u8 (u8x16_splat (0), x, 16 - n);
>    }
>
> In function ‘vextq_u8’,
>     inlined from ‘u8x16_word_shift_left’ at
> vpp/src/vppinfra/vector_neon.h:188:10:
> /usr/lib/gcc/aarch64-linux-gnu/8/include/arm_neon.h:16646:3: error: lane 
> index must be a constant immediate
>    __AARCH64_LANE_CHECK (__a, __c);
>    ^~~~~~~~~~~~~~~~~~~~
>
> The root cause is that the third parameter of vextq_u8 (uint8x16_t __a,
> uint8x16_t __b, __const int __c) requires a compile-time immediate value
> instead of a const variable, because the intrinsic eventually maps down to
> the instruction:
> EXT Vd.16B,Vn.16B,Vm.16B,#n
> where #n is the operand __c. Unfortunately there is no good way of expressing
> that requirement in the C language, so “const int” is the best the compiler
> team can do in the current implementation, but the __AARCH64_LANE_CHECK macro
> is there to enforce that constraint.
>
> This failure is observed when building the debug image, but not when
> building the release image, because GCC can determine the immediate value of
> operand __c at compile time once the functions are inlined. If the
> __always_inline attribute is removed, GCC is not able to determine the
> immediate value and reports an error. That’s why the VPP release image is
> fine, while the debug image reports errors.
> #include "arm_neon.h"
> static inline __attribute__ ((__always_inline__)) uint8x16_t shift_left 
> (uint8x16_t x, const int n)
> { 
>   return vextq_u8 (x, x, n+1);
> }
> uint8x16_t shift_left2 (uint8x16_t x) { return shift_left (x, 1); }
> const int a = 3;
> uint8x16_t shift_left3 (uint8x16_t x) { return shift_left (x, a); }
>
> However, the above inline approach is not accepted by Clang. Clang reports an
> error for the above code regardless of whether the function is inlined.
>
> For the debug image, I tried adding the __always_inline attribute to function
> u8x16_word_shift_left (), but it doesn’t work. It seems the caller, and even
> several upper-layer callers, of u8x16_word_shift_left () would also need the
> __always_inline attribute. I didn’t manage to resolve the errors this way, as
> too many places would require such a code change.
>
> One solution I have in mind is to make a wrapper for vextq_u8 () using an array
> of functions _vextq_u8[]. Each entry of the function array calls vextq_u8
> with an immediate value. Below is some pseudo code. A complete patch is
> attached at the end of the email.
> Could you please share your thoughts on this solution and advise on this issue?
>
> +static u8x16                        \
> + u8x16_word_shift_left (u8x16 x, const int n)       \
> +{  return (u8x16) _vextq_u8 [c - n] (vdupq_n_u8 (0), x); }    \
>
> static u8x16                   \
> _vextq_u8_0 (u8x16 a, u8x16 b)       \
> { return (u8x16) vextq_u8 (a, b, 0); }
> static u8x16                   \
> _vextq_u8_1 (u8x16 a, u8x16 b)       \
> { return (u8x16) vextq_u8 (a, b, 1); }
> ….
> static u8x16                   \
> _vextq_u8_16 (u8x16 a, u8x16 b)       \
> { return (u8x16) vextq_u8 (a, b, 16); }
>
> +static u8x16 (* _vextq_u8 [16]) (u8x16, u8x16) = { \
> +   _vextq_u8_0, \
> +   _vextq_u8_1, \
> +   _vextq_u8_2, \
> +   _vextq_u8_3, \
> +   _vextq_u8_4, \
> +   _vextq_u8_5, \
> +   _vextq_u8_6, \
> +   _vextq_u8_7, \
> +   _vextq_u8_8, \
> +   _vextq_u8_9, \
> +   _vextq_u8_10, \
> +   _vextq_u8_11, \
> +   _vextq_u8_12, \
> +   _vextq_u8_13, \
> +   _vextq_u8_14, \
> +   _vextq_u8_15, \
> +}; \
>
>
>
> Complete patch:
> diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
> index 3855f55ad..de77d0864 100644
> --- a/src/vppinfra/vector_neon.h
> +++ b/src/vppinfra/vector_neon.h
> @@ -46,6 +46,81 @@ u8x16_compare_byte_mask (u8x16 v)
>     return (u32) (vgetq_lane_u64 (x64, 0) + (vgetq_lane_u64 (x64, 1) << 8));
>   }
>
> +/* *INDENT-OFF* */
> +#define foreach_neon_vec128i \
> +  _(i,8,16,s8,0) _(i,8,16,s8,1) _(i,8,16,s8,2) _(i,8,16,s8,3) \
> +  _(i,8,16,s8,4) _(i,8,16,s8,5) _(i,8,16,s8,6) _(i,8,16,s8,7) \
> +  _(i,8,16,s8,8) _(i,8,16,s8,9) _(i,8,16,s8,10) _(i,8,16,s8,11) \
> +  _(i,8,16,s8,12) _(i,8,16,s8,13) _(i,8,16,s8,14) _(i,8,16,s8,15) \
> +  _(i,16,8,s16,0) _(i,16,8,s16,1) _(i,16,8,s16,2) _(i,16,8,s16,3) \
> +  _(i,16,8,s16,4) _(i,16,8,s16,5) _(i,16,8,s16,6) _(i,16,8,s16,7) \
> +  _(i,32,4,s32,0) _(i,32,4,s32,1) _(i,32,4,s32,2) _(i,32,4,s32,3) \
> +  _(i,64,2,s64,0) _(i,64,2,s64,1)
> +#define foreach_neon_vec128u \
> +  _(u,8,16,u8,0) _(u,8,16,u8,1) _(u,8,16,u8,2) _(u,8,16,u8,3) \
> +  _(u,8,16,u8,4) _(u,8,16,u8,5) _(u,8,16,u8,6) _(u,8,16,u8,7) \
> +  _(u,8,16,u8,8) _(u,8,16,u8,9) _(u,8,16,u8,10) _(u,8,16,u8,11) \
> +  _(u,8,16,u8,12) _(u,8,16,u8,13) _(u,8,16,u8,14) _(u,8,16,u8,15) \
> +  _(u,16,8,u16,0) _(u,16,8,u16,1) _(u,16,8,u16,2) _(u,16,8,u16,3) \
> +  _(u,16,8,u16,4) _(u,16,8,u16,5) _(u,16,8,u16,6) _(u,16,8,u16,7) \
> +  _(u,32,4,u32,0) _(u,32,4,u32,1) _(u,32,4,u32,2) _(u,32,4,u32,3) \
> +  _(u,64,2,u64,0) _(u,64,2,u64,1)
> +
> +#define _(t, s, c, i, n) \
> +static t##s##x##c                  \
> +_vextq_##i##_##n (t##s##x##c a, t##s##x##c b)      \
> +{ return (t##s##x##c) vextq_##i (a, b, n); }
> +
> +foreach_neon_vec128i foreach_neon_vec128u
> +#undef _
> +
> +#define _(t, i) \
> +static t##8x16 (* _vextq_##i##8 [16]) (t##8x16, t##8x16) = { \
> +   _vextq_##i##8_0, \
> +   _vextq_##i##8_1, \
> +   _vextq_##i##8_2, \
> +   _vextq_##i##8_3, \
> +   _vextq_##i##8_4, \
> +   _vextq_##i##8_5, \
> +   _vextq_##i##8_6, \
> +   _vextq_##i##8_7, \
> +   _vextq_##i##8_8, \
> +   _vextq_##i##8_9, \
> +   _vextq_##i##8_10, \
> +   _vextq_##i##8_11, \
> +   _vextq_##i##8_12, \
> +   _vextq_##i##8_13, \
> +   _vextq_##i##8_14, \
> +   _vextq_##i##8_15, \
> +}; \
> +static t##16x8 (* _vextq_##i##16 [8]) (t##16x8, t##16x8) = { \
> +   _vextq_##i##16_0, \
> +   _vextq_##i##16_1, \
> +   _vextq_##i##16_2, \
> +   _vextq_##i##16_3, \
> +   _vextq_##i##16_4, \
> +   _vextq_##i##16_5, \
> +   _vextq_##i##16_6, \
> +   _vextq_##i##16_7, \
> +}; \
> +static t##32x4 (* _vextq_##i##32 [4]) (t##32x4, t##32x4) = { \
> +   _vextq_##i##32_0, \
> +   _vextq_##i##32_1, \
> +   _vextq_##i##32_2, \
> +   _vextq_##i##32_3, \
> +}; \
> +static t##64x2 (* _vextq_##i##64 [2]) (t##64x2, t##64x2) = { \
> +  (void *) _vextq_##i##64_0, \
> +  (void *) _vextq_##i##64_1, \
> +};
> +_(i, s)
> +_(u, u)
> +#undef _
> +#undef foreach_neon_vec128i
> +#undef foreach_neon_vec128u
> +/* *INDENT-ON* */
> +
>   /* *INDENT-OFF* */
>   #define foreach_neon_vec128i \
>     _(i,8,16,s8) _(i,16,8,s16) _(i,32,4,s32)  _(i,64,2,s64)
> @@ -90,7 +165,17 @@ t##s##x##c##_is_greater (t##s##x##c a, t##s##x##c b)      
>       \
>   \
>   static_always_inline t##s##x##c                        \
>   t##s##x##c##_blend (t##s##x##c dst, t##s##x##c src, u##s##x##c mask)   \
> -{ return (t##s##x##c) vbslq_##i (mask, src, dst); }
> +{ return (t##s##x##c) vbslq_##i (mask, src, dst); }            \
> +\
> +static_always_inline u##s##x##c                        \
> +t##s##x##c##_word_shift_left (t##s##x##c x, const int n)       \
> +{ /*ASSERT ((c >= n) && (0 < n));*/                    \
> +  return (u##s##x##c) _vextq_##i [c - n] (t##s##x##c##_splat (0), x); }    \
> +\
> +static_always_inline u##s##x##c                        \
> +t##s##x##c##_word_shift_right (t##s##x##c x, const int n)      \
> +{ /*ASSERT ((0 <= n) && (c > n));*/                    \
> +  return (u##s##x##c) _vextq_##i [n] (x, t##s##x##c##_splat (0)); }
>
>   foreach_neon_vec128i foreach_neon_vec128u
>
> Thanks.
> 

-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.

View/Reply Online (#15450): https://lists.fd.io/g/vpp-dev/message/15450
Mute This Topic: https://lists.fd.io/mt/71367647/21656
Group Owner: [email protected]
Unsubscribe: https://lists.fd.io/g/vpp-dev/unsub  [[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to