What version of CLANG are you using?

> -----Original Message-----
> From: Roger Melton (rmelton) <rmel...@cisco.com>
> Sent: Wednesday, December 4, 2024 11:24 AM
> To: Ruifeng Wang <ruifeng.w...@arm.com>; dev@dpdk.org
> Cc: Wathsala Wathawana Vithanage <wathsala.vithan...@arm.com>; nd
> <n...@arm.com>
> Subject: Re: lib/eal/arm/include/rte_vect.h fails to compile with clang14 for
> 32bit ARM
> 
> Considering this problem further, I don't see a way to avoid the CLANG
> compiler error with a function implementation.  We would need a macro
> implementation similar to CLANGS arm_neon.h.  In addition, it may be
> necessary to provide separate implementations for CLANG and non-CLANG
> compilers since the builtins between the toolchains are different.  One way to
> address this would be keep the existing function implementation, and add a
> new macro implementation for CLANG.
> 
> For example, something like:
> 
> 
> 
>       #if !defined(RTE_CC_CLANG)
>       #if (defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)) || \
>       (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU && (GCC_VERSION
> < 70000))
>       /* NEON intrinsic vcopyq_laneq_u32() is not supported in ARMv7-
> A(AArch32)
>        * On AArch64, this intrinsic is supported since GCC version 7.
>        */
>       static inline uint32x4_t
>       vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
>                uint32x4_t b, const int lane_b)
>       {
>           return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
>       }
>       #endif
>       #else
>       #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
>       /* NEON intrinsic vcopyq_laneq_u32() is not supported in ARMv7-
> A(AArch32)
>        * On AArch64, this intrinsic is supported
>        */
>       #ifdef LITTLE_ENDIAN
>       #define vcopyq_laneq_u32(__arg1, __arg2, __arg3, __arg4)
> __extension__ ({ \
>         uint32x4_t __ret; \
>         uint32x4_t __lcl_arg1 = __arg1; \
>         uint32x4_t __lcl_arg3 = __arg3; \
>         __ret = vsetq_lane_u32(vgetq_lane_u32(__lcl_arg3, __arg4),
> __lcl_arg1, __arg2); \
>         __ret; \
>       })
>       #else
>       #define __noswap_vsetq_lane_u32(__arg1, __arg2, __arg3)
> __extension__ ({ \
>         uint32x4_t __ret; \
>         uint32_t __lcl_arg1 = __arg1; \
>         uint32x4_t __lcl_arg2 = __arg2; \
>         __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__lcl_arg1,
> (int32x4_t)__lcl_arg2, __arg3); \
>         __ret; \
>       })
>       #define __noswap_vgetq_lane_u32(__arg1, __arg2) __extension__ ({
> \
>         uint32_t __ret; \
>         uint32x4_t __lcl_arg1 = __arg1; \
>         __ret = (uint32_t)
> __builtin_neon_vgetq_lane_i32((int32x4_t)__lcl_arg1, __arg2); \
>         __ret; \
>       })
>       #define vcopyq_laneq_u32(__arg1, __arg2, __arg3, __arg4)
> __extension__ ({ \
>         uint32x4_t __ret; \
>         uint32x4_t __lcl_arg1 = __arg1; \
>         uint32x4_t __lcl_arg3 = __arg3; \
>         uint32x4_t __rev1; \
>         uint32x4_t __rev3; \
>         __rev1 = __builtin_shufflevector(__lcl_arg1, __lcl_arg1, 3, 2, 1, 0); 
> \
>         __rev3 = __builtin_shufflevector(__lcl_arg3, __lcl_arg3, 3, 2, 1, 0); 
> \
>         __ret =
> __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev3, __arg4),
> __rev1, __arg2); \
>         __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \
>         __ret; \
>       })
>       #endif
>       #endif
>       #endif
> 
> 
> 
> NOTE1:  I saw no reason the CLANG arm_neon.h AARCH64 macros would not
> work for AARCH32, so the macros in this sample implementation are copies
> CLANG originals modified for (my) readability.  I'm not an attorney, but if 
> used,
> it may be necessary to include the banner from the CLANG arm_neon.h.
> 
> NOTE2: While I can build the CLANG ARM implementation, I lack the hardware
> to test it.
> 
> 
> Regards,
> Roger
> 
> On 12/3/24 7:37 PM, Roger Melton (rmelton) wrote:
> 
> 
>       After looking at this a bit closer today, I realize that my assertion 
> that
> CLANG14 does support vcopyq_laneq_u32() for 32bit ARM was incorrect.  It
> does not.  The reason that disabling the implementation in rte_vect.h works
> for our clang builds is that we do not build the l3fwd app nor the ixgbe PMD
> for our application, and they are the only libraries that reference that 
> function.
> 
>       The clang compile errors appear to be related to how clang handles
> compile time constants, but I'm am again unsure how to resolve them in a way
> that would work for both GNU and clang.
> 
>       Any suggestions?
> 
> 
>       Regards,
>       Roger
> 
> 
>       On 12/2/24 8:26 PM, Ruifeng Wang wrote:
> 
> 
>               +Arm folks.
> 
> 
> 
>               From: Roger Melton (rmelton) <rmel...@cisco.com>
> <mailto:rmel...@cisco.com>
>               Date: Tuesday, December 3, 2024 at 3:39 AM
>               To: dev@dpdk.org <mailto:dev@dpdk.org>  <dev@dpdk.org>
> <mailto:dev@dpdk.org> , Ruifeng Wang <ruifeng.w...@arm.com>
> <mailto:ruifeng.w...@arm.com>
>               Subject: lib/eal/arm/include/rte_vect.h fails to compile with
> clang14 for 32bit ARM
> 
>               Hey folks,
> 
>               We are building DPDK with clang14 for a 32bit armv8-a based
> CPU and ran into a compile error with the following from
> lib/eal/arm/include/rte_vect.h:
> 
> 
> 
> 
> 
>                       #if (defined(RTE_ARCH_ARM) &&
> defined(RTE_ARCH_32)) || \
>                       (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU
> <https://elixir.bootlin.com/dpdk/v24.11/C/ident/RTE_CC_IS_GNU>  &&
> (GCC_VERSION
> <https://elixir.bootlin.com/dpdk/v24.11/C/ident/GCC_VERSION>  < 70000))
>                       /* NEON intrinsic vcopyq_laneq_u32() is not
> supported in ARMv7-A(AArch32)
>                        * On AArch64, this intrinsic is supported since GCC
> version 7.
>                        */
>                       static inline uint32x4_t
>                       vcopyq_laneq_u32
> <https://elixir.bootlin.com/dpdk/v24.11/C/ident/vcopyq_laneq_u32>
> (uint32x4_t a, const int lane_a,
>                                 uint32x4_t b, const int lane_b)
>                       {
>                         return vsetq_lane_u32(vgetq_lane_u32(b, lane_b),
> a, lane_a);
>                       }
>                       #endif
> 
> 
>               clang14 compile fails as follows:
> 
> 
> 
>                       In file included from ../../../../../../cisco-dpdk-
> upstream-arm-clang-fixes.git/lib/eal/common/eal_common_options.c:36:
>                       ../../../../../../cisco-dpdk-upstream-arm-clang-
> fixes.git/lib/eal/arm/include/rte_vect.h:80:24: error: argument to
> '__builtin_neon_vgetq_lane_i32' must be a constant integer
>                       return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a,
> lane_a);
>                       ^ ~~~~~~
>                       /auto/binos-tools/llvm14/llvm-14.0-
> p24/lib/clang/14.0.5/include/arm_neon.h:7697:22: note: expanded from
> macro 'vgetq_lane_u32'
>                       __ret = (uint32_t)
> __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \
>                       ^ ~~~~
>                       /auto/binos-tools/llvm14/llvm-14.0-
> p24/lib/clang/14.0.5/include/arm_neon.h:24148:19: note: expanded from
> macro 'vsetq_lane_u32'
>                       uint32_t __s0 = __p0; \
>                       ^~~~
>                       In file included from ../../../../../../cisco-dpdk-
> upstream-arm-clang-fixes.git/lib/eal/common/eal_common_options.c:36:
>                       ../../../../../../cisco-dpdk-upstream-arm-clang-
> fixes.git/lib/eal/arm/include/rte_vect.h:80:9: error: argument to
> '__builtin_neon_vsetq_lane_i32' must be a constant integer
>                       return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a,
> lane_a);
>                       ^ ~~~~~~
>                       /auto/binos-tools/llvm14/llvm-14.0-
> p24/lib/clang/14.0.5/include/arm_neon.h:24150:24: note: expanded from
> macro 'vsetq_lane_u32'
>                       __ret = (uint32x4_t)
> __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \
>                       ^ ~~~~
>                       2 errors generated.
> 
> 
> 
>               clang14 does appear to support the vcopyq_laneq_u32()
> intrinsic, s0 we want to skip the conditional implementation.
> 
>               Two approaches I have tested to resolve the error are:
> 
>               1) skip if building with clang:
> 
> 
>                       #if !defined(__clang__) &&
> ((defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)) || \
>                       72 (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU
> && (GCC_VERSION < 70000)))
> 
> 
> 
> 
>               2) skip if not building for ARMv7:
> 
> 
> 
> 
>                       #if (defined(RTE_ARCH_ARMv7) &&
> defined(RTE_ARCH_32)) || \
>                       (defined(RTE_ARCH_ARM64) && RTE_CC_IS_GNU &&
> (GCC_VERSION < 70000))
> 
> 
> 
>               Both address our immediate problem, but may not be a
> appropriate for all cases.
> 
>               Can anyone suggest the proper way to address this?  I'll be
> submitting an patch once I have a solution that is acceptable to the
> community.
> 
>               Regards,
>               Roger
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 

Reply via email to