Re: [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'
On Thu, 9 Nov 2023 at 19:44, Victor Do Nascimento wrote: > > Create the necessary mappings from the ACLE-defined Neon intrinsics > names[1] to the internal builtin function names. > > [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html Hi Victor, It seems this patch broke the kernel build after the recent patch to upgrade -Wincompatible-pointer-types to an error: 00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h: In function ‘vldap1_lane_s64’: 00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48: error: passing argument 1 of ‘__builtin_aarch64_vec_ldap1_lanev1di’ from incompatible pointer type [-Wincompatible-pointer-types] 00:00:56 13474 | return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane); 00:00:56 |^ 00:00:56 || 00:00:56 |const int64_t * {aka const long long int *} 00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48: note: expected ‘const long int *’ but argument is of type ‘const int64_t *’ {aka ‘const long long int *’} Looking cursorily at the code, should __src be cast to (__builtin_aarch64_simd_di *) before passing it to __builtin_aarch64_vec_ldap1_lanev1di? For more details, please see: https://ci.linaro.org/job/tcwg_kernel--gnu-master-aarch64-next-defconfig-build/91/artifact/artifacts/notify/mail-body.txt/*view*/ Thanks, Prathamesh > > gcc/ChangeLog: > > * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New. > (vldap1q_lane_u64): Likewise. > (vldap1_lane_s64): Likewise. > (vldap1q_lane_s64): Likewise. > (vldap1_lane_f64): Likewise. > (vldap1q_lane_f64): Likewise. > (vldap1_lane_p64): Likewise. > (vldap1q_lane_p64): Likewise. > (vstl1_lane_u64): Likewise. > (vstl1q_lane_u64): Likewise. > (vstl1_lane_s64): Likewise. 
> (vstl1q_lane_s64): Likewise. > (vstl1_lane_f64): Likewise. > (vstl1q_lane_f64): Likewise. > (vstl1_lane_p64): Likewise. > (vstl1q_lane_p64): Likewise. > --- > gcc/config/aarch64/arm_neon.h | 129 ++ > 1 file changed, 129 insertions(+) > > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h > index 349f3167699..ef0d75e07ce 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t > __vec, const int __lane) >return __aarch64_vset_lane_any (*__src, __vec, __lane); > } > > +#pragma GCC push_options > +#pragma GCC target ("+nothing+rcpc3+simd") > + > +/* vldap1_lane. */ > + > +__extension__ extern __inline uint64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev1di_usus ( > + (__builtin_aarch64_simd_di *) __src, __vec, __lane); > +} > + > +__extension__ extern __inline uint64x2_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev2di_usus ( > + (__builtin_aarch64_simd_di *) __src, __vec, __lane); > +} > + > +__extension__ extern __inline int64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane); > +} > + > +__extension__ extern __inline int64x2_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane); > +} > + > +__extension__ extern __inline float64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) > +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane); > +} > + > +__extension__ extern __inline float64x2_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int > __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane); > +} > + > +__extension__ extern __inline poly64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec,
Re: [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'
Victor Do Nascimento writes: > Create the necessary mappings from the ACLE-defined Neon intrinsics > names[1] to the internal builtin function names. > > [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html > > gcc/ChangeLog: > > * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New. > (vldap1q_lane_u64): Likewise. > (vldap1_lane_s64): Likewise. > (vldap1q_lane_s64): Likewise. > (vldap1_lane_f64): Likewise. > (vldap1q_lane_f64): Likewise. > (vldap1_lane_p64): Likewise. > (vldap1q_lane_p64): Likewise. > (vstl1_lane_u64): Likewise. > (vstl1q_lane_u64): Likewise. > (vstl1_lane_s64): Likewise. > (vstl1q_lane_s64): Likewise. > (vstl1_lane_f64): Likewise. > (vstl1q_lane_f64): Likewise. > (vstl1_lane_p64): Likewise. > (vstl1q_lane_p64): Likewise. OK, thanks. Richard > --- > gcc/config/aarch64/arm_neon.h | 129 ++ > 1 file changed, 129 insertions(+) > > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h > index 349f3167699..ef0d75e07ce 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t > __vec, const int __lane) >return __aarch64_vset_lane_any (*__src, __vec, __lane); > } > > +#pragma GCC push_options > +#pragma GCC target ("+nothing+rcpc3+simd") > + > +/* vldap1_lane. 
*/ > + > +__extension__ extern __inline uint64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev1di_usus ( > + (__builtin_aarch64_simd_di *) __src, __vec, __lane); > +} > + > +__extension__ extern __inline uint64x2_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev2di_usus ( > + (__builtin_aarch64_simd_di *) __src, __vec, __lane); > +} > + > +__extension__ extern __inline int64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane); > +} > + > +__extension__ extern __inline int64x2_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane); > +} > + > +__extension__ extern __inline float64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane); > +} > + > +__extension__ extern __inline float64x2_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int > __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane); > +} > + > +__extension__ extern __inline poly64x1_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane) > +{ > + return 
__builtin_aarch64_vec_ldap1_lanev1di_psps ( > + (__builtin_aarch64_simd_di *) __src, __vec, __lane); > +} > + > +__extension__ extern __inline poly64x2_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane) > +{ > + return __builtin_aarch64_vec_ldap1_lanev2di_psps ( > + (__builtin_aarch64_simd_di *) __src, __vec, __lane); > +} > + > +/* vstl1_lane. */ > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) > +{ > + __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) > __src, > +__vec, __lane); > +} > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane) > +{ > + __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) > __src, > +__vec, __lane); > +} > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane) > +{ > + __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane); > +} > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
[PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'
Create the necessary mappings from the ACLE-defined Neon intrinsics names[1] to the internal builtin function names. [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html gcc/ChangeLog: * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New. (vldap1q_lane_u64): Likewise. (vldap1_lane_s64): Likewise. (vldap1q_lane_s64): Likewise. (vldap1_lane_f64): Likewise. (vldap1q_lane_f64): Likewise. (vldap1_lane_p64): Likewise. (vldap1q_lane_p64): Likewise. (vstl1_lane_u64): Likewise. (vstl1q_lane_u64): Likewise. (vstl1_lane_s64): Likewise. (vstl1q_lane_s64): Likewise. (vstl1_lane_f64): Likewise. (vstl1q_lane_f64): Likewise. (vstl1_lane_p64): Likewise. (vstl1q_lane_p64): Likewise. --- gcc/config/aarch64/arm_neon.h | 129 ++ 1 file changed, 129 insertions(+) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 349f3167699..ef0d75e07ce 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) return __aarch64_vset_lane_any (*__src, __vec, __lane); } +#pragma GCC push_options +#pragma GCC target ("+nothing+rcpc3+simd") + +/* vldap1_lane. 
*/ + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1di_usus ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2di_usus ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane); +} + +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1di_psps ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +__extension__ 
extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2di_psps ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +/* vstl1_lane. */ + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1q_lane_s64 (int64_t *__src, int64x2_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev2di (__src, __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1_lane_f64