Re: [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'

2023-12-07 Thread Prathamesh Kulkarni
On Thu, 9 Nov 2023 at 19:44, Victor Do Nascimento wrote:
>
> Create the necessary mappings from the ACLE-defined Neon intrinsics
> names[1] to the internal builtin function names.
>
> [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html
Hi Victor,
It seems this patch broke the kernel build after the recent patch to
upgrade -Wincompatible-pointer-types to an error:

00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h: In function ‘vldap1_lane_s64’:
00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48: error: passing argument 1 of ‘__builtin_aarch64_vec_ldap1_lanev1di’ from incompatible pointer type [-Wincompatible-pointer-types]
00:00:56 13474 |   return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
00:00:56       |                                                ^
00:00:56       |                                                |
00:00:56       |                                                const int64_t * {aka const long long int *}
00:00:56 /home/tcwg-buildslave/workspace/tcwg_kernel_1/abe/builds/destdir/x86_64-pc-linux-gnu/lib/gcc/aarch64-linux-gnu/14.0.0/include/arm_neon.h:13474:48: note: expected ‘const long int *’ but argument is of type ‘const int64_t *’ {aka ‘const long long int *’}

Looking cursorily at the code, should __src be cast to
(__builtin_aarch64_simd_di *) before passing it to
__builtin_aarch64_vec_ldap1_lanev1di?
For more details, please see:
https://ci.linaro.org/job/tcwg_kernel--gnu-master-aarch64-next-defconfig-build/91/artifact/artifacts/notify/mail-body.txt/*view*/

Thanks,
Prathamesh


>
> gcc/ChangeLog:
>
> * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New.
> (vldap1q_lane_u64): Likewise.
> (vldap1_lane_s64): Likewise.
> (vldap1q_lane_s64): Likewise.
> (vldap1_lane_f64): Likewise.
> (vldap1q_lane_f64): Likewise.
> (vldap1_lane_p64): Likewise.
> (vldap1q_lane_p64): Likewise.
> (vstl1_lane_u64): Likewise.
> (vstl1q_lane_u64): Likewise.
> (vstl1_lane_s64): Likewise.
> (vstl1q_lane_s64): Likewise.
> (vstl1_lane_f64): Likewise.
> (vstl1q_lane_f64): Likewise.
> (vstl1_lane_p64): Likewise.
> (vstl1q_lane_p64): Likewise.
> ---
>  gcc/config/aarch64/arm_neon.h | 129 ++
>  1 file changed, 129 insertions(+)
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 349f3167699..ef0d75e07ce 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t 
> __vec, const int __lane)
>return __aarch64_vset_lane_any (*__src, __vec, __lane);
>  }
>
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+rcpc3+simd")
> +
> +/* vldap1_lane.  */
> +
> +__extension__ extern __inline uint64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline uint64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2di_usus (
> + (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, 

Re: [PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'

2023-11-24 Thread Richard Sandiford
Victor Do Nascimento  writes:
> Create the necessary mappings from the ACLE-defined Neon intrinsics
> names[1] to the internal builtin function names.
>
> [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html
>
> gcc/ChangeLog:
>
>   * gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New.
>   (vldap1q_lane_u64): Likewise.
>   (vldap1_lane_s64): Likewise.
>   (vldap1q_lane_s64): Likewise.
>   (vldap1_lane_f64): Likewise.
>   (vldap1q_lane_f64): Likewise.
>   (vldap1_lane_p64): Likewise.
>   (vldap1q_lane_p64): Likewise.
>   (vstl1_lane_u64): Likewise.
>   (vstl1q_lane_u64): Likewise.
>   (vstl1_lane_s64): Likewise.
>   (vstl1q_lane_s64): Likewise.
>   (vstl1_lane_f64): Likewise.
>   (vstl1q_lane_f64): Likewise.
>   (vstl1_lane_p64): Likewise.
>   (vstl1q_lane_p64): Likewise.

OK, thanks.

Richard

> ---
>  gcc/config/aarch64/arm_neon.h | 129 ++
>  1 file changed, 129 insertions(+)
>
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 349f3167699..ef0d75e07ce 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t 
> __vec, const int __lane)
>return __aarch64_vset_lane_any (*__src, __vec, __lane);
>  }
>  
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+rcpc3+simd")
> +
> +/* vldap1_lane.  */
> +
> +__extension__ extern __inline uint64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1di_usus (
> +   (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline uint64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2di_usus (
> +   (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline int64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline float64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x1_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev1di_psps (
> +   (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline poly64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane)
> +{
> +  return __builtin_aarch64_vec_ldap1_lanev2di_psps (
> +   (__builtin_aarch64_simd_di *) __src, __vec, __lane);
> +}
> +
> +/* vstl1_lane.  */
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
> +{
> +  __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src,
> +__vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane)
> +{
> +  __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) __src,
> +__vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane)
> +{
> +  __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane);
> +}
> +
> +__extension__ extern __inline void
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))

[PATCH 4/5] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h'

2023-11-09 Thread Victor Do Nascimento
Create the necessary mappings from the ACLE-defined Neon intrinsics
names[1] to the internal builtin function names.

[1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html

gcc/ChangeLog:

* gcc/config/aarch64/arm_neon.h (vldap1_lane_u64): New.
(vldap1q_lane_u64): Likewise.
(vldap1_lane_s64): Likewise.
(vldap1q_lane_s64): Likewise.
(vldap1_lane_f64): Likewise.
(vldap1q_lane_f64): Likewise.
(vldap1_lane_p64): Likewise.
(vldap1q_lane_p64): Likewise.
(vstl1_lane_u64): Likewise.
(vstl1q_lane_u64): Likewise.
(vstl1_lane_s64): Likewise.
(vstl1q_lane_s64): Likewise.
(vstl1_lane_f64): Likewise.
(vstl1q_lane_f64): Likewise.
(vstl1_lane_p64): Likewise.
(vstl1q_lane_p64): Likewise.
---
 gcc/config/aarch64/arm_neon.h | 129 ++
 1 file changed, 129 insertions(+)

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 349f3167699..ef0d75e07ce 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
   return __aarch64_vset_lane_any (*__src, __vec, __lane);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("+nothing+rcpc3+simd")
+
+/* vldap1_lane.  */
+
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev1di_usus (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev2di_usus (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+__extension__ extern __inline int64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline float64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane);
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev1di_psps (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane)
+{
+  return __builtin_aarch64_vec_ldap1_lanev2di_psps (
+ (__builtin_aarch64_simd_di *) __src, __vec, __lane);
+}
+
+/* vstl1_lane.  */
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane)
+{
+  __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src,
+  __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane)
+{
+  __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) __src,
+  __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane)
+{
+  __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1q_lane_s64 (int64_t *__src, int64x2_t __vec, const int __lane)
+{
+  __builtin_aarch64_vec_stl1_lanev2di (__src, __vec, __lane);
+}
+
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vstl1_lane_f64