> -----Original Message-----
> From: Christophe Lyon <christophe.l...@linaro.org>
> Sent: Thursday, November 16, 2023 3:26 PM
> To: gcc-patches@gcc.gnu.org; Richard Sandiford
> <richard.sandif...@arm.com>; Richard Earnshaw
> <richard.earns...@arm.com>; Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Cc: Christophe Lyon <christophe.l...@linaro.org>
> Subject: [PATCH 6/6] arm: [MVE intrinsics] rework vld1q vst1q
> 
> Implement vld1q, vst1q using the new MVE builtins framework.

Ok. Nice to see more MVE intrinsics getting the good treatment.
Thanks,
Kyrill

> 
> 2023-11-16  Christophe Lyon  <christophe.l...@linaro.org>
> 
>       gcc/
>       * config/arm/arm-mve-builtins-base.cc (vld1_impl, vld1q)
>       (vst1_impl, vst1q): New.
>       * config/arm/arm-mve-builtins-base.def (vld1q, vst1q): New.
>       * config/arm/arm-mve-builtins-base.h (vld1q, vst1q): New.
>       * config/arm/arm_mve.h
>       (vld1q): Delete.
>       (vst1q): Delete.
>       (vld1q_s8): Delete.
>       (vld1q_s32): Delete.
>       (vld1q_s16): Delete.
>       (vld1q_u8): Delete.
>       (vld1q_u32): Delete.
>       (vld1q_u16): Delete.
>       (vld1q_f32): Delete.
>       (vld1q_f16): Delete.
>       (vst1q_f32): Delete.
>       (vst1q_f16): Delete.
>       (vst1q_s8): Delete.
>       (vst1q_s32): Delete.
>       (vst1q_s16): Delete.
>       (vst1q_u8): Delete.
>       (vst1q_u32): Delete.
>       (vst1q_u16): Delete.
>       (__arm_vld1q_s8): Delete.
>       (__arm_vld1q_s32): Delete.
>       (__arm_vld1q_s16): Delete.
>       (__arm_vld1q_u8): Delete.
>       (__arm_vld1q_u32): Delete.
>       (__arm_vld1q_u16): Delete.
>       (__arm_vst1q_s8): Delete.
>       (__arm_vst1q_s32): Delete.
>       (__arm_vst1q_s16): Delete.
>       (__arm_vst1q_u8): Delete.
>       (__arm_vst1q_u32): Delete.
>       (__arm_vst1q_u16): Delete.
>       (__arm_vld1q_f32): Delete.
>       (__arm_vld1q_f16): Delete.
>       (__arm_vst1q_f32): Delete.
>       (__arm_vst1q_f16): Delete.
>       (__arm_vld1q): Delete.
>       (__arm_vst1q): Delete.
>       * config/arm/mve.md (mve_vld1q_f<mode>): Rename into ...
>       (@mve_vld1q_f<mode>): ... this.
>       (mve_vld1q_<supf><mode>): Rename into ...
>       (@mve_vld1q_<supf><mode>): ... this.
>       (mve_vst1q_f<mode>): Rename into ...
>       (@mve_vst1q_f<mode>): ... this.
>       (mve_vst1q_<supf><mode>): Rename into ...
>       (@mve_vst1q_<supf><mode>): ... this.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |  58 +++++
>  gcc/config/arm/arm-mve-builtins-base.def |   4 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   4 +-
>  gcc/config/arm/arm_mve.h                 | 282 -----------------------
>  gcc/config/arm/mve.md                    |   8 +-
>  5 files changed, 69 insertions(+), 287 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-
> mve-builtins-base.cc
> index 5478cac8aeb..cfe1b954a29 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -83,6 +83,62 @@ class vuninitializedq_impl : public
> quiet<function_base>
>    }
>  };
> 
> +class vld1_impl : public full_width_access
> +{
> +public:
> +  unsigned int
> +  call_properties (const function_instance &) const override
> +  {
> +    return CP_READ_MEMORY;
> +  }
> +
> +  rtx
> +  expand (function_expander &e) const override
> +  {
> +    insn_code icode;
> +    if (e.type_suffix (0).float_p)
> +      icode = code_for_mve_vld1q_f(e.vector_mode (0));
> +    else
> +      {
> +     if (e.type_suffix (0).unsigned_p)
> +       icode = code_for_mve_vld1q(VLD1Q_U,
> +                                  e.vector_mode (0));
> +     else
> +       icode = code_for_mve_vld1q(VLD1Q_S,
> +                                  e.vector_mode (0));
> +      }
> +    return e.use_contiguous_load_insn (icode);
> +  }
> +};
> +
> +class vst1_impl : public full_width_access
> +{
> +public:
> +  unsigned int
> +  call_properties (const function_instance &) const override
> +  {
> +    return CP_WRITE_MEMORY;
> +  }
> +
> +  rtx
> +  expand (function_expander &e) const override
> +  {
> +    insn_code icode;
> +    if (e.type_suffix (0).float_p)
> +      icode = code_for_mve_vst1q_f(e.vector_mode (0));
> +    else
> +      {
> +     if (e.type_suffix (0).unsigned_p)
> +       icode = code_for_mve_vst1q(VST1Q_U,
> +                                  e.vector_mode (0));
> +     else
> +       icode = code_for_mve_vst1q(VST1Q_S,
> +                                  e.vector_mode (0));
> +      }
> +    return e.use_contiguous_store_insn (icode);
> +  }
> +};
> +
>  } /* end anonymous namespace */
> 
>  namespace arm_mve {
> @@ -290,6 +346,7 @@ FUNCTION (vfmasq,
> unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_
>  FUNCTION (vfmsq, unspec_mve_function_exact_insn, (-1, -1, VFMSQ_F, -1, -
> 1, -1, -1, -1, VFMSQ_M_F, -1, -1, -1))
>  FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
>  FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
> +FUNCTION (vld1q, vld1_impl,)
>  FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ)
>  FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ)
>  FUNCTION_ONLY_F (vmaxnmaq, VMAXNMAQ)
> @@ -405,6 +462,7 @@ FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
>  FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
>  FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ)
>  FUNCTION_ONLY_N_NO_F (vsriq, VSRIQ)
> +FUNCTION (vst1q, vst1_impl,)
>  FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
>  FUNCTION (vuninitializedq, vuninitializedq_impl,)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-
> mve-builtins-base.def
> index 01dfbdef8a3..16879246237 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -47,6 +47,7 @@ DEF_MVE_FUNCTION (vhaddq, binary_opt_n,
> all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none)
>  DEF_MVE_FUNCTION (vhcaddq_rot270, binary, all_signed, mx_or_none)
>  DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vld1q, load, all_integer, none)
>  DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none)
>  DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none)
> @@ -150,6 +151,7 @@ DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow,
> integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vsliq, ternary_lshift, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vsriq, ternary_rshift, all_integer, m_or_none)
> +DEF_MVE_FUNCTION (vst1q, store, all_integer, none)
>  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
>  #undef REQUIRES_FLOAT
> @@ -182,6 +184,7 @@ DEF_MVE_FUNCTION (veorq, binary, all_float,
> mx_or_none)
>  DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none)
>  DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none)
>  DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none)
> +DEF_MVE_FUNCTION (vld1q, load, all_float, none)
>  DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
>  DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none)
>  DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none)
> @@ -203,6 +206,7 @@ DEF_MVE_FUNCTION (vrndnq, unary, all_float,
> mx_or_none)
>  DEF_MVE_FUNCTION (vrndpq, unary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vrndq, unary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vrndxq, unary, all_float, mx_or_none)
> +DEF_MVE_FUNCTION (vst1q, store, all_float, none)
>  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_float, none)
>  #undef REQUIRES_FLOAT
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-
> mve-builtins-base.h
> index c574c32ac53..8c7e5fe5c3e 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -63,6 +63,7 @@ extern const function_base *const vhaddq;
>  extern const function_base *const vhcaddq_rot270;
>  extern const function_base *const vhcaddq_rot90;
>  extern const function_base *const vhsubq;
> +extern const function_base *const vld1q;
>  extern const function_base *const vmaxaq;
>  extern const function_base *const vmaxavq;
>  extern const function_base *const vmaxnmaq;
> @@ -103,8 +104,8 @@ extern const function_base *const vmovnbq;
>  extern const function_base *const vmovntq;
>  extern const function_base *const vmulhq;
>  extern const function_base *const vmullbq_int;
> -extern const function_base *const vmulltq_int;
>  extern const function_base *const vmullbq_poly;
> +extern const function_base *const vmulltq_int;
>  extern const function_base *const vmulltq_poly;
>  extern const function_base *const vmulq;
>  extern const function_base *const vmvnq;
> @@ -178,6 +179,7 @@ extern const function_base *const vshrntq;
>  extern const function_base *const vshrq;
>  extern const function_base *const vsliq;
>  extern const function_base *const vsriq;
> +extern const function_base *const vst1q;
>  extern const function_base *const vsubq;
>  extern const function_base *const vuninitializedq;
> 
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index b82d94e59bd..cc027f9cbb5 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -56,7 +56,6 @@
>  #define vstrbq_scatter_offset_p(__base, __offset, __value, __p)
> __arm_vstrbq_scatter_offset_p(__base, __offset, __value, __p)
>  #define vstrwq_scatter_base_p(__addr, __offset, __value, __p)
> __arm_vstrwq_scatter_base_p(__addr, __offset, __value, __p)
>  #define vldrbq_gather_offset_z(__base, __offset, __p)
> __arm_vldrbq_gather_offset_z(__base, __offset, __p)
> -#define vld1q(__base) __arm_vld1q(__base)
>  #define vldrhq_gather_offset(__base, __offset)
> __arm_vldrhq_gather_offset(__base, __offset)
>  #define vldrhq_gather_offset_z(__base, __offset, __p)
> __arm_vldrhq_gather_offset_z(__base, __offset, __p)
>  #define vldrhq_gather_shifted_offset(__base, __offset)
> __arm_vldrhq_gather_shifted_offset(__base, __offset)
> @@ -69,7 +68,6 @@
>  #define vldrwq_gather_offset_z(__base, __offset, __p)
> __arm_vldrwq_gather_offset_z(__base, __offset, __p)
>  #define vldrwq_gather_shifted_offset(__base, __offset)
> __arm_vldrwq_gather_shifted_offset(__base, __offset)
>  #define vldrwq_gather_shifted_offset_z(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p)
> -#define vst1q(__addr, __value) __arm_vst1q(__addr, __value)
>  #define vstrhq_scatter_offset(__base, __offset, __value)
> __arm_vstrhq_scatter_offset(__base, __offset, __value)
>  #define vstrhq_scatter_offset_p(__base, __offset, __value, __p)
> __arm_vstrhq_scatter_offset_p(__base, __offset, __value, __p)
>  #define vstrhq_scatter_shifted_offset(__base, __offset, __value)
> __arm_vstrhq_scatter_shifted_offset(__base, __offset, __value)
> @@ -346,12 +344,6 @@
>  #define vldrbq_z_u32(__base, __p) __arm_vldrbq_z_u32(__base, __p)
>  #define vldrwq_gather_base_z_u32(__addr,  __offset, __p)
> __arm_vldrwq_gather_base_z_u32(__addr,  __offset, __p)
>  #define vldrwq_gather_base_z_s32(__addr,  __offset, __p)
> __arm_vldrwq_gather_base_z_s32(__addr,  __offset, __p)
> -#define vld1q_s8(__base) __arm_vld1q_s8(__base)
> -#define vld1q_s32(__base) __arm_vld1q_s32(__base)
> -#define vld1q_s16(__base) __arm_vld1q_s16(__base)
> -#define vld1q_u8(__base) __arm_vld1q_u8(__base)
> -#define vld1q_u32(__base) __arm_vld1q_u32(__base)
> -#define vld1q_u16(__base) __arm_vld1q_u16(__base)
>  #define vldrhq_gather_offset_s32(__base, __offset)
> __arm_vldrhq_gather_offset_s32(__base, __offset)
>  #define vldrhq_gather_offset_s16(__base, __offset)
> __arm_vldrhq_gather_offset_s16(__base, __offset)
>  #define vldrhq_gather_offset_u32(__base, __offset)
> __arm_vldrhq_gather_offset_u32(__base, __offset)
> @@ -380,8 +372,6 @@
>  #define vldrwq_u32(__base) __arm_vldrwq_u32(__base)
>  #define vldrwq_z_s32(__base, __p) __arm_vldrwq_z_s32(__base, __p)
>  #define vldrwq_z_u32(__base, __p) __arm_vldrwq_z_u32(__base, __p)
> -#define vld1q_f32(__base) __arm_vld1q_f32(__base)
> -#define vld1q_f16(__base) __arm_vld1q_f16(__base)
>  #define vldrhq_f16(__base) __arm_vldrhq_f16(__base)
>  #define vldrhq_z_f16(__base, __p) __arm_vldrhq_z_f16(__base, __p)
>  #define vldrwq_f32(__base) __arm_vldrwq_f32(__base)
> @@ -416,14 +406,6 @@
>  #define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p)
>  #define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p)
>  #define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p)
> -#define vst1q_f32(__addr, __value) __arm_vst1q_f32(__addr, __value)
> -#define vst1q_f16(__addr, __value) __arm_vst1q_f16(__addr, __value)
> -#define vst1q_s8(__addr, __value) __arm_vst1q_s8(__addr, __value)
> -#define vst1q_s32(__addr, __value) __arm_vst1q_s32(__addr, __value)
> -#define vst1q_s16(__addr, __value) __arm_vst1q_s16(__addr, __value)
> -#define vst1q_u8(__addr, __value) __arm_vst1q_u8(__addr, __value)
> -#define vst1q_u32(__addr, __value) __arm_vst1q_u32(__addr, __value)
> -#define vst1q_u16(__addr, __value) __arm_vst1q_u16(__addr, __value)
>  #define vstrhq_f16(__addr, __value) __arm_vstrhq_f16(__addr, __value)
>  #define vstrhq_scatter_offset_s32( __base, __offset, __value)
> __arm_vstrhq_scatter_offset_s32( __base, __offset, __value)
>  #define vstrhq_scatter_offset_s16( __base, __offset, __value)
> __arm_vstrhq_scatter_offset_s16( __base, __offset, __value)
> @@ -1537,48 +1519,6 @@ __arm_vldrwq_gather_base_z_u32 (uint32x4_t
> __addr, const int __offset, mve_pred1
>    return __builtin_mve_vldrwq_gather_base_z_uv4si (__addr, __offset, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_s8 (int8_t const * __base)
> -{
> -  return __builtin_mve_vld1q_sv16qi ((__builtin_neon_qi *) __base);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_s32 (int32_t const * __base)
> -{
> -  return __builtin_mve_vld1q_sv4si ((__builtin_neon_si *) __base);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_s16 (int16_t const * __base)
> -{
> -  return __builtin_mve_vld1q_sv8hi ((__builtin_neon_hi *) __base);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_u8 (uint8_t const * __base)
> -{
> -  return __builtin_mve_vld1q_uv16qi ((__builtin_neon_qi *) __base);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_u32 (uint32_t const * __base)
> -{
> -  return __builtin_mve_vld1q_uv4si ((__builtin_neon_si *) __base);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_u16 (uint16_t const * __base)
> -{
> -  return __builtin_mve_vld1q_uv8hi ((__builtin_neon_hi *) __base);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vldrhq_gather_offset_s32 (int16_t const * __base, uint32x4_t
> __offset)
> @@ -1917,48 +1857,6 @@ __arm_vldrwq_gather_shifted_offset_z_u32
> (uint32_t const * __base, uint32x4_t __
>    return __builtin_mve_vldrwq_gather_shifted_offset_z_uv4si
> ((__builtin_neon_si *) __base, __offset, __p);
>  }
> 
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_s8 (int8_t * __addr, int8x16_t __value)
> -{
> -  __builtin_mve_vst1q_sv16qi ((__builtin_neon_qi *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_s32 (int32_t * __addr, int32x4_t __value)
> -{
> -  __builtin_mve_vst1q_sv4si ((__builtin_neon_si *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_s16 (int16_t * __addr, int16x8_t __value)
> -{
> -  __builtin_mve_vst1q_sv8hi ((__builtin_neon_hi *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_u8 (uint8_t * __addr, uint8x16_t __value)
> -{
> -  __builtin_mve_vst1q_uv16qi ((__builtin_neon_qi *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_u32 (uint32_t * __addr, uint32x4_t __value)
> -{
> -  __builtin_mve_vst1q_uv4si ((__builtin_neon_si *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_u16 (uint16_t * __addr, uint16x8_t __value)
> -{
> -  __builtin_mve_vst1q_uv8hi ((__builtin_neon_hi *) __addr, __value);
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrhq_scatter_offset_s32 (int16_t * __base, uint32x4_t __offset,
> int32x4_t __value)
> @@ -4421,20 +4319,6 @@ __arm_vornq_m_f16 (float16x8_t __inactive,
> float16x8_t __a, float16x8_t __b, mve
>    return __builtin_mve_vornq_m_fv8hf (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_f32 (float32_t const * __base)
> -{
> -  return __builtin_mve_vld1q_fv4sf((__builtin_neon_si *) __base);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_f16 (float16_t const * __base)
> -{
> -  return __builtin_mve_vld1q_fv8hf((__builtin_neon_hi *) __base);
> -}
> -
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vldrwq_f32 (float32_t const * __base)
> @@ -4547,20 +4431,6 @@ __arm_vstrwq_f32 (float32_t * __addr,
> float32x4_t __value)
>    __builtin_mve_vstrwq_fv4sf ((__builtin_neon_si *) __addr, __value);
>  }
> 
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_f32 (float32_t * __addr, float32x4_t __value)
> -{
> -  __builtin_mve_vst1q_fv4sf ((__builtin_neon_si *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_f16 (float16_t * __addr, float16x8_t __value)
> -{
> -  __builtin_mve_vst1q_fv8hf ((__builtin_neon_hi *) __addr, __value);
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrhq_f16 (float16_t * __addr, float16x8_t __value)
> @@ -5651,48 +5521,6 @@ __arm_vldrbq_gather_offset_z (uint8_t const *
> __base, uint16x8_t __offset, mve_p
>   return __arm_vldrbq_gather_offset_z_u16 (__base, __offset, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (int8_t const * __base)
> -{
> - return __arm_vld1q_s8 (__base);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (int32_t const * __base)
> -{
> - return __arm_vld1q_s32 (__base);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (int16_t const * __base)
> -{
> - return __arm_vld1q_s16 (__base);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (uint8_t const * __base)
> -{
> - return __arm_vld1q_u8 (__base);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (uint32_t const * __base)
> -{
> - return __arm_vld1q_u32 (__base);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (uint16_t const * __base)
> -{
> - return __arm_vld1q_u16 (__base);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vldrhq_gather_offset (int16_t const * __base, uint32x4_t __offset)
> @@ -5917,48 +5745,6 @@ __arm_vldrwq_gather_shifted_offset_z (uint32_t
> const * __base, uint32x4_t __offs
>   return __arm_vldrwq_gather_shifted_offset_z_u32 (__base, __offset, __p);
>  }
> 
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (int8_t * __addr, int8x16_t __value)
> -{
> - __arm_vst1q_s8 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (int32_t * __addr, int32x4_t __value)
> -{
> - __arm_vst1q_s32 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (int16_t * __addr, int16x8_t __value)
> -{
> - __arm_vst1q_s16 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (uint8_t * __addr, uint8x16_t __value)
> -{
> - __arm_vst1q_u8 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (uint32_t * __addr, uint32x4_t __value)
> -{
> - __arm_vst1q_u32 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (uint16_t * __addr, uint16x8_t __value)
> -{
> - __arm_vst1q_u16 (__addr, __value);
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrhq_scatter_offset (int16_t * __base, uint32x4_t __offset,
> int32x4_t __value)
> @@ -7809,20 +7595,6 @@ __arm_vornq_m (float16x8_t __inactive,
> float16x8_t __a, float16x8_t __b, mve_pre
>   return __arm_vornq_m_f16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (float32_t const * __base)
> -{
> - return __arm_vld1q_f32 (__base);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (float16_t const * __base)
> -{
> - return __arm_vld1q_f16 (__base);
> -}
> -
>  __extension__ extern __inline float16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vldrhq_gather_offset (float16_t const * __base, uint16x8_t __offset)
> @@ -7893,20 +7665,6 @@ __arm_vstrwq (float32_t * __addr, float32x4_t
> __value)
>   __arm_vstrwq_f32 (__addr, __value);
>  }
> 
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (float32_t * __addr, float32x4_t __value)
> -{
> - __arm_vst1q_f32 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (float16_t * __addr, float16x8_t __value)
> -{
> - __arm_vst1q_f16 (__addr, __value);
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrhq (float16_t * __addr, float16x8_t __value)
> @@ -8670,17 +8428,6 @@ extern void *__ARM_undef;
>    int
> (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_
> mve_type_float16x8_t]: __arm_vornq_m_f16 (__ARM_mve_coerce(__p0,
> float16x8_t), __ARM_mve_coerce(__p1, float16x8_t),
> __ARM_mve_coerce(__p2, float16x8_t), p3), \
>    int
> (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_
> mve_type_float32x4_t]: __arm_vornq_m_f32 (__ARM_mve_coerce(__p0,
> float32x4_t), __ARM_mve_coerce(__p1, float32x4_t),
> __ARM_mve_coerce(__p2, float32x4_t), p3));})
> 
> -#define __arm_vld1q(p0) (\
> -  _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> -  int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
> -  int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \
> -  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce_s32_ptr(p0, int32_t *)), \
> -  int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce_u8_ptr(p0, uint8_t *)), \
> -  int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce_u16_ptr(p0, uint16_t *)), \
> -  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce_u32_ptr(p0, uint32_t *)), \
> -  int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16
> (__ARM_mve_coerce_f16_ptr(p0, float16_t *)), \
> -  int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32
> (__ARM_mve_coerce_f32_ptr(p0, float32_t *))))
> -
>  #define __arm_vld1q_z(p0,p1) ( \
>    _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
>    int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8
> (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
> @@ -8792,17 +8539,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x2_t]:
> __arm_vst2q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *),
> __ARM_mve_coerce(__p1, float16x8x2_t)), \
>    int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x2_t]:
> __arm_vst2q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *),
> __ARM_mve_coerce(__p1, float32x4x2_t)));})
> 
> -#define __arm_vst1q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vst1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vst1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vst1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vst1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vst1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vst1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> -  int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]:
> __arm_vst1q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *),
> __ARM_mve_coerce(__p1, float16x8_t)), \
> -  int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vst1q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *),
> __ARM_mve_coerce(__p1, float32x4_t)));})
> -
>  #define __arm_vstrhq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vstrhq_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> @@ -9149,15 +8885,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32
> (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32
> (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> 
> -#define __arm_vld1q(p0) (\
> -  _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> -  int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
> -  int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \
> -  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce_s32_ptr(p0, int32_t *)), \
> -  int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce_u8_ptr(p0, uint8_t *)), \
> -  int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce_u16_ptr(p0, uint16_t *)), \
> -  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce_u32_ptr(p0, uint32_t *))))
> -
>  #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t
> *), __ARM_mve_coerce(__p1, uint16x8_t)), \
> @@ -9206,15 +8933,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int32_t_ptr]:
> __arm_vldrwq_gather_shifted_offset_z_s32
> (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1, p2), \
>    int (*)[__ARM_mve_type_uint32_t_ptr]:
> __arm_vldrwq_gather_shifted_offset_z_u32
> (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
> 
> -#define __arm_vst1q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vst1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vst1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vst1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vst1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vst1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vst1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
>  #define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vst1q_p_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 366cec0812a..b0d3443da9c 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -3690,7 +3690,7 @@ (define_insn "mve_vldrwq_z_<supf>v4si"
>  }
>    [(set_attr "length" "8")])
> 
> -(define_expand "mve_vld1q_f<mode>"
> +(define_expand "@mve_vld1q_f<mode>"
>    [(match_operand:MVE_0 0 "s_register_operand")
>     (unspec:MVE_0 [(match_operand:<MVE_CNVT> 1
> "mve_memory_operand")] VLD1Q_F)
>    ]
> @@ -3700,7 +3700,7 @@ (define_expand "mve_vld1q_f<mode>"
>    DONE;
>  })
> 
> -(define_expand "mve_vld1q_<supf><mode>"
> +(define_expand "@mve_vld1q_<supf><mode>"
>    [(match_operand:MVE_2 0 "s_register_operand")
>     (unspec:MVE_2 [(match_operand:MVE_2 1 "mve_memory_operand")]
> VLD1Q)
>    ]
> @@ -4408,7 +4408,7 @@ (define_insn "mve_vstrwq_<supf>v4si"
>  }
>    [(set_attr "length" "4")])
> 
> -(define_expand "mve_vst1q_f<mode>"
> +(define_expand "@mve_vst1q_f<mode>"
>    [(match_operand:<MVE_CNVT> 0 "mve_memory_operand")
>     (unspec:<MVE_CNVT> [(match_operand:MVE_0 1 "s_register_operand")]
> VST1Q_F)
>    ]
> @@ -4418,7 +4418,7 @@ (define_expand "mve_vst1q_f<mode>"
>    DONE;
>  })
> 
> -(define_expand "mve_vst1q_<supf><mode>"
> +(define_expand "@mve_vst1q_<supf><mode>"
>    [(match_operand:MVE_2 0 "mve_memory_operand")
>     (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q)
>    ]
> --
> 2.34.1


Reply via email to