Hello, This patch supports the following MVE ACLE intrinsics with writeback.
vldrdq_gather_base_wb_s64, vldrdq_gather_base_wb_u64, vldrdq_gather_base_wb_z_s64, vldrdq_gather_base_wb_z_u64, vldrwq_gather_base_wb_f32, vldrwq_gather_base_wb_s32, vldrwq_gather_base_wb_u32, vldrwq_gather_base_wb_z_f32, vldrwq_gather_base_wb_z_s32, vldrwq_gather_base_wb_z_u32, vstrdq_scatter_base_wb_p_s64, vstrdq_scatter_base_wb_p_u64, vstrdq_scatter_base_wb_s64, vstrdq_scatter_base_wb_u64, vstrwq_scatter_base_wb_p_s32, vstrwq_scatter_base_wb_p_f32, vstrwq_scatter_base_wb_p_u32, vstrwq_scatter_base_wb_s32, vstrwq_scatter_base_wb_u32, vstrwq_scatter_base_wb_f32. Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more details. [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics Regression tested on arm-none-eabi and found no regressions. Ok for trunk? Thanks, Srinath. gcc/ChangeLog: 2019-11-07 Andre Vieira <andre.simoesdiasvie...@arm.com> Mihail Ionescu <mihail.ione...@arm.com> Srinath Parvathaneni <srinath.parvathan...@arm.com> * config/arm/arm-builtins.c (LDRGBWBS_QUALIFIERS): Define builtin qualifier. (LDRGBWBU_QUALIFIERS): Likewise. (LDRGBWBS_Z_QUALIFIERS): Likewise. (LDRGBWBU_Z_QUALIFIERS): Likewise. (STRSBWBS_QUALIFIERS): Likewise. (STRSBWBU_QUALIFIERS): Likewise. (STRSBWBS_P_QUALIFIERS): Likewise. (STRSBWBU_P_QUALIFIERS): Likewise. * config/arm/arm_mve.h (vldrdq_gather_base_wb_s64): Define macro. (vldrdq_gather_base_wb_u64): Likewise. (vldrdq_gather_base_wb_z_s64): Likewise. (vldrdq_gather_base_wb_z_u64): Likewise. (vldrwq_gather_base_wb_f32): Likewise. (vldrwq_gather_base_wb_s32): Likewise. (vldrwq_gather_base_wb_u32): Likewise. (vldrwq_gather_base_wb_z_f32): Likewise. (vldrwq_gather_base_wb_z_s32): Likewise. (vldrwq_gather_base_wb_z_u32): Likewise. (vstrdq_scatter_base_wb_p_s64): Likewise. (vstrdq_scatter_base_wb_p_u64): Likewise. (vstrdq_scatter_base_wb_s64): Likewise. (vstrdq_scatter_base_wb_u64): Likewise. (vstrwq_scatter_base_wb_p_s32): Likewise. (vstrwq_scatter_base_wb_p_f32): Likewise. 
(vstrwq_scatter_base_wb_p_u32): Likewise. (vstrwq_scatter_base_wb_s32): Likewise. (vstrwq_scatter_base_wb_u32): Likewise. (vstrwq_scatter_base_wb_f32): Likewise. (__arm_vldrdq_gather_base_wb_s64): Define intrinsic. (__arm_vldrdq_gather_base_wb_u64): Likewise. (__arm_vldrdq_gather_base_wb_z_s64): Likewise. (__arm_vldrdq_gather_base_wb_z_u64): Likewise. (__arm_vldrwq_gather_base_wb_s32): Likewise. (__arm_vldrwq_gather_base_wb_u32): Likewise. (__arm_vldrwq_gather_base_wb_z_s32): Likewise. (__arm_vldrwq_gather_base_wb_z_u32): Likewise. (__arm_vstrdq_scatter_base_wb_s64): Likewise. (__arm_vstrdq_scatter_base_wb_u64): Likewise. (__arm_vstrdq_scatter_base_wb_p_s64): Likewise. (__arm_vstrdq_scatter_base_wb_p_u64): Likewise. (__arm_vstrwq_scatter_base_wb_p_s32): Likewise. (__arm_vstrwq_scatter_base_wb_p_u32): Likewise. (__arm_vstrwq_scatter_base_wb_s32): Likewise. (__arm_vstrwq_scatter_base_wb_u32): Likewise. (__arm_vldrwq_gather_base_wb_f32): Likewise. (__arm_vldrwq_gather_base_wb_z_f32): Likewise. (__arm_vstrwq_scatter_base_wb_f32): Likewise. (__arm_vstrwq_scatter_base_wb_p_f32): Likewise. (vstrwq_scatter_base_wb): Define polymorphic variant. (vstrwq_scatter_base_wb_p): Likewise. (vstrdq_scatter_base_wb_p): Likewise. (vstrdq_scatter_base_wb): Likewise. * config/arm/arm_mve_builtins.def (LDRGBWBS_QUALIFIERS): Use builtin qualifier. * config/arm/mve.md (mve_vstrwq_scatter_base_wb_<supf>v4si): Define RTL pattern. (mve_vstrwq_scatter_base_wb_add_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_<supf>v4si_insn): Likewise. (mve_vstrwq_scatter_base_wb_p_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_p_add_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn): Likewise. (mve_vstrwq_scatter_base_wb_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_add_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_fv4sf_insn): Likewise. (mve_vstrwq_scatter_base_wb_p_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_p_add_fv4sf): Likewise. 
(mve_vstrwq_scatter_base_wb_p_fv4sf_insn): Likewise. (mve_vstrdq_scatter_base_wb_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_add_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_<supf>v2di_insn): Likewise. (mve_vstrdq_scatter_base_wb_p_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_p_add_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn): Likewise. (mve_vldrwq_gather_base_wb_<supf>v4si): Likewise. (mve_vldrwq_gather_base_wb_<supf>v4si_insn): Likewise. (mve_vldrwq_gather_base_wb_z_<supf>v4si): Likewise. (mve_vldrwq_gather_base_wb_z_<supf>v4si_insn): Likewise. (mve_vldrwq_gather_base_wb_fv4sf): Likewise. (mve_vldrwq_gather_base_wb_fv4sf_insn): Likewise. (mve_vldrwq_gather_base_wb_z_fv4sf): Likewise. (mve_vldrwq_gather_base_wb_z_fv4sf_insn): Likewise. (mve_vldrdq_gather_base_wb_<supf>v2di): Likewise. (mve_vldrdq_gather_base_wb_<supf>v2di_insn): Likewise. (mve_vldrdq_gather_base_wb_z_<supf>v2di): Likewise. (mve_vldrdq_gather_base_wb_z_<supf>v2di_insn): Likewise. gcc/testsuite/ChangeLog: 2019-11-07 Andre Vieira <andre.simoesdiasvie...@arm.com> Mihail Ionescu <mihail.ione...@arm.com> Srinath Parvathaneni <srinath.parvathan...@arm.com> * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: New test. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c: Likewise. ############### Attachment also inlined for ease of reply ############### diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 283e22897be16accb8ec9f00af8021161f8cd90c..9e87dff264d6b535f64407f669c6e83b0ed639a6 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -694,6 +694,50 @@ arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE_QUALIFIERS \ (arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers) +static enum arm_type_qualifiers +arm_ldrgbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_immediate}; +#define LDRGBWBS_QUALIFIERS (arm_ldrgbwbs_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; +#define LDRGBWBU_QUALIFIERS (arm_ldrgbwbu_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_immediate, + qualifier_unsigned}; +#define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, + 
qualifier_unsigned}; +#define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_none}; +#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_unsigned}; +#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, + qualifier_none, qualifier_unsigned}; +#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers) + +static enum arm_type_qualifiers +arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_const, + qualifier_unsigned, qualifier_unsigned}; +#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers) + /* End of Qualifier for MVE builtins. */ /* void ([T element type] *, T, immediate). 
*/ diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 4ae40c247b96ad1bf6c7bc38e5be1e5a489d1ef0..842d47367b54d826acea7fe358cc58c78f6e0256 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -2054,6 +2054,26 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define viwdupq_wb_u8( __a, __b, __imm) __arm_viwdupq_wb_u8( __a, __b, __imm) #define viwdupq_wb_u32( __a, __b, __imm) __arm_viwdupq_wb_u32( __a, __b, __imm) #define viwdupq_wb_u16( __a, __b, __imm) __arm_viwdupq_wb_u16( __a, __b, __imm) +#define vldrdq_gather_base_wb_s64(__addr, __offset) __arm_vldrdq_gather_base_wb_s64(__addr, __offset) +#define vldrdq_gather_base_wb_u64(__addr, __offset) __arm_vldrdq_gather_base_wb_u64(__addr, __offset) +#define vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) +#define vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) +#define vldrwq_gather_base_wb_f32(__addr, __offset) __arm_vldrwq_gather_base_wb_f32(__addr, __offset) +#define vldrwq_gather_base_wb_s32(__addr, __offset) __arm_vldrwq_gather_base_wb_s32(__addr, __offset) +#define vldrwq_gather_base_wb_u32(__addr, __offset) __arm_vldrwq_gather_base_wb_u32(__addr, __offset) +#define vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) +#define vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) +#define vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) +#define vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) +#define vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) +#define vstrdq_scatter_base_wb_s64(__addr, __offset, __value) 
__arm_vstrdq_scatter_base_wb_s64(__addr, __offset, __value) +#define vstrdq_scatter_base_wb_u64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_u64(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_wb_s32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_s32(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_u32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_u32(__addr, __offset, __value) +#define vstrwq_scatter_base_wb_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_f32(__addr, __offset, __value) #endif __extension__ extern __inline void @@ -13388,6 +13408,150 @@ __arm_viwdupq_wb_u16 (uint32_t * __a, uint32_t __b, const int __imm) return __res; } +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) +{ + int64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_sv2di (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_u64 (uint64x2_t * __addr, const int __offset) +{ + uint64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_uv2di (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_z_s64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) +{ + int64x2_t + result = 
__builtin_mve_vldrdq_gather_base_wb_z_sv2di (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrdq_gather_base_wb_z_u64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) +{ + uint64x2_t + result = __builtin_mve_vldrdq_gather_base_wb_z_uv2di (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_s32 (uint32x4_t * __addr, const int __offset) +{ + int32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_sv4si (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_u32 (uint32x4_t * __addr, const int __offset) +{ + uint32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_uv4si (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_s32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + int32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_sv4si (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + uint32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_uv4si (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value) +{ + 
__builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value); + __builtin_mve_vstrdq_scatter_base_wb_add_sv2di (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value) +{ + __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value); + __builtin_mve_vstrdq_scatter_base_wb_add_uv2di (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p); + __builtin_mve_vstrdq_scatter_base_wb_p_add_sv2di (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p); + __builtin_mve_vstrdq_scatter_base_wb_p_add_uv2di (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_sv4si (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, 
__value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_uv4si (*__addr, __offset, *__addr, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value); + __builtin_mve_vstrwq_scatter_base_wb_add_sv4si (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value); + __builtin_mve_vstrwq_scatter_base_wb_add_uv4si (*__addr, __offset, *__addr); +} + #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void @@ -16024,6 +16188,42 @@ __arm_vreinterpretq_f32_u8 (uint8x16_t __a) return (float32x4_t) __a; } +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) +{ + float32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_fv4sf (*__addr, __offset); + __addr += __offset; + return result; +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) +{ + float32x4_t + result = __builtin_mve_vldrwq_gather_base_wb_z_fv4sf (*__addr, __offset, __p); + __addr += __offset; + return result; +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value) +{ + __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value); + 
__builtin_mve_vstrwq_scatter_base_wb_add_fv4sf (*__addr, __offset, *__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p); + __builtin_mve_vstrwq_scatter_base_wb_p_add_fv4sf (*__addr, __offset, *__addr, __p); +} + #endif enum { @@ -18037,6 +18237,20 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint64x2_t]: __arm_vreinterpretq_u8_u64 (__ARM_mve_coerce(__p0, uint64x2_t)), \ int (*)[__ARM_mve_type_float32x4_t]: __arm_vreinterpretq_u8_f32 (__ARM_mve_coerce(__p0, float32x4_t)));}) +#define vstrwq_scatter_base_wb(p0,p1,p2) __arm_vstrwq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t)));}) + +#define vstrwq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \ + int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));}) + #else /* MVE Interger. 
*/ #define vst4q(p0,p1) __arm_vst4q(p0,p1) @@ -21497,6 +21711,30 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32_t]: __arm_vdwdupq_n_u8 (__ARM_mve_coerce(__p0, uint32_t), p1, p2), \ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_wb_u8 (__ARM_mve_coerce(__p0, uint32_t *), p1, p2));}) +#define vstrdq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \ + int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));}) + +#define vstrdq_scatter_base_wb(p0,p1,p2) __arm_vstrdq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrdq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \ + int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));}) + +#define vstrwq_scatter_base_wb(p0,p1,p2) __arm_vstrwq_scatter_base_wb(p0,p1,p2) +#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)));}) + +#define vstrwq_scatter_base_wb_p(p0,p1,p2,p3) __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) +#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, 
__ARM_mve_coerce(__p2, int32x4_t), p3), \ + int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + #endif /* MVE Floating point. */ #ifdef __cplusplus diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index ac920d327e98a2db0c5e6aa8680dd40f295b579a..b77335cff133872558b48b5574dccc0f17df9ed1 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -827,3 +827,33 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vddupq_m_n_u, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_u, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_s, v4si) +VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_f, v4sf) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_u, v2di) +VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_s, v2di) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_u, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_s, v4si) +VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_f, v4sf) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_u, v2di) +VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_s, v2di) +VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si) +VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf) +VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di) +VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_s, v4si) +VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_f, v4sf) +VAR1 (STRSBWBS_P, vstrdq_scatter_base_wb_p_s, v2di) +VAR1 (LDRGBWBU_Z, vldrwq_gather_base_wb_z_u, v4si) +VAR1 (LDRGBWBU_Z, vldrdq_gather_base_wb_z_u, v2di) +VAR1 (LDRGBWBU, 
vldrwq_gather_base_wb_u, v4si) +VAR1 (LDRGBWBU, vldrdq_gather_base_wb_u, v2di) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_s, v4si) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_f, v4sf) +VAR1 (LDRGBWBS_Z, vldrdq_gather_base_wb_z_s, v2di) +VAR1 (LDRGBWBS, vldrwq_gather_base_wb_s, v4si) +VAR1 (LDRGBWBS, vldrwq_gather_base_wb_f, v4sf) +VAR1 (LDRGBWBS, vldrdq_gather_base_wb_s, v2di) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index b1b35ab23e677800c049d7c7873ab98d76b52538..a938e0922f8dc6749dc7192961ae2091d666c6e7 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -208,7 +208,10 @@ VSTRDQSSO_U VSTRWQSO_S VSTRWQSO_U VSTRWQSSO_S VSTRWQSSO_U VSTRHQSO_F VSTRHQSSO_F VSTRWQSB_F VSTRWQSO_F VSTRWQSSO_F VDDUPQ VDDUPQ_M VDWDUPQ - VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M]) + VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M + VSTRWQSBWB_S VSTRWQSBWB_U VLDRWQGBWB_S VLDRWQGBWB_U + VSTRWQSBWB_F VLDRWQGBWB_F VSTRDQSBWB_S VSTRDQSBWB_U + VLDRDQGBWB_S VLDRDQGBWB_U]) (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") (V4SF "V4SI")]) @@ -377,7 +380,10 @@ (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s") (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u") - (VSTRWQSSO_S "s")]) + (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") + (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") + (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") + (VSTRDQSBWB_U "u")]) (define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") @@ -626,6 +632,10 @@ (define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) (define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U]) (define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) +(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) +(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) +(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) +(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S 
VLDRDQGBWB_U]) (define_insn "*mve_mov<mode>" [(set (match_operand:MVE_types 0 "s_register_operand" "=w,w,r,w,w,r,w") @@ -10035,3 +10045,572 @@ "vpst\;\tviwdupt.u%#<V_sz_elem>\t%q2, %3, %4, %5" [(set_attr "type" "mve_move") (set_attr "length""8")]) +(define_expand "mve_vstrwq_scatter_base_wb_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "w") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (ignore_wb, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_add_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u] +;; +(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 3 "s_register_operand" "w")] + VSTRWSBWBQ)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrwq_scatter_base_wb_p_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 
"s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_p_add_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u] +;; +(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRWSBWBQ)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vstrwq_scatter_base_wb_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 2 "s_register_operand" "w") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (ignore_wb,operands[0], + 
operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_add_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_vec = gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_f] +;; +(define_insn "mve_vstrwq_scatter_base_wb_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 3 "s_register_operand" "w")] + VSTRWQSBWB_F)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWQSBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrwq_scatter_base_wb_p_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrwq_scatter_base_wb_p_add_fv4sf" + [(match_operand:V4SI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V4SI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] 
VSTRWQSBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_vec = gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrwq_scatter_base_wb_p_f] +;; +(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V4SF 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRWQSBWB_F)) + (set (match_operand:V4SI 0 "s_register_operand" "=w") + (unspec:V4SI [(match_dup 1) (match_dup 2)] + VSTRWQSBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vstrdq_scatter_base_wb_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "w") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (ignore_wb, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vstrdq_scatter_base_wb_add_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "0") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (operands[0], operands[2], + operands[1], ignore_vec)); + DONE; +}) + +;; +;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u] +;; +(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di_insn" + 
[(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V2DI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 3 "s_register_operand" "w")] + VSTRDSBWBQ)) + (set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 1) (match_dup 2)] + VSTRDSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vstrdq_scatter_base_wb_p_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (ignore_wb, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vstrdq_scatter_base_wb_p_add_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:SI 1 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 2 "s_register_operand" "0") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_vec = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (operands[0], operands[2], + operands[1], ignore_vec, + operands[3])); + DONE; +}) + +;; +;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u] +;; +(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V2DI 1 "s_register_operand" "0") + (match_operand:SI 2 "mve_vldrd_immediate" "Ri") + (match_operand:V2DI 3 "s_register_operand" "w") + (match_operand:HI 4 "vpr_register_operand")] + VSTRDSBWBQ)) + (set (match_operand:V2DI 0 
"s_register_operand" "=w") + (unspec:V2DI [(match_dup 1) (match_dup 2)] + VSTRDSBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[1]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrwq_gather_base_wb_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_<supf>v4si_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_s vldrwq_gather_base_wb_u] +;; +(define_insn "mve_vldrwq_gather_base_wb_<supf>v4si_insn" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRWGBWBQ)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrwq_gather_base_wb_z_<supf>v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_<supf>v4si_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_z_s vldrwq_gather_base_wb_z_u] +;; +(define_insn 
"mve_vldrwq_gather_base_wb_z_<supf>v4si_insn" + [(set (match_operand:V4SI 0 "s_register_operand" "=&w") + (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRWGBWBQ)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrwq_gather_base_wb_fv4sf" + [(match_operand:V4SF 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_fv4sf_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_f] +;; +(define_insn "mve_vldrwq_gather_base_wb_fv4sf_insn" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRWQGBWB_F)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWQGBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrwq_gather_base_wb_z_fv4sf" + [(match_operand:V4SF 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + 
(unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_wb = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrwq_gather_base_wb_z_f] +;; +(define_insn "mve_vldrwq_gather_base_wb_z_fv4sf_insn" + [(set (match_operand:V4SF 0 "s_register_operand" "=&w") + (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRWQGBWB_F)) + (set (match_operand:V4SI 1 "s_register_operand" "=&w") + (unspec:V4SI [(match_dup 2) (match_dup 3)] + VLDRWQGBWB_F)) + ] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) + +(define_expand "mve_vldrdq_gather_base_wb_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_<supf>v2di_insn (operands[0], ignore_wb, + operands[1], operands[2])); + DONE; +}) + +;; +;; [vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] +;; +(define_insn "mve_vldrdq_gather_base_wb_<supf>v2di_insn" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRDGBWBQ)) + (set (match_operand:V2DI 1 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 2) (match_dup 3)] + VLDRDGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + 
output_asm_insn ("vldrd.64\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "4")]) + +(define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" +{ + rtx ignore_wb = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_z_<supf>v2di_insn (operands[0], ignore_wb, + operands[1], operands[2], + operands[3])); + DONE; +}) + +;; +;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u] +;; +(define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn" + [(set (match_operand:V2DI 0 "s_register_operand" "=&w") + (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:HI 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRDGBWBQ)) + (set (match_operand:V2DI 1 "s_register_operand" "=&w") + (unspec:V2DI [(match_dup 2) (match_dup 3)] + VLDRDGBWBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[2]; + ops[2] = operands[3]; + output_asm_insn ("vpst\;\tvldrdt.u64\t%q0, [%q1, %2]!",ops); + return ""; +} + [(set_attr "length" "8")]) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..244f67e71fb51e8332c80c52aca2c962885d2f13 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int64x2_t +foo (uint64x2_t * addr) +{ + return vldrdq_gather_base_wb_s64 (addr, 8); +} + 
+/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..f0ef919ab0ceee7318340a121d2307fbc51ceed9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint64x2_t +foo (uint64x2_t * addr) +{ + return vldrdq_gather_base_wb_u64 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrd.64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..55f618bdb51a1c1ef06d83a8d5caab37415eeca0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ + +#include "arm_mve.h" + +int64x2_t foo (uint64x2_t * addr, mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_s64 (addr, 1016, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..f165840344bfdc41613b4602d3acca246e637ca5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ + +#include "arm_mve.h" + +uint64x2_t foo (uint64x2_t * addr, 
mve_pred16_t p) +{ + return vldrdq_gather_base_wb_z_u64 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..c14829ee0b53df02fc27ab2507f0940cc665e500 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_f32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..91e91a88b65c6ee319ad8e00c1ea6879b6ec9d30 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_s32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..4dbee68d1e29f179ee0213bb5aaac21e716d429a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { 
dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32x4_t * addr) +{ + return vldrwq_gather_base_wb_u32 (addr, 8); +} + +/* { dg-final { scan-assembler "vldrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..457368a712f677c8a7278a1914af5979c7a8ce13 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +float32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_f32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..5ce458edeb28b1c2d3c28d7751458398db77af68 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +int32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_s32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c 
new file mode 100644 index 0000000000000000000000000000000000000000..1c24ad852dd766934f38c8ce9725c5f6cefae938 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +uint32x4_t +foo (uint32x4_t * addr, mve_pred16_t p) +{ + return vldrwq_gather_base_wb_z_u32 (addr, 8, p); +} + +/* { dg-final { scan-assembler "vldrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..756a3a154181e9835e87b0830d70fa17eb5f8c6c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, int64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p_s64 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, int64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..0e97dfadddec32b2c945417c8d8fb8620ec76cce --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { 
dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, uint64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p_u64 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, uint64x2_t value, mve_pred16_t p) +{ + vstrdq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrdt.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c new file mode 100644 index 0000000000000000000000000000000000000000..b46c5b12c3d732cdcb19fcf2256a2b89f0f9a695 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, int64x2_t value) +{ + vstrdq_scatter_base_wb_s64 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, int64x2_t value) +{ + vstrdq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c new file mode 100644 index 0000000000000000000000000000000000000000..62b0742cfb2f7ec7a3519969cad19f85cc272614 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { 
dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint64x2_t * addr, const int offset, uint64x2_t value) +{ + vstrdq_scatter_base_wb_u64 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ + +void +foo1 (uint64x2_t * addr, const int offset, uint64x2_t value) +{ + vstrdq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrd.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..45eaf31206f4addb33ad32b52cb45ce815876d24 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, float32x4_t value) +{ + vstrwq_scatter_base_wb_f32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, float32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c new file mode 100644 index 0000000000000000000000000000000000000000..9592d386ebb95a1ae94201554448b75f6086ab78 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo 
(uint32x4_t * addr, const int offset, float32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_f32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, float32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..6b2b03dc26467d65b4c617c0d39bb6e062791bca --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_s32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..afc5ff88287e6d39bf0209d92923769073b4a80d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int 
offset, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p_u32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_wb_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..214cd3ee79067eb7f0887bc8018793647ad02e1f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, const int offset, int32x4_t value) +{ + vstrwq_scatter_base_wb_s32 (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, const int offset, int32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..a0afb7ab448598536aaec568f55ddb22e12c3f99 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2" } */ +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t * addr, uint32x4_t value) +{ + vstrwq_scatter_base_wb_u32 (addr, 8, value); +} + +/* { dg-final { 
scan-assembler "vstrw.u32" } } */ + +void +foo1 (uint32x4_t * addr, uint32x4_t value) +{ + vstrwq_scatter_base_wb (addr, 8, value); +} + +/* { dg-final { scan-assembler "vstrw.u32" } } */
diff31.patch.gz
Description: diff31.patch.gz