https://gcc.gnu.org/g:28e4682944e32d236640c0b310db82870008ae33
commit r15-6238-g28e4682944e32d236640c0b310db82870008ae33 Author: Christophe Lyon <christophe.l...@linaro.org> Date: Thu Oct 31 09:56:05 2024 +0000 arm: [MVE intrinsics] rework vldr gather_base_wb Implement vldr?q_gather_base_wb using the new MVE builtins framework. gcc/ChangeLog: * config/arm/arm-builtins.cc (arm_ldrgbwbxu_qualifiers) (arm_ldrgbwbxu_z_qualifiers, arm_ldrgbwbs_qualifiers) (arm_ldrgbwbu_qualifiers, arm_ldrgbwbs_z_qualifiers) (arm_ldrgbwbu_z_qualifiers): Delete. * config/arm/arm-mve-builtins-base.cc (vldrq_gather_base_impl): Add support for MODE_wb. * config/arm/arm-mve-builtins-shapes.cc (struct load_gather_base_def): Likewise. * config/arm/arm_mve.h (vldrdq_gather_base_wb_s64): Delete. (vldrdq_gather_base_wb_u64): Delete. (vldrdq_gather_base_wb_z_s64): Delete. (vldrdq_gather_base_wb_z_u64): Delete. (vldrwq_gather_base_wb_f32): Delete. (vldrwq_gather_base_wb_s32): Delete. (vldrwq_gather_base_wb_u32): Delete. (vldrwq_gather_base_wb_z_f32): Delete. (vldrwq_gather_base_wb_z_s32): Delete. (vldrwq_gather_base_wb_z_u32): Delete. (__arm_vldrdq_gather_base_wb_s64): Delete. (__arm_vldrdq_gather_base_wb_u64): Delete. (__arm_vldrdq_gather_base_wb_z_s64): Delete. (__arm_vldrdq_gather_base_wb_z_u64): Delete. (__arm_vldrwq_gather_base_wb_s32): Delete. (__arm_vldrwq_gather_base_wb_u32): Delete. (__arm_vldrwq_gather_base_wb_z_s32): Delete. (__arm_vldrwq_gather_base_wb_z_u32): Delete. (__arm_vldrwq_gather_base_wb_f32): Delete. (__arm_vldrwq_gather_base_wb_z_f32): Delete. 
* config/arm/arm_mve_builtins.def (vldrwq_gather_base_nowb_z_u) (vldrdq_gather_base_nowb_z_u, vldrwq_gather_base_nowb_u) (vldrdq_gather_base_nowb_u, vldrwq_gather_base_nowb_z_s) (vldrwq_gather_base_nowb_z_f, vldrdq_gather_base_nowb_z_s) (vldrwq_gather_base_nowb_s, vldrwq_gather_base_nowb_f) (vldrdq_gather_base_nowb_s, vldrdq_gather_base_wb_z_s) (vldrdq_gather_base_wb_z_u, vldrdq_gather_base_wb_s) (vldrdq_gather_base_wb_u, vldrwq_gather_base_wb_z_s) (vldrwq_gather_base_wb_z_f, vldrwq_gather_base_wb_z_u) (vldrwq_gather_base_wb_s, vldrwq_gather_base_wb_f) (vldrwq_gather_base_wb_u): Delete. * config/arm/iterators.md (supf): Remove VLDRWQGBWB_S, VLDRWQGBWB_U, VLDRDQGBWB_S, VLDRDQGBWB_U. (VLDRWGBWBQ, VLDRDGBWBQ): Delete. * config/arm/mve.md (mve_vldrwq_gather_base_wb_<supf>v4si): Delete. (mve_vldrwq_gather_base_nowb_<supf>v4si): Delete. (mve_vldrwq_gather_base_wb_<supf>v4si_insn): Delete. (mve_vldrwq_gather_base_wb_z_<supf>v4si): Delete. (mve_vldrwq_gather_base_nowb_z_<supf>v4si): Delete. (mve_vldrwq_gather_base_wb_z_<supf>v4si_insn): Delete. (mve_vldrwq_gather_base_wb_fv4sf): Delete. (mve_vldrwq_gather_base_nowb_fv4sf): Delete. (mve_vldrwq_gather_base_wb_fv4sf_insn): Delete. (mve_vldrwq_gather_base_wb_z_fv4sf): Delete. (mve_vldrwq_gather_base_nowb_z_fv4sf): Delete. (mve_vldrwq_gather_base_wb_z_fv4sf_insn): Delete. (mve_vldrdq_gather_base_wb_<supf>v2di): Delete. (mve_vldrdq_gather_base_nowb_<supf>v2di): Delete. (mve_vldrdq_gather_base_wb_<supf>v2di_insn): Delete. (mve_vldrdq_gather_base_wb_z_<supf>v2di): Delete. (mve_vldrdq_gather_base_nowb_z_<supf>v2di): Delete. (mve_vldrdq_gather_base_wb_z_<supf>v2di_insn): Delete. (@mve_vldrq_gather_base_wb_<mode>): New. (@mve_vldrq_gather_base_wb_z_<mode>): New. * config/arm/unspecs.md (VLDRWQGBWB_S, VLDRWQGBWB_U, VLDRWQGBWB_F) (VLDRDQGBWB_S, VLDRDQGBWB_U): Delete. (VLDRGBWBQ, VLDRGBWBQ_Z): New. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: Update expected output. 
* gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. Diff: --- gcc/config/arm/arm-builtins.cc | 33 -- gcc/config/arm/arm-mve-builtins-base.cc | 39 ++- gcc/config/arm/arm-mve-builtins-shapes.cc | 4 +- gcc/config/arm/arm_mve.h | 110 ------- gcc/config/arm/arm_mve_builtins.def | 20 -- gcc/config/arm/iterators.md | 5 +- gcc/config/arm/mve.md | 352 ++------------------- gcc/config/arm/unspecs.md | 7 +- .../arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c | 4 +- .../arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c | 4 +- 10 files changed, 78 insertions(+), 500 deletions(-) diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index 56740d30fa8e..b28afc27853c 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -610,39 +610,6 @@ arm_quadop_unone_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUADOP_UNONE_UNONE_UNONE_NONE_PRED_QUALIFIERS \ (arm_quadop_unone_unone_unone_none_pred_qualifiers) -static enum arm_type_qualifiers -arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; -#define LDRGBWBXU_QUALIFIERS (arm_ldrgbwbxu_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_predicate}; -#define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_unsigned, qualifier_immediate}; -#define LDRGBWBS_QUALIFIERS (arm_ldrgbwbs_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; -#define LDRGBWBU_QUALIFIERS (arm_ldrgbwbu_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_unsigned, qualifier_immediate, - 
qualifier_predicate}; -#define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) - -static enum arm_type_qualifiers -arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_predicate}; -#define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) - static enum arm_type_qualifiers arm_lsll_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_none}; diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index 3a1a8af4fcdd..7938efcdf68d 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -483,19 +483,48 @@ class vldrq_gather_base_impl : public load_extending public: using load_extending::load_extending; + machine_mode memory_vector_mode (const function_instance &fi) const override + { + unsigned int element_bits = fi.type_suffix (0).element_bits; + type_suffix_index suffix = find_type_suffix (TYPE_unsigned, element_bits); + return type_suffixes[suffix].vector_mode; + } + rtx expand (function_expander &e) const override { insn_code icode; - rtx insns; + rtx insns, base_ptr, new_base; + machine_mode base_mode; + + if ((e.mode_suffix_id != MODE_none) + && (e.mode_suffix_id != MODE_wb)) + gcc_unreachable (); + + /* In _wb mode, the start offset is passed via a pointer, + dereference it. */ + if (e.mode_suffix_id == MODE_wb) + { + base_mode = e.memory_vector_mode (); + rtx base = gen_reg_rtx (base_mode); + base_ptr = e.args[0]; + emit_insn (gen_rtx_SET (base, gen_rtx_MEM (base_mode, base_ptr))); + e.args[0] = base; + new_base = gen_reg_rtx (base_mode); + e.args.quick_insert (0, new_base); + } switch (e.pred) { case PRED_none: - icode = code_for_mve_vldrq_gather_base (e.vector_mode (0)); + icode = (e.mode_suffix_id == MODE_none) + ? 
code_for_mve_vldrq_gather_base (e.vector_mode (0)) + : code_for_mve_vldrq_gather_base_wb (e.vector_mode (0)); break; case PRED_z: - icode = code_for_mve_vldrq_gather_base_z (e.vector_mode (0)); + icode = (e.mode_suffix_id == MODE_none) + ? code_for_mve_vldrq_gather_base_z (e.vector_mode (0)) + : code_for_mve_vldrq_gather_base_wb_z (e.vector_mode (0)); break; default: @@ -503,6 +532,10 @@ public: } insns = e.use_exact_insn (icode); + /* Update offset as appropriate. */ + if (e.mode_suffix_id == MODE_wb) + emit_insn (gen_rtx_SET (gen_rtx_MEM (base_mode, base_ptr), new_base)); + return insns; } }; diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc index fa4fee072e6d..d7cfdca3acd2 100644 --- a/gcc/config/arm/arm-mve-builtins-shapes.cc +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc @@ -1558,7 +1558,8 @@ struct load_ext_gather : public overloaded_base<0> Example: vldrwq_gather_base int32x4_t [__arm_]vldrwq_gather_base_s32(uint32x4_t addr, const int offset) - float32x4_t [__arm_]vldrwq_gather_base_z_f32(uint32x4_t addr, const int offset, mve_pred16_t p) */ + float32x4_t [__arm_]vldrwq_gather_base_z_f32(uint32x4_t addr, const int offset, mve_pred16_t p) + int64x2_t [__arm_]vldrdq_gather_base_wb_s64(uint64x2_t *addr, const int offset) */ struct load_gather_base_def : public nonoverloaded_base { bool @@ -1578,6 +1579,7 @@ struct load_gather_base_def : public nonoverloaded_base bool preserve_user_namespace) const override { build_all (b, "v0,vu0,ss64", group, MODE_none, preserve_user_namespace); + build_all (b, "v0,b,ss64", group, MODE_wb, preserve_user_namespace); } bool diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index c577c0379c16..45b27ed9fb88 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -73,16 +73,6 @@ #define vuninitializedq_s64(void) __arm_vuninitializedq_s64(void) #define vuninitializedq_f16(void) __arm_vuninitializedq_f16(void) #define vuninitializedq_f32(void) 
__arm_vuninitializedq_f32(void) -#define vldrdq_gather_base_wb_s64(__addr, __offset) __arm_vldrdq_gather_base_wb_s64(__addr, __offset) -#define vldrdq_gather_base_wb_u64(__addr, __offset) __arm_vldrdq_gather_base_wb_u64(__addr, __offset) -#define vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) -#define vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_u64(__addr, __offset, __p) -#define vldrwq_gather_base_wb_f32(__addr, __offset) __arm_vldrwq_gather_base_wb_f32(__addr, __offset) -#define vldrwq_gather_base_wb_s32(__addr, __offset) __arm_vldrwq_gather_base_wb_s32(__addr, __offset) -#define vldrwq_gather_base_wb_u32(__addr, __offset) __arm_vldrwq_gather_base_wb_u32(__addr, __offset) -#define vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) -#define vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) -#define vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) #define vst2q_s8(__addr, __value) __arm_vst2q_s8(__addr, __value) #define vst2q_u8(__addr, __value) __arm_vst2q_u8(__addr, __value) #define vld2q_s8(__addr) __arm_vld2q_s8(__addr) @@ -218,86 +208,6 @@ __arm_vpnot (mve_pred16_t __a) return __builtin_mve_vpnotv16bi (__a); } -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) -{ - int64x2_t - result = __builtin_mve_vldrdq_gather_base_nowb_sv2di (*__addr, __offset); - *__addr = __builtin_mve_vldrdq_gather_base_wb_sv2di (*__addr, __offset); - return result; -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_wb_u64 (uint64x2_t * __addr, const int __offset) -{ - uint64x2_t - result = 
__builtin_mve_vldrdq_gather_base_nowb_uv2di (*__addr, __offset); - *__addr = __builtin_mve_vldrdq_gather_base_wb_uv2di (*__addr, __offset); - return result; -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_wb_z_s64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) -{ - int64x2_t - result = __builtin_mve_vldrdq_gather_base_nowb_z_sv2di (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrdq_gather_base_wb_z_sv2di (*__addr, __offset, __p); - return result; -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrdq_gather_base_wb_z_u64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) -{ - uint64x2_t - result = __builtin_mve_vldrdq_gather_base_nowb_z_uv2di (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrdq_gather_base_wb_z_uv2di (*__addr, __offset, __p); - return result; -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_s32 (uint32x4_t * __addr, const int __offset) -{ - int32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_sv4si (*__addr, __offset); - *__addr = __builtin_mve_vldrwq_gather_base_wb_sv4si (*__addr, __offset); - return result; -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_u32 (uint32x4_t * __addr, const int __offset) -{ - uint32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_uv4si (*__addr, __offset); - *__addr = __builtin_mve_vldrwq_gather_base_wb_uv4si (*__addr, __offset); - return result; -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_z_s32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) -{ - int32x4_t - result = 
__builtin_mve_vldrwq_gather_base_nowb_z_sv4si (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrwq_gather_base_wb_z_sv4si (*__addr, __offset, __p); - return result; -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) -{ - uint32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_z_uv4si (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrwq_gather_base_wb_z_uv4si (*__addr, __offset, __p); - return result; -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vst2q_s8 (int8_t * __addr, int8x16x2_t __value) @@ -728,26 +638,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value) __builtin_mve_vst4qv4sf (__addr, __rv.__o); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) -{ - float32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_fv4sf (*__addr, __offset); - *__addr = __builtin_mve_vldrwq_gather_base_wb_fv4sf (*__addr, __offset); - return result; -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) -{ - float32x4_t - result = __builtin_mve_vldrwq_gather_base_nowb_z_fv4sf (*__addr, __offset, __p); - *__addr = __builtin_mve_vldrwq_gather_base_wb_z_fv4sf (*__addr, __offset, __p); - return result; -} - __extension__ extern __inline float16x8x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vld4q_f16 (float16_t const * __addr) diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 44428104d3e0..b85b334a81e4 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ 
b/gcc/config/arm/arm_mve_builtins.def @@ -663,26 +663,6 @@ VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf) VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf) -VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si) -VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di) -VAR1 (LDRGBWBU, vldrwq_gather_base_nowb_u, v4si) -VAR1 (LDRGBWBU, vldrdq_gather_base_nowb_u, v2di) -VAR1 (LDRGBWBS_Z, vldrwq_gather_base_nowb_z_s, v4si) -VAR1 (LDRGBWBS_Z, vldrwq_gather_base_nowb_z_f, v4sf) -VAR1 (LDRGBWBS_Z, vldrdq_gather_base_nowb_z_s, v2di) -VAR1 (LDRGBWBS, vldrwq_gather_base_nowb_s, v4si) -VAR1 (LDRGBWBS, vldrwq_gather_base_nowb_f, v4sf) -VAR1 (LDRGBWBS, vldrdq_gather_base_nowb_s, v2di) -VAR1 (LDRGBWBXU_Z, vldrdq_gather_base_wb_z_s, v2di) -VAR1 (LDRGBWBXU_Z, vldrdq_gather_base_wb_z_u, v2di) -VAR1 (LDRGBWBXU, vldrdq_gather_base_wb_s, v2di) -VAR1 (LDRGBWBXU, vldrdq_gather_base_wb_u, v2di) -VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_s, v4si) -VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_f, v4sf) -VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_u, v4si) -VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_s, v4si) -VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_f, v4sf) -VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vadciq_s, v4si) VAR1 (BINOP_UNONE_UNONE_UNONE, vadciq_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vadcq_s, v4si) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 69c457c8d765..834c81da56ec 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -2538,8 +2538,7 @@ (VMLALDAVAXQ_P_S "s") (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") (VSTRDQSB_S "s") (VSTRDQSB_U "u") - (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") - (VLDRDQGBWB_U "u") (VADCQ_M_S "s") + (VADCQ_M_S "s") (VSBCQ_U "u") (VSBCQ_M_U "u") (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u") (VSBCIQ_M_U "u") (VSBCIQ_S "s") 
(VSBCIQ_M_S "s") @@ -2938,8 +2937,6 @@ (define_int_iterator VSHLLxQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) (define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) (define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) -(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) -(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) (define_int_iterator VxCIQ [VADCIQ_U VADCIQ_S VSBCIQ_U VSBCIQ_S]) (define_int_iterator VxCIQ_M [VADCIQ_M_U VADCIQ_M_S VSBCIQ_M_U VSBCIQ_M_S]) (define_int_iterator VxCQ [VADCQ_U VADCQ_S VSBCQ_U VSBCQ_S]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index ef4448ef65a6..a0a59da40401 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -3917,313 +3917,51 @@ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_wb_<mode>")) (set_attr "length" "8")]) -(define_expand "mve_vldrwq_gather_base_wb_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_<supf>v4si_insn (ignore_result, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vldrwq_gather_base_nowb_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_<supf>v4si_insn (operands[0], ignore_wb, - operands[1], operands[2])); - DONE; -}) - +;; Vector gather loads with base and write-back ;; ;; [vldrwq_gather_base_wb_s vldrwq_gather_base_wb_u] -;; -(define_insn "mve_vldrwq_gather_base_wb_<supf>v4si_insn" - [(set (match_operand:V4SI 0 "s_register_operand" "=&w") - 
(unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (mem:BLK (scratch))] - VLDRWGBWBQ)) - (set (match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWGBWBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn")) - (set_attr "length" "4")]) - -(define_expand "mve_vldrwq_gather_base_wb_z_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_<supf>v4si_insn (ignore_result, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) -(define_expand "mve_vldrwq_gather_base_nowb_z_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_<supf>v4si_insn (operands[0], ignore_wb, - operands[1], operands[2], - operands[3])); - DONE; -}) - -;; -;; [vldrwq_gather_base_wb_z_s vldrwq_gather_base_wb_z_u] -;; -(define_insn "mve_vldrwq_gather_base_wb_z_<supf>v4si_insn" - [(set (match_operand:V4SI 0 "s_register_operand" "=&w") - (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:V4BI 4 "vpr_register_operand" "Up") - (mem:BLK (scratch))] - VLDRWGBWBQ)) - (set 
(match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWGBWBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_<supf>v4si_insn")) - (set_attr "length" "8")]) - -(define_expand "mve_vldrwq_gather_base_wb_fv4sf" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_result = gen_reg_rtx (V4SFmode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_fv4sf_insn (ignore_result, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vldrwq_gather_base_nowb_fv4sf" - [(match_operand:V4SF 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_fv4sf_insn (operands[0], ignore_wb, - operands[1], operands[2])); - DONE; -}) - -;; ;; [vldrwq_gather_base_wb_f] +;; [vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] ;; -(define_insn "mve_vldrwq_gather_base_wb_fv4sf_insn" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (mem:BLK (scratch))] - VLDRWQGBWB_F)) - (set (match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWQGBWB_F)) +(define_insn "@mve_vldrq_gather_base_wb_<mode>" + [(set (match_operand:MVE_4 0 "s_register_operand" "=&w") + (unspec:MVE_4 [(match_operand:<MVE_scatter_offset> 2 
"s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (mem:BLK (scratch))] + VLDRGBWBQ)) + (set (match_operand:<MVE_scatter_offset> 1 "s_register_operand" "=&w") + (unspec:<MVE_scatter_offset> [(match_dup 2) (match_dup 3)] + VLDRGBWBQ)) ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vldrw.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn")) + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vldr<MVE_elem_ch>.u<V_sz_elem>\t%q0, [%q1, %3]!" + [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_base_wb_<mode>")) (set_attr "length" "4")]) -(define_expand "mve_vldrwq_gather_base_wb_z_fv4sf" - [(match_operand:V4SI 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_result = gen_reg_rtx (V4SFmode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (ignore_result, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_expand "mve_vldrwq_gather_base_nowb_z_fv4sf" - [(match_operand:V4SF 0 "s_register_operand") - (match_operand:V4SI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V4BI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (operands[0], ignore_wb, - operands[1], operands[2], - operands[3])); - DONE; -}) - +;; Predicated vector gather loads with base and write-back ;; +;; [vldrwq_gather_base_wb_z_s 
vldrwq_gather_base_wb_z_u] ;; [vldrwq_gather_base_wb_z_f] +;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u] ;; -(define_insn "mve_vldrwq_gather_base_wb_z_fv4sf_insn" - [(set (match_operand:V4SF 0 "s_register_operand" "=&w") - (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:V4BI 4 "vpr_register_operand" "Up") - (mem:BLK (scratch))] - VLDRWQGBWB_F)) - (set (match_operand:V4SI 1 "s_register_operand" "=&w") - (unspec:V4SI [(match_dup 2) (match_dup 3)] - VLDRWQGBWB_F)) +(define_insn "@mve_vldrq_gather_base_wb_z_<mode>" + [(set (match_operand:MVE_4 0 "s_register_operand" "=&w") + (unspec:MVE_4 [(match_operand:<MVE_scatter_offset> 2 "s_register_operand" "1") + (match_operand:SI 3 "mve_vldrd_immediate" "Ri") + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up") + (mem:BLK (scratch))] + VLDRGBWBQ_Z)) + (set (match_operand:<MVE_scatter_offset> 1 "s_register_operand" "=&w") + (unspec:<MVE_scatter_offset> [(match_dup 2) (match_dup 3)] + VLDRGBWBQ_Z)) ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_wb_fv4sf_insn")) + "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode)) + || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))" + "vpst\;vldr<MVE_elem_ch>t.u<V_sz_elem>\t%q0, [%q1, %3]!" 
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_base_wb_<mode>")) (set_attr "length" "8")]) -(define_expand "mve_vldrdq_gather_base_wb_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_<supf>v2di_insn (ignore_result, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vldrdq_gather_base_nowb_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_<supf>v2di_insn (operands[0], ignore_wb, - operands[1], operands[2])); - DONE; -}) - - -;; -;; [vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] -;; -(define_insn "mve_vldrdq_gather_base_wb_<supf>v2di_insn" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (mem:BLK (scratch))] - VLDRDGBWBQ)) - (set (match_operand:V2DI 1 "s_register_operand" "=&w") - (unspec:V2DI [(match_dup 2) (match_dup 3)] - VLDRDGBWBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vldrd.64\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn")) - (set_attr "length" "4")]) - -(define_expand "mve_vldrdq_gather_base_wb_z_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V2QI 3 
"vpr_register_operand") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_result = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_z_<supf>v2di_insn (ignore_result, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_expand "mve_vldrdq_gather_base_nowb_z_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand") - (match_operand:V2DI 1 "s_register_operand") - (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:V2QI 3 "vpr_register_operand") - (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vldrdq_gather_base_wb_z_<supf>v2di_insn (operands[0], ignore_wb, - operands[1], operands[2], - operands[3])); - DONE; -}) - (define_insn "get_fpscr_nzcvqc" [(set (match_operand:SI 0 "register_operand" "=r") (unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))] @@ -4239,32 +3977,6 @@ "vmsr\\tFPSCR_nzcvqc, %0" [(set_attr "type" "mve_move")]) -;; -;; [vldrdq_gather_base_wb_z_s vldrdq_gather_base_wb_z_u] -;; -(define_insn "mve_vldrdq_gather_base_wb_z_<supf>v2di_insn" - [(set (match_operand:V2DI 0 "s_register_operand" "=&w") - (unspec:V2DI [(match_operand:V2DI 2 "s_register_operand" "1") - (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:V2QI 4 "vpr_register_operand" "Up") - (mem:BLK (scratch))] - VLDRDGBWBQ)) - (set (match_operand:V2DI 1 "s_register_operand" "=&w") - (unspec:V2DI [(match_dup 2) (match_dup 3)] - VLDRDGBWBQ)) - ] - "TARGET_HAVE_MVE" -{ - rtx ops[3]; - ops[0] = operands[0]; - ops[1] = operands[2]; - ops[2] = operands[3]; - output_asm_insn ("vpst\;vldrdt.u64\t%q0, [%q1, %2]!",ops); - return ""; -} - [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn")) - (set_attr "length" "8")]) - ;; ;; [vadciq_u, vadciq_s] ;; [vsbciq_s, vsbciq_u] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 
63a0168ea199..866e659938e5 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -1186,11 +1186,8 @@ VIWDUPQ_M VSTRSBWBQ VSTRSBWBQ_P - VLDRWQGBWB_S - VLDRWQGBWB_U - VLDRWQGBWB_F - VLDRDQGBWB_S - VLDRDQGBWB_U + VLDRGBWBQ + VLDRGBWBQ_Z VADCQ_U VADCQ_M_U VADCQ_S diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c index e3fd7f16a31e..5fb9510d64b8 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c @@ -12,7 +12,7 @@ extern "C" { /* **foo: ** ... -** vldrd.64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) +** vldrd.u64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) ** ... */ int64x2_t @@ -25,4 +25,4 @@ foo (uint64x2_t *addr) } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c index 161cf00b65e9..2eb36f4d3d81 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c @@ -12,7 +12,7 @@ extern "C" { /* **foo: ** ... -** vldrd.64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) +** vldrd.u64 q[0-9]+, \[q[0-9]+, #[0-9]+\]!(?: @.*|) ** ... */ uint64x2_t @@ -25,4 +25,4 @@ foo (uint64x2_t *addr) } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */