https://gcc.gnu.org/g:3cb1f86da1d2020fcf6d27e254d1e2f5e478a81b
commit r16-5397-g3cb1f86da1d2020fcf6d27e254d1e2f5e478a81b
Author: Christophe Lyon <[email protected]>
Date:   Fri Sep 19 12:20:27 2025 +0000

    arm: [MVE intrinsics] rework vuninitialized

    Implement vuninitialized using the new MVE builtins framework.

    This patch is a bit more intrusive than other similar patches because
    the two vuninitialized variants use different signatures. To handle
    this situation, the patch makes the inherent shape derive from
    overloaded_base instead of nonoverloaded_base, and adds a new
    parameter to add_unique_function so that we can choose between adding
    only the nonoverloaded version, only the overloaded ones, or both
    (the default).

    The patch introduces pop_and_resolve_to, which pops an argument (the
    only one in this case), and calls resolve_to, like many other shapes:
    this is because the overloaded version takes an argument (used to
    resolve which nonoverloaded version to call), which is not present in
    the nonoverloaded version prototype.

    gcc/ChangeLog:

            * config/arm/arm-mve-builtins-shapes.cc (build_one): Add
            which_overload parameter.
            (inherent): Derive from overloaded_base<0>. Add support for
            overloaded version.
            * config/arm/arm-mve-builtins-shapes.h (inherent): Update comment.
            * config/arm/arm-mve-builtins.cc (add_unique_function): Add
            support for new which_overload parameter.
            (pop_and_resolve_to): New.
            * config/arm/arm-mve-builtins.h (NONOVERLOADED_FORM)
            (OVERLOADED_FORM): New.
            (add_unique_function): Update prototype.
            (pop_and_resolve_to): New prototype.
            * config/arm/arm_mve.h (vuninitializedq): Delete.
            (vuninitializedq_u8): Delete.
            (vuninitializedq_u16): Delete.
            (vuninitializedq_u32): Delete.
            (vuninitializedq_u64): Delete.
            (vuninitializedq_s8): Delete.
            (vuninitializedq_s16): Delete.
            (vuninitializedq_s32): Delete.
            (vuninitializedq_s64): Delete.
            (vuninitializedq_f16): Delete.
            (vuninitializedq_f32): Delete.
            (__arm_vuninitializedq): Delete.
Diff: --- gcc/config/arm/arm-mve-builtins-shapes.cc | 42 +++++++++-- gcc/config/arm/arm-mve-builtins-shapes.h | 3 +- gcc/config/arm/arm-mve-builtins.cc | 53 +++++++++----- gcc/config/arm/arm-mve-builtins.h | 12 +++- gcc/config/arm/arm_mve.h | 115 ------------------------------ 5 files changed, 86 insertions(+), 139 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc index dd3be87fc9ce..b1dc1dd1172d 100644 --- a/gcc/config/arm/arm-mve-builtins-shapes.cc +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc @@ -249,7 +249,8 @@ static void build_one (function_builder &b, const char *signature, const function_group_info &group, mode_suffix_index mode_suffix_id, unsigned int ti, unsigned int pi, bool preserve_user_namespace, - bool force_direct_overloads) + bool force_direct_overloads, + unsigned int which_overload = NONOVERLOADED_FORM | OVERLOADED_FORM) { /* Current functions take at most five arguments. Match parse_signature parameter below. */ @@ -261,7 +262,7 @@ build_one (function_builder &b, const char *signature, apply_predication (instance, return_type, argument_types); b.add_unique_function (instance, return_type, argument_types, preserve_user_namespace, group.requires_float, - force_direct_overloads); + force_direct_overloads, which_overload); } /* Add a function instance for every type and predicate combination in @@ -1509,18 +1510,51 @@ struct getq_lane_def : public overloaded_base<0> SHAPE (getq_lane) /* <T0>[xN]_t vfoo_t0(). + <T0>[xN]_t vfoo(<T0>_t). Example: vuninitializedq. 
int8x16_t [__arm_]vuninitializedq_s8(void) int8x16_t [__arm_]vuninitializedq(int8x16_t t) */ -struct inherent_def : public nonoverloaded_base +struct inherent_def : public overloaded_base<0> { void build (function_builder &b, const function_group_info &group, bool preserve_user_namespace) const override { - build_all (b, "t0", group, MODE_none, preserve_user_namespace); + b.add_overloaded_functions (group, MODE_none, preserve_user_namespace); + + /* Overloaded and non-overloaded forms have different signatures, so call + build_one with either OVERLOADED_FORM or NONOVERLOADED_FORM. */ + unsigned int pi = 0; + bool force_direct_overloads = false; + for (unsigned int ti = 0; + ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) + { + /* For int8x16_t [__arm_]vuninitializedq(int8x16_t t), generate only + the overloaded form, i.e. without type suffix. */ + build_one (b, "t0,t0", group, MODE_none, ti, pi, + preserve_user_namespace, force_direct_overloads, + OVERLOADED_FORM); + /* For int8x16_t [__arm_]vuninitializedq_s8(void), generate only the + non-overloaded form, i.e. with type suffix. */ + build_one (b, "t0", group, MODE_none, ti, pi, + preserve_user_namespace, force_direct_overloads, + NONOVERLOADED_FORM); + } + } + + tree + resolve (function_resolver &r) const override + { + type_suffix_index type; + if (!r.check_num_arguments (1) + || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + /* We need to pop the useless argument for the non-overloaded function. */ + return r.pop_and_resolve_to (r.mode_suffix_id, type); } + }; SHAPE (inherent) diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h index 22d06ce0ebd3..56bba6dbf876 100644 --- a/gcc/config/arm/arm-mve-builtins-shapes.h +++ b/gcc/config/arm/arm-mve-builtins-shapes.h @@ -29,7 +29,8 @@ namespace arm_mve Also: - - "inherent" means that the function takes no arguments. 
*/ + - "inherent" means that the function takes no arguments, except in its + overloaded form. */ namespace shapes { diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index b37c91c541bc..ecf5196437bf 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -994,7 +994,8 @@ function_builder::add_unique_function (const function_instance &instance, vec<tree> &argument_types, bool preserve_user_namespace, bool requires_float, - bool force_direct_overloads) + bool force_direct_overloads, + unsigned int which_overload) { /* Add the function under its full (unique) name with prefix. */ char *name = get_name (instance, true, false); @@ -1002,27 +1003,31 @@ function_builder::add_unique_function (const function_instance &instance, argument_types.length (), argument_types.address ()); tree attrs = get_attributes (instance); - registered_function &rfn = add_function (instance, name, fntype, attrs, - requires_float, false, false); - - /* Enter the function into the hash table. */ - hashval_t hash = instance.hash (); - registered_function **rfn_slot - = function_table->find_slot_with_hash (instance, hash, INSERT); - gcc_assert (!*rfn_slot); - *rfn_slot = &rfn; - - /* Also add the non-prefixed non-overloaded function, as placeholder - if the user namespace does not need to be preserved. */ - char *noprefix_name = get_name (instance, false, false); - attrs = get_attributes (instance); - add_function (instance, noprefix_name, fntype, attrs, requires_float, - false, preserve_user_namespace); + if (which_overload & NONOVERLOADED_FORM) + { + registered_function &rfn = add_function (instance, name, fntype, attrs, + requires_float, false, false); + + /* Enter the function into the hash table. 
*/ + hashval_t hash = instance.hash (); + registered_function **rfn_slot + = function_table->find_slot_with_hash (instance, hash, INSERT); + gcc_assert (!*rfn_slot); + *rfn_slot = &rfn; + + /* Also add the non-prefixed non-overloaded function, as placeholder + if the user namespace does not need to be preserved. */ + char *noprefix_name = get_name (instance, false, false); + attrs = get_attributes (instance); + add_function (instance, noprefix_name, fntype, attrs, requires_float, + false, preserve_user_namespace); + } /* Also add the function under its overloaded alias, if we want a separate decl for each instance of an overloaded function. */ char *overload_name = get_name (instance, true, true); - if (strcmp (name, overload_name) != 0) + if ((which_overload & OVERLOADED_FORM) + && (strcmp (name, overload_name) != 0)) { /* Attribute lists shouldn't be shared. */ attrs = get_attributes (instance); @@ -1231,6 +1236,18 @@ function_resolver::resolve_to (mode_suffix_index mode, return res; } +/* Pop an argument and resolve the function to one with the mode suffix given + by MODE and the type suffixes given by TYPE0 and TYPE1. Return its function + decl on success, otherwise report an error and return error_mark_node. */ +tree +function_resolver::pop_and_resolve_to (mode_suffix_index mode, + type_suffix_index type0, + type_suffix_index type1) +{ + m_arglist.pop (); + return resolve_to (mode, type0, type1); +} + /* Require argument ARGNO to be a pointer to a scalar type that has a corresponding type suffix. Return that type suffix on success, otherwise report an error and return NUM_TYPE_SUFFIXES. 
*/ diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h index 3a0d50dc64ce..5d25b166b730 100644 --- a/gcc/config/arm/arm-mve-builtins.h +++ b/gcc/config/arm/arm-mve-builtins.h @@ -94,6 +94,13 @@ const unsigned int CP_RAISE_FP_EXCEPTIONS = 1U << 1; const unsigned int CP_READ_MEMORY = 1U << 2; const unsigned int CP_WRITE_MEMORY = 1U << 3; +/* Flags that describe which forms of an intrinsic to generate: non-overloaded + and/or overloaded ones. In general we want both, but for vuninitialized the + two forms have different signatures and we need to generate them + separately. */ +const unsigned int NONOVERLOADED_FORM = 1U << 0; +const unsigned int OVERLOADED_FORM = 1U << 1; + /* Enumerates the MVE predicate and (data) vector types, together called "vector types" for brevity. */ enum vector_type_index @@ -311,7 +318,7 @@ public: ~function_builder (); void add_unique_function (const function_instance &, tree, - vec<tree> &, bool, bool, bool); + vec<tree> &, bool, bool, bool, unsigned int); void add_overloaded_function (const function_instance &, bool, bool); void add_overloaded_functions (const function_group_info &, mode_suffix_index, bool); @@ -383,6 +390,9 @@ public: tree resolve_to (mode_suffix_index, type_suffix_index = NUM_TYPE_SUFFIXES, type_suffix_index = NUM_TYPE_SUFFIXES); + tree pop_and_resolve_to (mode_suffix_index, + type_suffix_index = NUM_TYPE_SUFFIXES, + type_suffix_index = NUM_TYPE_SUFFIXES); type_suffix_index infer_pointer_type (unsigned int); type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index b66b712c4ce2..58e339ba8b05 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -44,97 +44,12 @@ #pragma GCC arm "arm_mve.h" false #endif -#ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE -#define vuninitializedq(__v) __arm_vuninitializedq(__v) - - -#define vuninitializedq_u8(void) __arm_vuninitializedq_u8(void) -#define 
vuninitializedq_u16(void) __arm_vuninitializedq_u16(void) -#define vuninitializedq_u32(void) __arm_vuninitializedq_u32(void) -#define vuninitializedq_u64(void) __arm_vuninitializedq_u64(void) -#define vuninitializedq_s8(void) __arm_vuninitializedq_s8(void) -#define vuninitializedq_s16(void) __arm_vuninitializedq_s16(void) -#define vuninitializedq_s32(void) __arm_vuninitializedq_s32(void) -#define vuninitializedq_s64(void) __arm_vuninitializedq_s64(void) -#define vuninitializedq_f16(void) __arm_vuninitializedq_f16(void) -#define vuninitializedq_f32(void) __arm_vuninitializedq_f32(void) -#endif #ifdef __cplusplus -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint8x16_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u8 (); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint16x8_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u16 (); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint32x4_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u32 (); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint64x2_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u64 (); -} - -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int8x16_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s8 (); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int16x8_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s16 (); -} - -__extension__ extern __inline int32x4_t -__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int32x4_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s32 (); -} -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int64x2_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s64 (); -} - -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (float16x8_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_f16 (); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (float32x4_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_f32 (); -} -#endif /* __ARM_FEATURE_MVE & 2 (MVE floating point) */ #else enum { @@ -371,36 +286,6 @@ extern void *__ARM_undef; _Generic(param, type: param, const type: param, default: _Generic (param, float*: param, default: *(type *)__ARM_undef)) #endif -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. 
*/ - -#define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vuninitializedq_s16 (), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vuninitializedq_s32 (), \ - int (*)[__ARM_mve_type_int64x2_t]: __arm_vuninitializedq_s64 (), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vuninitializedq_u8 (), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vuninitializedq_u16 (), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \ - int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 (), \ - int (*)[__ARM_mve_type_float16x8_t]: __arm_vuninitializedq_f16 (), \ - int (*)[__ARM_mve_type_float32x4_t]: __arm_vuninitializedq_f32 ());}) - -#else /* MVE Integer. */ - -#define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vuninitializedq_s16 (), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vuninitializedq_s32 (), \ - int (*)[__ARM_mve_type_int64x2_t]: __arm_vuninitializedq_s64 (), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vuninitializedq_u8 (), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vuninitializedq_u16 (), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \ - int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 ());}) - -#endif /* MVE Integer. */ - #endif /* __cplusplus */ #endif /* __ARM_FEATURE_MVE */ #endif /* _GCC_ARM_MVE_H. */
