On Tue, Oct 14, 2025 at 8:32 AM Tamar Christina <[email protected]> wrote:
>
> In PR121604 the problem was noted that currently the SVE intrinsics
> infrastructure assumes that for any predicated operation that the GP is at the
> first argument position which has a svbool_t or for a unary merging operation
> that it's in the second position.
>
> However you have intrinsics like fmov_lane which have an svbool_t but it's not
> a GP but instead it's the inactive lanes.
>
> You also have instructions like BRKB which work only on predicates so it
> incorrectly determines the first operand to be the GP, while that's also the
> inactive lanes.
>
> However during apply_predication we do have the information about where the GP
> is. This patch re-organizes the code to record this information into the
> function_instance such that folders have access to this information.
>
> For functions that are outliers like pmov_lane we can now override the
> availability of the intrinsics having a GP.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication):
> Store gp_index.
> (struct pmov_to_vector_lane_def): Mark instruction as has no GP.
> * config/aarch64/aarch64-sve-builtins.h (function_instance::gp_value,
> function_instance::inactive_values, function_instance::gp_index,
> function_shape::has_gp_argument_p): New.
> * config/aarch64/aarch64-sve-builtins.cc (gimple_folder::fold_pfalse):
> Simplify code and use GP helpers.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/pr121604_brk.c: New test.
> * gcc.target/aarch64/sve2/pr121604_pmov.c: New test.
>
> Co-authored-by: Jennifer Schmitz <[email protected]>
>
> ---
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> index
> 74a3338e9556c4369a3b1e6e864acfba4ced0e3a..0ee60e89aa6905ec5a281a017fde1c8963a03466
> 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> @@ -21,7 +21,10 @@
> #include "system.h"
> #include "coretypes.h"
> #include "tm.h"
> +#include "basic-block.h"
> #include "tree.h"
> +#include "function.h"
> +#include "gimple.h"
Unless I missed something somewhere I don't see why these newly added
includes are needed. Maybe they came from the previous version of the
patch and you forgot to check if they are still needed.
Thanks,
Andrew
> #include "rtl.h"
> #include "tm_p.h"
> #include "memmodel.h"
> @@ -68,23 +71,36 @@ za_group_is_pure_overload (const function_group_info
> &group)
> types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the
> function. */
> static void
> -apply_predication (const function_instance &instance, tree return_type,
> +apply_predication (function_instance &instance, tree return_type,
> vec<tree> &argument_types)
> {
> + /* Initially mark the function as not being predicated. */
> + instance.gp_index = -1;
> +
> /* There are currently no SME ZA instructions that have both merging and
> unpredicated forms, so for simplicity, the predicates are always
> included
> in the original format string. */
> if (instance.pred != PRED_none && instance.pred != PRED_za_m)
> {
> argument_types.quick_insert (0, instance.gp_type ());
> + instance.gp_index = 0;
> /* For unary merge operations, the first argument is a vector with
> the same type as the result. For unary_convert_narrowt it also
> provides the "bottom" half of active elements, and is present
> for all types of predication. */
> auto nargs = argument_types.length () - 1;
> if (instance.shape->has_merge_argument_p (instance, nargs))
> - argument_types.quick_insert (0, return_type);
> + {
> + argument_types.quick_insert (0, return_type);
> + instance.gp_index = 1;
> + }
> }
> +
> + /* Check if the shape has overriden the GP argument. In this case register
> + the types, predicate the function but don't mark it as having a GP and
> + override what was set before. */
> + if (!instance.shape->has_gp_argument_p (instance))
> + instance.gp_value_index = -1;
> }
>
> /* Parse and move past an element type in FORMAT and return it as a type
> @@ -3332,6 +3348,14 @@ struct pmov_to_vector_lane_def : public
> overloaded_base<0>
> but it doesn't currently have the necessary information. */
> return c.require_immediate_range (1, 1, bytes - 1);
> }
> +
> + /* This function has a predicate argument, and is a merging instruction,
> but
> + the predicate is not a GP, it's the inactive lanes. */
> + bool
> + has_gp_argument_p (const function_instance &) const override
> + {
> + return false;
> + }
> };
> SHAPE (pmov_to_vector_lane)
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index
> 4956e364929c6a958f37f4d5ab8af668682225a8..4805537231227c565ff2a5ef32f30b24017fb3ad
> 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -3632,24 +3632,22 @@ gimple_folder::redirect_pred_x ()
> gimple *
> gimple_folder::fold_pfalse ()
> {
> - if (pred == PRED_none)
> + tree gp = gp_pred (call);
> + /* If there isn't a GP then we can't do any folding as the instruction
> isn't
> + predicated. */
> + if (!gp)
> return nullptr;
> - tree arg0 = gimple_call_arg (call, 0);
> +
> if (pred == PRED_m)
> {
> - /* Unary function shapes with _m predication are folded to the
> - inactive vector (arg0), while other function shapes are folded
> - to op1 (arg1). */
> - tree arg1 = gimple_call_arg (call, 1);
> - if (is_pfalse (arg1))
> - return fold_call_to (arg0);
> - if (is_pfalse (arg0))
> - return fold_call_to (arg1);
> + tree val = inactive_values (call);
> + if (is_pfalse (gp))
> + return fold_call_to (val);
> return nullptr;
> }
> - if ((pred == PRED_x || pred == PRED_z) && is_pfalse (arg0))
> + if ((pred == PRED_x || pred == PRED_z) && is_pfalse (gp))
> return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
> - if (pred == PRED_implicit && is_pfalse (arg0))
> + if (pred == PRED_implicit && is_pfalse (gp))
> {
> unsigned int flags = call_properties ();
> /* Folding to lhs = {0, ...} is not appropriate for intrinsics with
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h
> b/gcc/config/aarch64/aarch64-sve-builtins.h
> index
> d6a58b450d6ddfd38788899b61b75c14e9472a99..fceb80f4d8e3e9ab1584450c98d35f94c4202c8a
> 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.h
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.h
> @@ -403,6 +403,8 @@ public:
> bool could_trap_p () const;
>
> vector_type_index gp_type_index () const;
> + tree gp_value (gcall *) const;
> + tree inactive_values (gcall *) const;
> tree gp_type () const;
>
> unsigned int vectors_per_tuple () const;
> @@ -436,6 +438,7 @@ public:
> group_suffix_index group_suffix_id;
> predication_index pred;
> fpm_mode_index fpm_mode;
> + int gp_index;
> };
>
> class registered_function;
> @@ -801,6 +804,8 @@ public:
> virtual bool has_merge_argument_p (const function_instance &,
> unsigned int) const;
>
> + virtual bool has_gp_argument_p (const function_instance &) const;
> +
> virtual bool explicit_type_suffix_p (unsigned int) const = 0;
>
> /* True if the group suffix is present in overloaded names.
> @@ -949,6 +954,34 @@ function_instance::gp_type () const
> return acle_vector_types[0][gp_type_index ()];
> }
>
> +/* Return the tree value that should be used as the governing predicate of
> + this function. If none then return NULL_TREE. */
> +inline tree
> +function_instance::gp_value (gcall *call) const
> +{
> + if (pred == PRED_none || gp_index < 0)
> + return NULL_TREE;
> +
> + return gimple_call_arg (call, gp_index);
> +}
> +
> +/* Return the tree value that should be used the inactive lanes should this
> + function be a predicated function with a gp. If none then return
> + NULL_TREE. */
> +inline tree
> +function_instance::inactive_values (gcall *call) const
> +{
> + if (pred == PRED_none || gp_index < 0)
> + return NULL_TREE;
> +
> + /* Function is unary with m predicate. */
> + if (gp_index == 1)
> + return gimple_call_arg (call, 0);
> +
> + /* Else the inactive values are the next element. */
> + return gimple_call_arg (call, 1);
> +}
> +
> /* If the function operates on tuples of vectors, return the number
> of vectors in the tuples, otherwise return 1. */
> inline unsigned int
> @@ -1123,6 +1156,14 @@ function_shape::has_merge_argument_p (const
> function_instance &instance,
> return nargs == 1 && instance.pred == PRED_m;
> }
>
> +/* Return true if INSTANCE has an predicate argument that can be used as the
> global
> + predicate. */
> +inline bool
> +function_shape::has_gp_argument_p (const function_instance &instance) const
> +{
> + return instance.pred != PRED_none;
> +}
> +
> /* Return the mode of the result of a call. */
> inline machine_mode
> function_expander::result_mode () const
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..a474a20554d30a19df701f89e16d10f5692b29fd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** foo:
> +** ptrue p0\.b, all
> +** brkb p0\.b, p0/z, p0\.b
> +** ret
> +*/
> +svbool_t foo () {
> + return svbrkb_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
> +}
> +
> +/*
> +** bar:
> +** ptrue p0\.b, all
> +** brka p0\.b, p0/z, p0\.b
> +** ret
> +*/
> +svbool_t bar () {
> + return svbrka_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..16844ee4add3bc0f7c6b251b88913bf7c29b54cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv8.2-a+sve2p1" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** f:
> +** pfalse p([0-7]+)\.b
> +** mov z0\.b, #-1
> +** pmov z0\[1\], p\1\.d
> +** ret
> +*/
> +svuint64_t f () {
> + return svpmov_lane_u64_m (svdup_u64 (~0UL), svpfalse (), 1);
> +}
>
>
> --