On Tue, Oct 14, 2025 at 8:32 AM Tamar Christina <[email protected]> wrote:
>
> In PR121604 the problem was noted that currently the SVE intrinsics
> infrastructure assumes that for any predicated operation that the GP is at the
> first argument position which has a svbool_t or for a unary merging operation
> that it's in the second position.
>
> However you have intrinsics like fmov_lane which have an svbool_t but it's not
> a GP but instead it's the inactive lanes.
>
> You also have instructions like BRKB which work only on predicates so it
> incorrectly determines the first operand to be the GP, while that's also the
> inactive lanes.
>
> However during apply_predication we do have the information about where the GP
> is.  This patch re-organizes the code to record this information into the
> function_instance such that folders have access to this information.
>
> For functions that are outliers like pmov_lane we can now override the
> availability of the intrinsics having a GP.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication):
>         Store gp_index.
>         (struct pmov_to_vector_lane_def): Mark instruction as has no GP.
>         * config/aarch64/aarch64-sve-builtins.h (function_instance::gp_value,
>         function_instance::inactive_values, function_instance::gp_index,
>         function_shape::has_gp_argument_p): New.
>         * config/aarch64/aarch64-sve-builtins.cc (gimple_folder::fold_pfalse):
>         Simplify code and use GP helpers.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/sve/pr121604_brk.c: New test.
>         * gcc.target/aarch64/sve2/pr121604_pmov.c: New test.
>
> Co-authored-by: Jennifer Schmitz <[email protected]>
>
> ---
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> index 
> 74a3338e9556c4369a3b1e6e864acfba4ced0e3a..0ee60e89aa6905ec5a281a017fde1c8963a03466
>  100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> @@ -21,7 +21,10 @@
>  #include "system.h"
>  #include "coretypes.h"
>  #include "tm.h"
> +#include "basic-block.h"
>  #include "tree.h"
> +#include "function.h"
> +#include "gimple.h"

Unless I missed something somewhere I don't see why these newly added
includes are needed. Maybe they came from the previous version of the
patch and you forgot to check if they are still needed.

Thanks,
Andrew

>  #include "rtl.h"
>  #include "tm_p.h"
>  #include "memmodel.h"
> @@ -68,23 +71,36 @@ za_group_is_pure_overload (const function_group_info 
> &group)
>     types in ARGUMENT_TYPES.  RETURN_TYPE is the type returned by the
>     function.  */
>  static void
> -apply_predication (const function_instance &instance, tree return_type,
> +apply_predication (function_instance &instance, tree return_type,
>                    vec<tree> &argument_types)
>  {
> +  /* Initially mark the function as not being predicated.  */
> +  instance.gp_index = -1;
> +
>    /* There are currently no SME ZA instructions that have both merging and
>       unpredicated forms, so for simplicity, the predicates are always 
> included
>       in the original format string.  */
>    if (instance.pred != PRED_none && instance.pred != PRED_za_m)
>      {
>        argument_types.quick_insert (0, instance.gp_type ());
> +      instance.gp_index = 0;
>        /* For unary merge operations, the first argument is a vector with
>          the same type as the result.  For unary_convert_narrowt it also
>          provides the "bottom" half of active elements, and is present
>          for all types of predication.  */
>        auto nargs = argument_types.length () - 1;
>        if (instance.shape->has_merge_argument_p (instance, nargs))
> -       argument_types.quick_insert (0, return_type);
> +       {
> +         argument_types.quick_insert (0, return_type);
> +         instance.gp_index = 1;
> +       }
>      }
> +
> +  /* Check if the shape has overriden the GP argument.  In this case register
> +     the types, predicate the function but don't mark it as having a GP and
> +     override what was set before.  */
> +  if (!instance.shape->has_gp_argument_p (instance))
> +    instance.gp_value_index = -1;
>  }
>
>  /* Parse and move past an element type in FORMAT and return it as a type
> @@ -3332,6 +3348,14 @@ struct pmov_to_vector_lane_def : public 
> overloaded_base<0>
>         but it doesn't currently have the necessary information.  */
>      return c.require_immediate_range (1, 1, bytes - 1);
>    }
> +
> +  /* This function has a predicate argument, and is a merging instruction, 
> but
> +     the predicate is not a GP, it's the inactive lanes.  */
> +  bool
> +  has_gp_argument_p (const function_instance &) const override
> +  {
> +    return false;
> +  }
>  };
>  SHAPE (pmov_to_vector_lane)
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index 
> 4956e364929c6a958f37f4d5ab8af668682225a8..4805537231227c565ff2a5ef32f30b24017fb3ad
>  100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -3632,24 +3632,22 @@ gimple_folder::redirect_pred_x ()
>  gimple *
>  gimple_folder::fold_pfalse ()
>  {
> -  if (pred == PRED_none)
> +  tree gp = gp_pred (call);
> +  /* If there isn't a GP then we can't do any folding as the instruction 
> isn't
> +     predicated.  */
> +  if (!gp)
>      return nullptr;
> -  tree arg0 = gimple_call_arg (call, 0);
> +
>    if (pred == PRED_m)
>      {
> -      /* Unary function shapes with _m predication are folded to the
> -        inactive vector (arg0), while other function shapes are folded
> -        to op1 (arg1).  */
> -      tree arg1 = gimple_call_arg (call, 1);
> -      if (is_pfalse (arg1))
> -       return fold_call_to (arg0);
> -      if (is_pfalse (arg0))
> -       return fold_call_to (arg1);
> +      tree val = inactive_values (call);
> +      if (is_pfalse (gp))
> +       return fold_call_to (val);
>        return nullptr;
>      }
> -  if ((pred == PRED_x || pred == PRED_z) && is_pfalse (arg0))
> +  if ((pred == PRED_x || pred == PRED_z) && is_pfalse (gp))
>      return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
> -  if (pred == PRED_implicit && is_pfalse (arg0))
> +  if (pred == PRED_implicit && is_pfalse (gp))
>      {
>        unsigned int flags = call_properties ();
>        /* Folding to lhs = {0, ...} is not appropriate for intrinsics with
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h 
> b/gcc/config/aarch64/aarch64-sve-builtins.h
> index 
> d6a58b450d6ddfd38788899b61b75c14e9472a99..fceb80f4d8e3e9ab1584450c98d35f94c4202c8a
>  100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.h
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.h
> @@ -403,6 +403,8 @@ public:
>    bool could_trap_p () const;
>
>    vector_type_index gp_type_index () const;
> +  tree gp_value (gcall *) const;
> +  tree inactive_values (gcall *) const;
>    tree gp_type () const;
>
>    unsigned int vectors_per_tuple () const;
> @@ -436,6 +438,7 @@ public:
>    group_suffix_index group_suffix_id;
>    predication_index pred;
>    fpm_mode_index fpm_mode;
> +  int gp_index;
>  };
>
>  class registered_function;
> @@ -801,6 +804,8 @@ public:
>    virtual bool has_merge_argument_p (const function_instance &,
>                                      unsigned int) const;
>
> +  virtual bool has_gp_argument_p (const function_instance &) const;
> +
>    virtual bool explicit_type_suffix_p (unsigned int) const = 0;
>
>    /* True if the group suffix is present in overloaded names.
> @@ -949,6 +954,34 @@ function_instance::gp_type () const
>    return acle_vector_types[0][gp_type_index ()];
>  }
>
> +/* Return the tree value that should be used as the governing predicate of
> +   this function.  If none then return NULL_TREE.  */
> +inline tree
> +function_instance::gp_value (gcall *call) const
> +{
> +  if (pred == PRED_none || gp_index < 0)
> +    return NULL_TREE;
> +
> +  return gimple_call_arg (call, gp_index);
> +}
> +
> +/* Return the tree value that should be used the inactive lanes should this
> +   function be a predicated function with a gp.  If none then return
> +   NULL_TREE.  */
> +inline tree
> +function_instance::inactive_values (gcall *call) const
> +{
> +  if (pred == PRED_none || gp_index < 0)
> +    return NULL_TREE;
> +
> +  /* Function is unary with m predicate.  */
> +  if (gp_index == 1)
> +    return gimple_call_arg (call, 0);
> +
> +  /* Else the inactive values are the next element.  */
> +  return gimple_call_arg (call, 1);
> +}
> +
>  /* If the function operates on tuples of vectors, return the number
>     of vectors in the tuples, otherwise return 1.  */
>  inline unsigned int
> @@ -1123,6 +1156,14 @@ function_shape::has_merge_argument_p (const 
> function_instance &instance,
>    return nargs == 1 && instance.pred == PRED_m;
>  }
>
> +/* Return true if INSTANCE has an predicate argument that can be used as the 
> global
> +   predicate.  */
> +inline bool
> +function_shape::has_gp_argument_p (const function_instance &instance) const
> +{
> +  return instance.pred != PRED_none;
> +}
> +
>  /* Return the mode of the result of a call.  */
>  inline machine_mode
>  function_expander::result_mode () const
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..a474a20554d30a19df701f89e16d10f5692b29fd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr121604_brk.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** foo:
> +**     ptrue   p0\.b, all
> +**     brkb    p0\.b, p0/z, p0\.b
> +**     ret
> +*/
> +svbool_t foo () {
> +  return svbrkb_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
> +}
> +
> +/*
> +** bar:
> +**     ptrue   p0\.b, all
> +**     brka    p0\.b, p0/z, p0\.b
> +**     ret
> +*/
> +svbool_t bar () {
> +  return svbrka_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c 
> b/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..16844ee4add3bc0f7c6b251b88913bf7c29b54cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr121604_pmov.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=armv8.2-a+sve2p1" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** f:
> +**     pfalse  p([0-7]+)\.b
> +**     mov     z0\.b, #-1
> +**     pmov    z0\[1\], p\1\.d
> +**     ret
> +*/
> +svuint64_t f () {
> +    return svpmov_lane_u64_m (svdup_u64 (~0UL), svpfalse (), 1);
> +}
>
>
> --

Reply via email to