On Mon, 27 Nov 2023, Richard Sandiford wrote:

> can_duplicate_and_interleave_p checks whether we know a way of
> building a particular VLA SLP invariant.  g:60034ecf25597bd515f
> skipped that test for booleans, to support MASK_LEN_GATHER_LOAD
> calls with a dummy all-ones mask.  But there's nothing fundamentally
> different about VLA masks vs VLA data vectors.  If we have a VLA mask
> that isn't all-ones, we need some way of loading it.  This ultimately
> led to the ICE in the PR.
> 
> This patch fixes it by applying can_duplicate_and_interleave_p
> to masks, while also adding a special path for uniform vectors
> (of all kinds) to support the MASK_LEN_GATHER_LOAD usage.  This
> also fixes an XFAIL in pr36648.cc for SVE.
> 
> The patch is mostly Richard's.  My only changes were to skip
> redundant conversions and to use gimple_build_vector_from_val
> for all eligible vectors.
> 
> Tested on aarch64-linux-gnu (with and without SVE) and x86_64-linux-gnu.
> OK to install?

OK.

Thanks for picking this up.

Richard.

> Richard
> 
> 
> 2023-11-27  Richard Biener  <rguent...@suse.de>
>           Richard Sandiford  <richard.sandif...@arm.com>
> 
> gcc/
>       PR tree-optimization/112661
>       * tree-vect-slp.cc (vect_get_and_check_slp_defs): Defer duplicate-and-
>       interleave test to...
>       (vect_build_slp_tree_2): ...here, once we have all the operands.
>       Skip the test for uniform vectors.
>       (vect_create_constant_vectors): Detect uniform vectors.  Avoid
>       redundant conversions in that case.  Use gimple_build_vector_from_val
>       to build the vector.
> 
> gcc/testsuite/
>       * g++.dg/vect/pr36648.cc: Remove XFAIL for VLA load-lanes.
> ---
>  gcc/testsuite/g++.dg/vect/pr36648.cc |  2 +-
>  gcc/tree-vect-slp.cc                 | 56 +++++++++++++++++++---------
>  2 files changed, 40 insertions(+), 18 deletions(-)
> 
> diff --git a/gcc/testsuite/g++.dg/vect/pr36648.cc b/gcc/testsuite/g++.dg/vect/pr36648.cc
> index 8d24d3d445d..7bda82899d0 100644
> --- a/gcc/testsuite/g++.dg/vect/pr36648.cc
> +++ b/gcc/testsuite/g++.dg/vect/pr36648.cc
> @@ -25,6 +25,6 @@ int main() { }
>     targets, ! vect_no_align is a sufficient test.  */
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { { !  vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } xfail { vect_variable_length && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
>  
>  
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 4a09b3c2aca..6799b9375ae 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -763,18 +763,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
>       {
>         tree type = TREE_TYPE (oprnd);
>         dt = dts[i];
> -       if ((dt == vect_constant_def
> -            || dt == vect_external_def)
> -           && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
> -           && TREE_CODE (type) != BOOLEAN_TYPE
> -           && !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
> -         {
> -           if (dump_enabled_p ())
> -             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                              "Build SLP failed: invalid type of def "
> -                              "for variable-length SLP %T\n", oprnd);
> -           return -1;
> -         }
>  
>         /* For the swapping logic below force vect_reduction_def
>            for the reduction op in a SLP reduction group.  */
> @@ -2395,7 +2383,7 @@ out:
>    /* Create SLP_TREE nodes for the definition node/s.  */
>    FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
>      {
> -      slp_tree child;
> +      slp_tree child = nullptr;
>        unsigned int j;
>  
>        /* We're skipping certain operands from processing, for example
> @@ -2443,6 +2431,29 @@ out:
>        if (oprnd_info->first_dt == vect_external_def
>         || oprnd_info->first_dt == vect_constant_def)
>       {
> +       if (!GET_MODE_SIZE (vinfo->vector_mode).is_constant ())
> +         {
> +           tree op0;
> +           tree uniform_val = op0 = oprnd_info->ops[0];
> +           for (j = 1; j < oprnd_info->ops.length (); ++j)
> +             if (!operand_equal_p (uniform_val, oprnd_info->ops[j]))
> +               {
> +                 uniform_val = NULL_TREE;
> +                 break;
> +               }
> +           if (!uniform_val
> +               && !can_duplicate_and_interleave_p (vinfo,
> +                                                   oprnd_info->ops.length (),
> +                                                   TREE_TYPE (op0)))
> +             {
> +               matches[j] = false;
> +               if (dump_enabled_p ())
> +                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                                  "Build SLP failed: invalid type of def "
> +                                  "for variable-length SLP %T\n", op0);
> +               goto fail;
> +             }
> +         }
>         slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
>         SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
>         oprnd_info->ops = vNULL;
> @@ -8157,6 +8168,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
>  
>    number_of_places_left_in_vector = nunits;
>    constant_p = true;
> +  tree uniform_elt = NULL_TREE;
>    tree_vector_builder elts (vector_type, nunits, 1);
>    elts.quick_grow (nunits);
>    stmt_vec_info insert_after = NULL;
> @@ -8166,8 +8178,14 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
>        for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
>          {
>            /* Create 'vect_ = {op0,op1,...,opn}'.  */
> -          number_of_places_left_in_vector--;
>         tree orig_op = op;
> +       if (number_of_places_left_in_vector == nunits)
> +         uniform_elt = op;
> +       else if (uniform_elt && operand_equal_p (uniform_elt, op))
> +         op = elts[number_of_places_left_in_vector];
> +       else
> +         uniform_elt = NULL_TREE;
> +       number_of_places_left_in_vector--;
>         if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
>           {
>             if (CONSTANT_CLASS_P (op))
> @@ -8236,9 +8254,13 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
>  
>            if (number_of_places_left_in_vector == 0)
>              {
> -           if (constant_p
> -               ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
> -               : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
> +           auto type_nunits = TYPE_VECTOR_SUBPARTS (vector_type);
> +           if (uniform_elt)
> +             vec_cst = gimple_build_vector_from_val (&ctor_seq, vector_type,
> +                                                     elts[0]);
> +           else if (constant_p
> +                    ? multiple_p (type_nunits, nunits)
> +                    : known_eq (type_nunits, nunits))
>               vec_cst = gimple_build_vector (&ctor_seq, &elts);
>             else
>               {
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to