On Thu, Sep 14, 2023 at 5:12 AM Kewen Lin <li...@linux.ibm.com> wrote:
>
> This patch adjusts the cost handling on VMAT_LOAD_STORE_LANES
> in function vectorizable_store.  We don't call function
> vect_model_store_cost for it any more.  It's the case of
> interleaving stores, so it skips all stmts except for
> first_stmt_info and considers the whole group when costing
> first_stmt_info.  This patch shouldn't have any functional
> changes.

OK.

> gcc/ChangeLog:
>
>         * tree-vect-stmts.cc (vect_model_store_cost): Assert it will never
>         get VMAT_LOAD_STORE_LANES.
>         (vectorizable_store): Adjust the cost handling on
>         VMAT_LOAD_STORE_LANES without calling vect_model_store_cost.
>         Factor out new lambda function update_prologue_cost.
> ---
>  gcc/tree-vect-stmts.cc | 110 ++++++++++++++++++++++++++++-------------
>  1 file changed, 75 insertions(+), 35 deletions(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 3d01168080a..fbd16b8a487 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -966,7 +966,8 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info 
> stmt_info, int ncopies,
>  {
>    gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
>               && memory_access_type != VMAT_ELEMENTWISE
> -             && memory_access_type != VMAT_STRIDED_SLP);
> +             && memory_access_type != VMAT_STRIDED_SLP
> +             && memory_access_type != VMAT_LOAD_STORE_LANES);
>    unsigned int inside_cost = 0, prologue_cost = 0;
>    stmt_vec_info first_stmt_info = stmt_info;
>    bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
> @@ -8408,7 +8409,8 @@ vectorizable_store (vec_info *vinfo,
>        if (grouped_store
>           && !slp
>           && first_stmt_info != stmt_info
> -         && memory_access_type == VMAT_ELEMENTWISE)
> +         && (memory_access_type == VMAT_ELEMENTWISE
> +             || memory_access_type == VMAT_LOAD_STORE_LANES))
>         return true;
>      }
>    gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE 
> (stmt_info));
> @@ -8479,6 +8481,31 @@ vectorizable_store (vec_info *vinfo,
>      dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = 
> %d\n",
>                      ncopies);
>
> +  /* Check if we need to update prologue cost for invariant,
> +     and update it accordingly if so.  If it's not for
> +     interleaving store, we can just check vls_type; but if
> +     it's for interleaving store, need to check the def_type
> +     of the stored value since the current vls_type is just
> +     for first_stmt_info.  */
> +  auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
> +  {
> +    gcc_assert (costing_p);
> +    if (slp)
> +      return;
> +    if (grouped_store)
> +      {
> +       gcc_assert (store_rhs);
> +       enum vect_def_type cdt;
> +       gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
> +       if (cdt != vect_constant_def && cdt != vect_external_def)
> +         return;
> +      }
> +    else if (vls_type != VLS_STORE_INVARIANT)
> +      return;
> +    *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, 
> stmt_info,
> +                                       0, vect_prologue);
> +  };
> +
>    if (memory_access_type == VMAT_ELEMENTWISE
>        || memory_access_type == VMAT_STRIDED_SLP)
>      {
> @@ -8646,14 +8673,8 @@ vectorizable_store (vec_info *vinfo,
>           if (!costing_p)
>             vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
>                                &vec_oprnds);
> -         else if (!slp)
> -           {
> -             enum vect_def_type cdt;
> -             gcc_assert (vect_is_simple_use (op, vinfo, &cdt));
> -             if (cdt == vect_constant_def || cdt == vect_external_def)
> -               prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
> -                                                  stmt_info, 0, 
> vect_prologue);
> -           }
> +         else
> +           update_prologue_cost (&prologue_cost, op);
>           unsigned int group_el = 0;
>           unsigned HOST_WIDE_INT
>             elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
> @@ -8857,13 +8878,7 @@ vectorizable_store (vec_info *vinfo,
>    if (memory_access_type == VMAT_LOAD_STORE_LANES)
>      {
>        gcc_assert (!slp && grouped_store);
> -      if (costing_p)
> -       {
> -         vect_model_store_cost (vinfo, stmt_info, ncopies, 
> memory_access_type,
> -                                alignment_support_scheme, misalignment,
> -                                vls_type, slp_node, cost_vec);
> -         return true;
> -       }
> +      unsigned inside_cost = 0, prologue_cost = 0;
>        for (j = 0; j < ncopies; j++)
>         {
>           gimple *new_stmt;
> @@ -8879,29 +8894,39 @@ vectorizable_store (vec_info *vinfo,
>                      DR_GROUP_SIZE is the exact number of stmts in the
>                      chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE.  */
>                   op = vect_get_store_rhs (next_stmt_info);
> -                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, 
> ncopies,
> -                                                op, gvec_oprnds[i]);
> -                 vec_oprnd = (*gvec_oprnds[i])[0];
> -                 dr_chain.quick_push (vec_oprnd);
> +                 if (costing_p)
> +                   update_prologue_cost (&prologue_cost, op);
> +                 else
> +                   {
> +                     vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
> +                                                    ncopies, op,
> +                                                    gvec_oprnds[i]);
> +                     vec_oprnd = (*gvec_oprnds[i])[0];
> +                     dr_chain.quick_push (vec_oprnd);
> +                   }
>                   next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
>                 }
> -             if (mask)
> +
> +             if (!costing_p)
>                 {
> -                 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
> -                                                mask, &vec_masks,
> -                                                mask_vectype);
> -                 vec_mask = vec_masks[0];
> -               }
> +                 if (mask)
> +                   {
> +                     vect_get_vec_defs_for_operand (vinfo, stmt_info, 
> ncopies,
> +                                                    mask, &vec_masks,
> +                                                    mask_vectype);
> +                     vec_mask = vec_masks[0];
> +                   }
>
> -             /* We should have catched mismatched types earlier.  */
> -             gcc_assert (
> -               useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
> -             dataref_ptr
> -               = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
> -                                           NULL, offset, &dummy, gsi,
> -                                           &ptr_incr, false, bump);
> +                 /* We should have catched mismatched types earlier.  */
> +                 gcc_assert (
> +                   useless_type_conversion_p (vectype, TREE_TYPE 
> (vec_oprnd)));
> +                 dataref_ptr
> +                   = vect_create_data_ref_ptr (vinfo, first_stmt_info,
> +                                               aggr_type, NULL, offset, 
> &dummy,
> +                                               gsi, &ptr_incr, false, bump);
> +               }
>             }
> -         else
> +         else if (!costing_p)
>             {
>               gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
>               /* DR_CHAIN is then used as an input to
> @@ -8917,6 +8942,15 @@ vectorizable_store (vec_info *vinfo,
>                                              stmt_info, bump);
>             }
>
> +         if (costing_p)
> +           {
> +             for (i = 0; i < vec_num; i++)
> +               vect_get_store_cost (vinfo, stmt_info, 1,
> +                                    alignment_support_scheme, misalignment,
> +                                    &inside_cost, cost_vec);
> +             continue;
> +           }
> +
>           /* Get an array into which we can store the individual vectors.  */
>           tree vec_array = create_vector_array (vectype, vec_num);
>
> @@ -9003,6 +9037,12 @@ vectorizable_store (vec_info *vinfo,
>           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
>         }
>
> +      if (costing_p && dump_enabled_p ())
> +       dump_printf_loc (MSG_NOTE, vect_location,
> +                        "vect_model_store_cost: inside_cost = %d, "
> +                        "prologue_cost = %d .\n",
> +                        inside_cost, prologue_cost);
> +
>        return true;
>      }
>
> --
> 2.31.1
>

Reply via email to