On Wed, May 13, 2026 at 10:10 PM Richard Biener <[email protected]> wrote:
>
> This implements costing of vector construction and decomposition
> to a custom (possibly vector) element type to be used for vectorizer
> costing of the corresponding operations when dealing with
> VMAT_STRIDED_SLP loads and stores.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> I hope this is reasonable to follow.  OK?
>
> Thanks,
> Richard.
>
>         * config/i386/i386.cc (ix86_vector_cd_cost): New function,
>         enhanced and split out from ...
>         (ix86_default_vector_cost): ... here.
> ---
>  gcc/config/i386/i386.cc | 49 ++++++++++++++++++++++-------------------
>  1 file changed, 26 insertions(+), 23 deletions(-)
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 3f22f029f5e..7b85fc8c472 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -25589,6 +25589,31 @@ asm_preferred_eh_data_format (int code, int global)
>    return DW_EH_PE_absptr;
>  }
>
> +/* Cost of constructing or destructing a vector in VECMODE from/to elements
> +   of ELMODE.  */
> +static int
> +ix86_vector_cd_cost (machine_mode vecmode, machine_mode elmode)
> +{
> +  if (GET_MODE_BITSIZE (vecmode) < 128)
> +    return ((GET_MODE_BITSIZE (vecmode) / GET_MODE_BITSIZE (elmode) - 1)
> +           * ix86_cost->sse_op);
> +
> +  int n = GET_MODE_BITSIZE (vecmode) / 128;
> +  int cost = 0;
> +  /* Element inserts/extracts into/from N SSE vectors, the possible
> +     GPR <-> XMM moves have to be accounted for elsewhere.  */
> +  if (GET_MODE_BITSIZE (elmode) < 128)
> +    cost += n * (128 / GET_MODE_BITSIZE (elmode) - 1) * ix86_cost->sse_op;
> +  if (GET_MODE_BITSIZE (vecmode) >= 256
> +      && GET_MODE_BITSIZE (elmode) < 256)
> +    /* N/2 vinserti128/vextracti128 for SSE <-> AVX256.  */
> +    cost += n * ix86_vec_cost (V32QImode, ix86_cost->sse_op) / 2;
> +  if (GET_MODE_BITSIZE (vecmode) == 512)
> +    /* One vinserti64x4/vextracti64*4 for AVX256 <-> AVX512.  */

Typo: vextracti64*4 -> vextracti64x4.  Otherwise LGTM.

> +    cost += ix86_vec_cost (vecmode, ix86_cost->sse_op);
> +  return cost;
> +}
> +
>  /* Worker for ix86_builtin_vectorization_cost and the fallback calls
>     from ix86_vector_costs::add_stmt_cost.  */
>  static int
> @@ -25679,29 +25704,7 @@ ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
>          return ix86_vec_cost (mode, ix86_cost->sse_op);
>
>        case vec_construct:
> -       {
> -         int n = GET_MODE_NUNITS (mode);
> -         /* N - 1 element inserts into an SSE vector, the possible
> -            GPR -> XMM move is accounted for in add_stmt_cost.  */
> -         if (GET_MODE_BITSIZE (mode) <= 128)
> -           return (n - 1) * ix86_cost->sse_op;
> -         /* One vinserti128 for combining two SSE vectors for AVX256.  */
> -         else if (GET_MODE_BITSIZE (mode) == 256)
> -           return ((n - 2) * ix86_cost->sse_op
> -                   + ix86_vec_cost (mode, ix86_cost->sse_op));
> -         /* One vinserti64x4 and two vinserti128 for combining SSE
> -            and AVX256 vectors to AVX512.  */
> -         else if (GET_MODE_BITSIZE (mode) == 512)
> -           {
> -             machine_mode half_mode
> -               = mode_for_vector (GET_MODE_INNER (mode),
> -                                  GET_MODE_NUNITS (mode) / 2).require ();
> -             return ((n - 4) * ix86_cost->sse_op
> -                     + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
> -                     + ix86_vec_cost (mode, ix86_cost->sse_op));
> -           }
> -         gcc_unreachable ();
> -       }
> +       return ix86_vector_cd_cost (mode, GET_MODE_INNER (mode));
>
>        default:
>          gcc_unreachable ();
> --
> 2.51.0



-- 
BR,
Hongtao

Reply via email to