On Wed, May 13, 2026 at 10:10 PM Richard Biener <[email protected]> wrote:
>
> This implements costing of vector construction and decomposition
> to a custom (possibly vector) element type to be used for vectorizer
> costing of the corresponding operations when dealing with
> VMAT_STRIDED_SLP loads and stores.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> I hope this is reasonable to follow. OK?
>
> Thanks,
> Richard.
>
> * config/i386/i386.cc (ix86_vector_cd_cost): New function,
> enhanced and split out from ...
> (ix86_default_vector_cost): ... here.
> ---
> gcc/config/i386/i386.cc | 49 ++++++++++++++++++++++-------------------
> 1 file changed, 26 insertions(+), 23 deletions(-)
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 3f22f029f5e..7b85fc8c472 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -25589,6 +25589,31 @@ asm_preferred_eh_data_format (int code, int global)
> return DW_EH_PE_absptr;
> }
>
> +/* Cost of constructing or destructing a vector in VECMODE from/to elements
> + of ELMODE. */
> +static int
> +ix86_vector_cd_cost (machine_mode vecmode, machine_mode elmode)
> +{
> + if (GET_MODE_BITSIZE (vecmode) < 128)
> + return ((GET_MODE_BITSIZE (vecmode) / GET_MODE_BITSIZE (elmode) - 1)
> + * ix86_cost->sse_op);
> +
> + int n = GET_MODE_BITSIZE (vecmode) / 128;
> + int cost = 0;
> + /* Element inserts/extracts into/from N SSE vectors, the possible
> + GPR <-> XMM moves have to be accounted for elsewhere. */
> + if (GET_MODE_BITSIZE (elmode) < 128)
> + cost += n * (128 / GET_MODE_BITSIZE (elmode) - 1) * ix86_cost->sse_op;
> + if (GET_MODE_BITSIZE (vecmode) >= 256
> + && GET_MODE_BITSIZE (elmode) < 256)
> + /* N/2 vinserti128/vextracti128 for SSE <-> AVX256. */
> + cost += n * ix86_vec_cost (V32QImode, ix86_cost->sse_op) / 2;
> + if (GET_MODE_BITSIZE (vecmode) == 512)
> + /* One vinserti64x4/vextracti64*4 for AVX256 <-> AVX512. */
Typo: vextracti64*4 -> vextracti64x4. Otherwise LGTM.
> + cost += ix86_vec_cost (vecmode, ix86_cost->sse_op);
> + return cost;
> +}
> +
> /* Worker for ix86_builtin_vectorization_cost and the fallback calls
> from ix86_vector_costs::add_stmt_cost. */
> static int
> @@ -25679,29 +25704,7 @@ ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
> return ix86_vec_cost (mode, ix86_cost->sse_op);
>
> case vec_construct:
> - {
> - int n = GET_MODE_NUNITS (mode);
> - /* N - 1 element inserts into an SSE vector, the possible
> - GPR -> XMM move is accounted for in add_stmt_cost. */
> - if (GET_MODE_BITSIZE (mode) <= 128)
> - return (n - 1) * ix86_cost->sse_op;
> - /* One vinserti128 for combining two SSE vectors for AVX256. */
> - else if (GET_MODE_BITSIZE (mode) == 256)
> - return ((n - 2) * ix86_cost->sse_op
> - + ix86_vec_cost (mode, ix86_cost->sse_op));
> - /* One vinserti64x4 and two vinserti128 for combining SSE
> - and AVX256 vectors to AVX512. */
> - else if (GET_MODE_BITSIZE (mode) == 512)
> - {
> - machine_mode half_mode
> - = mode_for_vector (GET_MODE_INNER (mode),
> - GET_MODE_NUNITS (mode) / 2).require ();
> - return ((n - 4) * ix86_cost->sse_op
> - + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
> - + ix86_vec_cost (mode, ix86_cost->sse_op));
> - }
> - gcc_unreachable ();
> - }
> + return ix86_vector_cd_cost (mode, GET_MODE_INNER (mode));
>
> default:
> gcc_unreachable ();
> --
> 2.51.0
--
BR,
Hongtao