On Thu, 14 May 2026, Hongtao Liu wrote:
> On Wed, May 13, 2026 at 10:10 PM Richard Biener <[email protected]> wrote:
> >
> > This implements costing of vector construction and decomposition
> > to a custom (possibly vector) element type to be used for vectorizer
> > costing of the corresponding operations when dealing with
> > VMAT_STRIDED_SLP loads and stores.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > I hope this is reasonable to follow. OK?
> >
> > Thanks,
> > Richard.
> >
> > * config/i386/i386.cc (ix86_vector_cd_cost): New function,
> > enhanced and split out from ...
> > (ix86_default_vector_cost): ... here.
> > ---
> > gcc/config/i386/i386.cc | 49 ++++++++++++++++++++++-------------------
> > 1 file changed, 26 insertions(+), 23 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index 3f22f029f5e..7b85fc8c472 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -25589,6 +25589,31 @@ asm_preferred_eh_data_format (int code, int global)
> > return DW_EH_PE_absptr;
> > }
> >
> > +/* Cost of constructing or destructing a vector in VECMODE from/to elements
> > + of ELMODE. */
> > +static int
> > +ix86_vector_cd_cost (machine_mode vecmode, machine_mode elmode)
> > +{
> > + if (GET_MODE_BITSIZE (vecmode) < 128)
> > + return ((GET_MODE_BITSIZE (vecmode) / GET_MODE_BITSIZE (elmode) - 1)
> > + * ix86_cost->sse_op);
> > +
> > + int n = GET_MODE_BITSIZE (vecmode) / 128;
> > + int cost = 0;
> > + /* Element inserts/extracts into/from N SSE vectors, the possible
> > + GPR <-> XMM moves have to be accounted for elsewhere. */
> > + if (GET_MODE_BITSIZE (elmode) < 128)
> > + cost += n * (128 / GET_MODE_BITSIZE (elmode) - 1) * ix86_cost->sse_op;
> > + if (GET_MODE_BITSIZE (vecmode) >= 256
> > + && GET_MODE_BITSIZE (elmode) < 256)
> > + /* N/2 vinserti128/vextracti128 for SSE <-> AVX256. */
> > + cost += n * ix86_vec_cost (V32QImode, ix86_cost->sse_op) / 2;
> > + if (GET_MODE_BITSIZE (vecmode) == 512)
> > + /* One vinserti64x4/vextracti64*4 for AVX256 <-> AVX512. */
>
> vextracti64*4 -> vextracti64x4, others LGTM.
Thanks, fixed.  I'll post this as part of a complete series that makes use of
it before pushing, in a few weeks.
Richard.
> > + cost += ix86_vec_cost (vecmode, ix86_cost->sse_op);
> > + return cost;
> > +}
> > +
> > /* Worker for ix86_builtin_vectorization_cost and the fallback calls
> > from ix86_vector_costs::add_stmt_cost. */
> > static int
> > @@ -25679,29 +25704,7 @@ ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
> > return ix86_vec_cost (mode, ix86_cost->sse_op);
> >
> > case vec_construct:
> > - {
> > - int n = GET_MODE_NUNITS (mode);
> > - /* N - 1 element inserts into an SSE vector, the possible
> > - GPR -> XMM move is accounted for in add_stmt_cost. */
> > - if (GET_MODE_BITSIZE (mode) <= 128)
> > - return (n - 1) * ix86_cost->sse_op;
> > - /* One vinserti128 for combining two SSE vectors for AVX256. */
> > - else if (GET_MODE_BITSIZE (mode) == 256)
> > - return ((n - 2) * ix86_cost->sse_op
> > - + ix86_vec_cost (mode, ix86_cost->sse_op));
> > - /* One vinserti64x4 and two vinserti128 for combining SSE
> > - and AVX256 vectors to AVX512. */
> > - else if (GET_MODE_BITSIZE (mode) == 512)
> > - {
> > - machine_mode half_mode
> > - = mode_for_vector (GET_MODE_INNER (mode),
> > - GET_MODE_NUNITS (mode) / 2).require ();
> > - return ((n - 4) * ix86_cost->sse_op
> > - + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
> > - + ix86_vec_cost (mode, ix86_cost->sse_op));
> > - }
> > - gcc_unreachable ();
> > - }
> > + return ix86_vector_cd_cost (mode, GET_MODE_INNER (mode));
> >
> > default:
> > gcc_unreachable ();
> > --
> > 2.51.0
>
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)