This implements costing of vector construction from, and decomposition
into, a custom (possibly vector) element type.  It is to be used for
vectorizer costing of the corresponding operations when dealing with
VMAT_STRIDED_SLP loads and stores.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I hope this is reasonable to follow.  OK?

Thanks,
Richard.

        * config/i386/i386.cc (ix86_vector_cd_cost): New function,
        enhanced and split out from ...
        (ix86_default_vector_cost): ... here.
---
 gcc/config/i386/i386.cc | 49 ++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3f22f029f5e..7b85fc8c472 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25589,6 +25589,31 @@ asm_preferred_eh_data_format (int code, int global)
   return DW_EH_PE_absptr;
 }
 
+/* Cost of constructing a vector in VECMODE from, or decomposing it into,
+   elements of ELMODE; ELMODE may itself be a (narrower) vector mode.  */
+static int
+ix86_vector_cd_cost (machine_mode vecmode, machine_mode elmode)
+{
+  if (GET_MODE_BITSIZE (vecmode) < 128)
+    return ((GET_MODE_BITSIZE (vecmode) / GET_MODE_BITSIZE (elmode) - 1)
+           * ix86_cost->sse_op);
+
+  int n = GET_MODE_BITSIZE (vecmode) / 128;
+  int cost = 0;
+  /* Element inserts/extracts into/from N SSE vectors; the possible
+     GPR <-> XMM moves have to be accounted for elsewhere.  */
+  if (GET_MODE_BITSIZE (elmode) < 128)
+    cost += n * (128 / GET_MODE_BITSIZE (elmode) - 1) * ix86_cost->sse_op;
+  if (GET_MODE_BITSIZE (vecmode) >= 256
+      && GET_MODE_BITSIZE (elmode) < 256)
+    /* N/2 vinserti128/vextracti128 for SSE <-> AVX256.  */
+    cost += n * ix86_vec_cost (V32QImode, ix86_cost->sse_op) / 2;
+  if (GET_MODE_BITSIZE (vecmode) == 512)
+    /* One vinserti64x4/vextracti64x4 for AVX256 <-> AVX512.  */
+    cost += ix86_vec_cost (vecmode, ix86_cost->sse_op);
+  return cost;
+}
+
 /* Worker for ix86_builtin_vectorization_cost and the fallback calls
    from ix86_vector_costs::add_stmt_cost.  */
 static int
@@ -25679,29 +25704,7 @@ ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
         return ix86_vec_cost (mode, ix86_cost->sse_op);
 
       case vec_construct:
-       {
-         int n = GET_MODE_NUNITS (mode);
-         /* N - 1 element inserts into an SSE vector, the possible
-            GPR -> XMM move is accounted for in add_stmt_cost.  */
-         if (GET_MODE_BITSIZE (mode) <= 128)
-           return (n - 1) * ix86_cost->sse_op;
-         /* One vinserti128 for combining two SSE vectors for AVX256.  */
-         else if (GET_MODE_BITSIZE (mode) == 256)
-           return ((n - 2) * ix86_cost->sse_op
-                   + ix86_vec_cost (mode, ix86_cost->sse_op));
-         /* One vinserti64x4 and two vinserti128 for combining SSE
-            and AVX256 vectors to AVX512.  */
-         else if (GET_MODE_BITSIZE (mode) == 512)
-           {
-             machine_mode half_mode
-               = mode_for_vector (GET_MODE_INNER (mode),
-                                  GET_MODE_NUNITS (mode) / 2).require ();
-             return ((n - 4) * ix86_cost->sse_op
-                     + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
-                     + ix86_vec_cost (mode, ix86_cost->sse_op));
-           }
-         gcc_unreachable ();
-       }
+       return ix86_vector_cd_cost (mode, GET_MODE_INNER (mode));
 
       default:
         gcc_unreachable ();
-- 
2.51.0

Reply via email to