https://gcc.gnu.org/g:fcadd6d32398271db9a4935d51a0066648962674

commit r16-2507-gfcadd6d32398271db9a4935d51a0066648962674
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Jul 25 09:40:27 2025 +0200

    Remove store interleaving support
    
    The following removes the non-SLP store interleaving support which
    was already almost unused.
    
            * tree-vectorizer.h (vect_permute_store_chain): Remove.
            * tree-vect-data-refs.cc (vect_permute_store_chain): Likewise.
            * tree-vect-stmts.cc (vectorizable_store): Remove comment
            about store interleaving.

Diff:
---
 gcc/tree-vect-data-refs.cc | 198 ---------------------------------------------
 gcc/tree-vect-stmts.cc     |  33 --------
 gcc/tree-vectorizer.h      |   3 -
 3 files changed, 234 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 824b5f0f7698..1cc17c2cd73a 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -6078,204 +6078,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
 }
 
 
-/* Function vect_permute_store_chain.
-
-   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
-   a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
-   the data correctly for the stores.  Return the final references for stores
-   in RESULT_CHAIN.
-
-   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-   The input is 4 vectors each containing 8 elements.  We assign a number to
-   each element, the input sequence is:
-
-   1st vec:   0  1  2  3  4  5  6  7
-   2nd vec:   8  9 10 11 12 13 14 15
-   3rd vec:  16 17 18 19 20 21 22 23
-   4th vec:  24 25 26 27 28 29 30 31
-
-   The output sequence should be:
-
-   1st vec:  0  8 16 24  1  9 17 25
-   2nd vec:  2 10 18 26  3 11 19 27
-   3rd vec:  4 12 20 28  5 13 21 30
-   4th vec:  6 14 22 30  7 15 23 31
-
-   i.e., we interleave the contents of the four vectors in their order.
-
-   We use interleave_high/low instructions to create such output.  The input of
-   each interleave_high/low operation is two vectors:
-   1st vec    2nd vec
-   0 1 2 3    4 5 6 7
-   the even elements of the result vector are obtained left-to-right from the
-   high/low elements of the first vector.  The odd elements of the result are
-   obtained left-to-right from the high/low elements of the second vector.
-   The output of interleave_high will be:   0 4 1 5
-   and of interleave_low:                   2 6 3 7
-
-
-   The permutation is done in log LENGTH stages.  In each stage interleave_high
-   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
-   where the first argument is taken from the first half of DR_CHAIN and the
-   second argument from it's second half.
-   In our example,
-
-   I1: interleave_high (1st vec, 3rd vec)
-   I2: interleave_low (1st vec, 3rd vec)
-   I3: interleave_high (2nd vec, 4th vec)
-   I4: interleave_low (2nd vec, 4th vec)
-
-   The output for the first stage is:
-
-   I1:  0 16  1 17  2 18  3 19
-   I2:  4 20  5 21  6 22  7 23
-   I3:  8 24  9 25 10 26 11 27
-   I4: 12 28 13 29 14 30 15 31
-
-   The output of the second stage, i.e. the final result is:
-
-   I1:  0  8 16 24  1  9 17 25
-   I2:  2 10 18 26  3 11 19 27
-   I3:  4 12 20 28  5 13 21 30
-   I4:  6 14 22 30  7 15 23 31.  */
-
-void
-vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
-                         unsigned int length,
-                         stmt_vec_info stmt_info,
-                         gimple_stmt_iterator *gsi,
-                         vec<tree> *result_chain)
-{
-  tree vect1, vect2, high, low;
-  gimple *perm_stmt;
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree perm_mask_low, perm_mask_high;
-  tree data_ref;
-  tree perm3_mask_low, perm3_mask_high;
-  unsigned int i, j, n, log_length = exact_log2 (length);
-
-  result_chain->quick_grow (length);
-  memcpy (result_chain->address (), dr_chain.address (),
-         length * sizeof (tree));
-
-  if (length == 3)
-    {
-      /* vect_grouped_store_supported ensures that this is constant.  */
-      unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
-      unsigned int j0 = 0, j1 = 0, j2 = 0;
-
-      vec_perm_builder sel (nelt, nelt, 1);
-      sel.quick_grow (nelt);
-      vec_perm_indices indices;
-      for (j = 0; j < 3; j++)
-        {
-         int nelt0 = ((3 - j) * nelt) % 3;
-         int nelt1 = ((3 - j) * nelt + 1) % 3;
-         int nelt2 = ((3 - j) * nelt + 2) % 3;
-
-         for (i = 0; i < nelt; i++)
-           {
-             if (3 * i + nelt0 < nelt)
-               sel[3 * i + nelt0] = j0++;
-             if (3 * i + nelt1 < nelt)
-               sel[3 * i + nelt1] = nelt + j1++;
-             if (3 * i + nelt2 < nelt)
-               sel[3 * i + nelt2] = 0;
-           }
-         indices.new_vector (sel, 2, nelt);
-         perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
-         for (i = 0; i < nelt; i++)
-           {
-             if (3 * i + nelt0 < nelt)
-               sel[3 * i + nelt0] = 3 * i + nelt0;
-             if (3 * i + nelt1 < nelt)
-               sel[3 * i + nelt1] = 3 * i + nelt1;
-             if (3 * i + nelt2 < nelt)
-               sel[3 * i + nelt2] = nelt + j2++;
-           }
-         indices.new_vector (sel, 2, nelt);
-         perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
-         vect1 = dr_chain[0];
-         vect2 = dr_chain[1];
-
-         /* Create interleaving stmt:
-            low = VEC_PERM_EXPR <vect1, vect2,
-                                 {j, nelt, *, j + 1, nelt + j + 1, *,
-                                  j + 2, nelt + j + 2, *, ...}>  */
-         data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
-         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
-                                          vect2, perm3_mask_low);
-         vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
-         vect1 = data_ref;
-         vect2 = dr_chain[2];
-         /* Create interleaving stmt:
-            low = VEC_PERM_EXPR <vect1, vect2,
-                                 {0, 1, nelt + j, 3, 4, nelt + j + 1,
-                                  6, 7, nelt + j + 2, ...}>  */
-         data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
-         perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
-                                          vect2, perm3_mask_high);
-         vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-         (*result_chain)[j] = data_ref;
-       }
-    }
-  else
-    {
-      /* If length is not equal to 3 then only power of 2 is supported.  */
-      gcc_assert (pow2p_hwi (length));
-
-      /* The encoding has 2 interleaved stepped patterns.  */
-      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
-      vec_perm_builder sel (nelt, 2, 3);
-      sel.quick_grow (6);
-      for (i = 0; i < 3; i++)
-       {
-         sel[i * 2] = i;
-         sel[i * 2 + 1] = i + nelt;
-       }
-       vec_perm_indices indices (sel, 2, nelt);
-       perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
-       for (i = 0; i < 6; i++)
-         sel[i] += exact_div (nelt, 2);
-       indices.new_vector (sel, 2, nelt);
-       perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
-       for (i = 0, n = log_length; i < n; i++)
-         {
-           for (j = 0; j < length/2; j++)
-             {
-               vect1 = dr_chain[j];
-               vect2 = dr_chain[j+length/2];
-
-               /* Create interleaving stmt:
-                  high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
-                                                       ...}>  */
-               high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
-               perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
-                                                vect2, perm_mask_high);
-               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-               (*result_chain)[2*j] = high;
-
-               /* Create interleaving stmt:
-                  low = VEC_PERM_EXPR <vect1, vect2,
-                                       {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
-                                        ...}>  */
-               low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
-               perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
-                                                vect2, perm_mask_low);
-               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-               (*result_chain)[2*j+1] = low;
-             }
-           memcpy (dr_chain.address (), result_chain->address (),
-                   length * sizeof (tree));
-         }
-    }
-}
-
 /* Function vect_setup_realignment
 
    This function is called when vectorizing an unaligned load using
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index c1aaa27ff056..b17efd5f3e61 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8413,39 +8413,6 @@ vectorizable_store (vec_info *vinfo,
      more than one vector stmt - i.e - we need to "unroll" the
      vector stmt by a factor VF/nunits.  */
 
-  /* In case of interleaving (non-unit grouped access):
-
-        S1:  &base + 2 = x2
-        S2:  &base = x0
-        S3:  &base + 1 = x1
-        S4:  &base + 3 = x3
-
-     We create vectorized stores starting from base address (the access of the
-     first stmt in the chain (S2 in the above example), when the last store stmt
-     of the chain (S4) is reached:
-
-        VS1: &base = vx2
-       VS2: &base + vec_size*1 = vx0
-       VS3: &base + vec_size*2 = vx1
-       VS4: &base + vec_size*3 = vx3
-
-     Then permutation statements are generated:
-
-       VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
-       VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
-       ...
-
-     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
-     (the order of the data-refs in the output of vect_permute_store_chain
-     corresponds to the order of scalar stmts in the interleaving chain - see
-     the documentation of vect_permute_store_chain()).
-
-     In case of both multiple types and interleaving, above vector stores and
-     permutation stmts are created for every copy.  The result vector stmts are
-     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
-     STMT_VINFO_RELATED_STMT for the next copies.
-  */
-
   auto_vec<tree> dr_chain (group_size);
   auto_vec<tree> vec_masks;
   tree vec_mask = NULL;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 878047436c86..e7f28e16779a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2568,9 +2568,6 @@ extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, boo
 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
 extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
                                              bool, vec<int> * = nullptr);
-extern void vect_permute_store_chain (vec_info *, vec<tree> &,
-                                     unsigned int, stmt_vec_info,
-                                     gimple_stmt_iterator *, vec<tree> *);
 extern tree vect_setup_realignment (vec_info *,
                                    stmt_vec_info, gimple_stmt_iterator *,
                                    tree *, enum dr_alignment_support, tree,

Reply via email to