https://gcc.gnu.org/g:fcadd6d32398271db9a4935d51a0066648962674
commit r16-2507-gfcadd6d32398271db9a4935d51a0066648962674 Author: Richard Biener <rguent...@suse.de> Date: Fri Jul 25 09:40:27 2025 +0200 Remove store interleaving support The following removes the non-SLP store interleaving support which was already almost unused. * tree-vectorizer.h (vect_permute_store_chain): Remove. * tree-vect-data-refs.cc (vect_permute_store_chain): Likewise. * tree-vect-stmts.cc (vectorizable_store): Remove comment about store interleaving. Diff: --- gcc/tree-vect-data-refs.cc | 198 --------------------------------------------- gcc/tree-vect-stmts.cc | 33 -------- gcc/tree-vectorizer.h | 3 - 3 files changed, 234 deletions(-) diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 824b5f0f7698..1cc17c2cd73a 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -6078,204 +6078,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, } -/* Function vect_permute_store_chain. - - Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be - a power of 2 or equal to 3, generate interleave_high/low stmts to reorder - the data correctly for the stores. Return the final references for stores - in RESULT_CHAIN. - - E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. - The input is 4 vectors each containing 8 elements. We assign a number to - each element, the input sequence is: - - 1st vec: 0 1 2 3 4 5 6 7 - 2nd vec: 8 9 10 11 12 13 14 15 - 3rd vec: 16 17 18 19 20 21 22 23 - 4th vec: 24 25 26 27 28 29 30 31 - - The output sequence should be: - - 1st vec: 0 8 16 24 1 9 17 25 - 2nd vec: 2 10 18 26 3 11 19 27 - 3rd vec: 4 12 20 28 5 13 21 30 - 4th vec: 6 14 22 30 7 15 23 31 - - i.e., we interleave the contents of the four vectors in their order. - - We use interleave_high/low instructions to create such output. The input of - each interleave_high/low operation is two vectors: - 1st vec 2nd vec - 0 1 2 3 4 5 6 7 - the even elements of the result vector are obtained left-to-right from the - high/low elements of the first vector. The odd elements of the result are - obtained left-to-right from the high/low elements of the second vector. - The output of interleave_high will be: 0 4 1 5 - and of interleave_low: 2 6 3 7 - - - The permutation is done in log LENGTH stages. In each stage interleave_high - and interleave_low stmts are created for each pair of vectors in DR_CHAIN, - where the first argument is taken from the first half of DR_CHAIN and the - second argument from it's second half. - In our example, - - I1: interleave_high (1st vec, 3rd vec) - I2: interleave_low (1st vec, 3rd vec) - I3: interleave_high (2nd vec, 4th vec) - I4: interleave_low (2nd vec, 4th vec) - - The output for the first stage is: - - I1: 0 16 1 17 2 18 3 19 - I2: 4 20 5 21 6 22 7 23 - I3: 8 24 9 25 10 26 11 27 - I4: 12 28 13 29 14 30 15 31 - - The output of the second stage, i.e. the final result is: - - I1: 0 8 16 24 1 9 17 25 - I2: 2 10 18 26 3 11 19 27 - I3: 4 12 20 28 5 13 21 30 - I4: 6 14 22 30 7 15 23 31. */ - -void -vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain, - unsigned int length, - stmt_vec_info stmt_info, - gimple_stmt_iterator *gsi, - vec<tree> *result_chain) -{ - tree vect1, vect2, high, low; - gimple *perm_stmt; - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - tree perm_mask_low, perm_mask_high; - tree data_ref; - tree perm3_mask_low, perm3_mask_high; - unsigned int i, j, n, log_length = exact_log2 (length); - - result_chain->quick_grow (length); - memcpy (result_chain->address (), dr_chain.address (), - length * sizeof (tree)); - - if (length == 3) - { - /* vect_grouped_store_supported ensures that this is constant. */ - unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); - unsigned int j0 = 0, j1 = 0, j2 = 0; - - vec_perm_builder sel (nelt, nelt, 1); - sel.quick_grow (nelt); - vec_perm_indices indices; - for (j = 0; j < 3; j++) - { - int nelt0 = ((3 - j) * nelt) % 3; - int nelt1 = ((3 - j) * nelt + 1) % 3; - int nelt2 = ((3 - j) * nelt + 2) % 3; - - for (i = 0; i < nelt; i++) - { - if (3 * i + nelt0 < nelt) - sel[3 * i + nelt0] = j0++; - if (3 * i + nelt1 < nelt) - sel[3 * i + nelt1] = nelt + j1++; - if (3 * i + nelt2 < nelt) - sel[3 * i + nelt2] = 0; - } - indices.new_vector (sel, 2, nelt); - perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices); - - for (i = 0; i < nelt; i++) - { - if (3 * i + nelt0 < nelt) - sel[3 * i + nelt0] = 3 * i + nelt0; - if (3 * i + nelt1 < nelt) - sel[3 * i + nelt1] = 3 * i + nelt1; - if (3 * i + nelt2 < nelt) - sel[3 * i + nelt2] = nelt + j2++; - } - indices.new_vector (sel, 2, nelt); - perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices); - - vect1 = dr_chain[0]; - vect2 = dr_chain[1]; - - /* Create interleaving stmt: - low = VEC_PERM_EXPR <vect1, vect2, - {j, nelt, *, j + 1, nelt + j + 1, *, - j + 2, nelt + j + 2, *, ...}> */ - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low"); - perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1, - vect2, perm3_mask_low); - vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); - - vect1 = data_ref; - vect2 = dr_chain[2]; - /* Create interleaving stmt: - low = VEC_PERM_EXPR <vect1, vect2, - {0, 1, nelt + j, 3, 4, nelt + j + 1, - 6, 7, nelt + j + 2, ...}> */ - data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high"); - perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1, - vect2, perm3_mask_high); - vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); - (*result_chain)[j] = data_ref; - } - } - else - { - /* If length is not equal to 3 then only power of 2 is supported. */ - gcc_assert (pow2p_hwi (length)); - - /* The encoding has 2 interleaved stepped patterns. */ - poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype); - vec_perm_builder sel (nelt, 2, 3); - sel.quick_grow (6); - for (i = 0; i < 3; i++) - { - sel[i * 2] = i; - sel[i * 2 + 1] = i + nelt; - } - vec_perm_indices indices (sel, 2, nelt); - perm_mask_high = vect_gen_perm_mask_checked (vectype, indices); - - for (i = 0; i < 6; i++) - sel[i] += exact_div (nelt, 2); - indices.new_vector (sel, 2, nelt); - perm_mask_low = vect_gen_perm_mask_checked (vectype, indices); - - for (i = 0, n = log_length; i < n; i++) - { - for (j = 0; j < length/2; j++) - { - vect1 = dr_chain[j]; - vect2 = dr_chain[j+length/2]; - - /* Create interleaving stmt: - high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, - ...}> */ - high = make_temp_ssa_name (vectype, NULL, "vect_inter_high"); - perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1, - vect2, perm_mask_high); - vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); - (*result_chain)[2*j] = high; - - /* Create interleaving stmt: - low = VEC_PERM_EXPR <vect1, vect2, - {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1, - ...}> */ - low = make_temp_ssa_name (vectype, NULL, "vect_inter_low"); - perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1, - vect2, perm_mask_low); - vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); - (*result_chain)[2*j+1] = low; - } - memcpy (dr_chain.address (), result_chain->address (), - length * sizeof (tree)); - } - } -} - /* Function vect_setup_realignment This function is called when vectorizing an unaligned load using diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index c1aaa27ff056..b17efd5f3e61 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -8413,39 +8413,6 @@ vectorizable_store (vec_info *vinfo, more than one vector stmt - i.e - we need to "unroll" the vector stmt by a factor VF/nunits. */ - /* In case of interleaving (non-unit grouped access): - - S1: &base + 2 = x2 - S2: &base = x0 - S3: &base + 1 = x1 - S4: &base + 3 = x3 - - We create vectorized stores starting from base address (the access of the - first stmt in the chain (S2 in the above example), when the last store stmt - of the chain (S4) is reached: - - VS1: &base = vx2 - VS2: &base + vec_size*1 = vx0 - VS3: &base + vec_size*2 = vx1 - VS4: &base + vec_size*3 = vx3 - - Then permutation statements are generated: - - VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > - VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > - ... - - And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts - (the order of the data-refs in the output of vect_permute_store_chain - corresponds to the order of scalar stmts in the interleaving chain - see - the documentation of vect_permute_store_chain()). - - In case of both multiple types and interleaving, above vector stores and - permutation stmts are created for every copy. The result vector stmts are - put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding - STMT_VINFO_RELATED_STMT for the next copies. - */ - auto_vec<tree> dr_chain (group_size); auto_vec<tree> vec_masks; tree vec_mask = NULL; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 878047436c86..e7f28e16779a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2568,9 +2568,6 @@ extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, boo extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool, vec<int> * = nullptr); -extern void vect_permute_store_chain (vec_info *, vec<tree> &, - unsigned int, stmt_vec_info, - gimple_stmt_iterator *, vec<tree> *); extern tree vect_setup_realignment (vec_info *, stmt_vec_info, gimple_stmt_iterator *, tree *, enum dr_alignment_support, tree,