https://gcc.gnu.org/g:5e3a4a01785e2d5135528a07bb8116af9c55ddf8
commit r15-3712-g5e3a4a01785e2d5135528a07bb8116af9c55ddf8 Author: Richard Biener <rguent...@suse.de> Date: Tue Sep 17 11:20:10 2024 +0200 tree-optimization/116573 - .SELECT_VL for SLP The following restores the use of .SELECT_VL for testcases where it is safe to use even when using SLP. For now I've restricted it to single-lane SLP, plus optimistically allowed store-lane nodes, and assume single-lane roots are not widened, except at most to load-lanes, which should be fine. PR tree-optimization/116573 * tree-vect-loop.cc (vect_analyze_loop_2): Allow .SELECT_VL for SLP but disable it when there are multi-lane instances. * tree-vect-stmts.cc (vectorizable_store): Only compute the ptr increment when generating code. (vectorizable_load): Likewise. Diff: --- gcc/tree-vect-loop.cc | 15 ++++++++++++++- gcc/tree-vect-stmts.cc | 10 ++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index d42694d19747..c6778ab5f154 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3084,10 +3084,23 @@ start_over: if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type, OPTIMIZE_FOR_SPEED) && LOOP_VINFO_LENS (loop_vinfo).length () == 1 - && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && !slp + && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())) LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true; + + /* If any of the SLP instances cover more than a single lane + we cannot use .SELECT_VL at the moment, even if the number + of lanes is uniform throughout the SLP graph. 
*/ + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) + for (slp_instance inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo)) + if (SLP_TREE_LANES (SLP_INSTANCE_TREE (inst)) != 1 + && !(SLP_INSTANCE_KIND (inst) == slp_inst_kind_store + && SLP_INSTANCE_TREE (inst)->ldst_lanes)) + { + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = false; + break; + } } /* Decide whether this loop_vinfo should use partial vectors or peeling, diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 495f45e40e63..33cdccae7849 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -8744,8 +8744,9 @@ vectorizable_store (vec_info *vinfo, aggr_type = build_array_type_nelts (elem_type, group_size * nunits); else aggr_type = vectype; - bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, - memory_access_type, loop_lens); + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); } if (mask && !costing_p) @@ -10820,8 +10821,9 @@ vectorizable_load (vec_info *vinfo, aggr_type = build_array_type_nelts (elem_type, group_size * nunits); else aggr_type = vectype; - bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, - memory_access_type, loop_lens); + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); } auto_vec<tree> vec_offsets;