This cleans the rest of vectorizable_load from non-SLP code paths.

	* tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths.
	Step 2.
---
 gcc/tree-vect-stmts.cc | 185 +++++++++++------------------------------
 1 file changed, 50 insertions(+), 135 deletions(-)
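The change is mechanical: with non-SLP vectorization gone, the former "slp"
flag is constant true, so guards like "if (1)" and "... && 1" fold away and
dead "if (0)" arms are deleted.  A minimal self-contained sketch of the
pattern, using hypothetical stand-in names rather than GCC internals:

/* Sketch only: hypothetical stand-ins, not GCC code.  */
#include <cassert>

static int
ncopies_before (bool slp, int non_slp_copies)
{
  if (slp)			/* Always true now; substitution left "if (1)".  */
    return 1;			/* NCOPIES is always 1 in case of SLP.  */
  else
    return non_slp_copies;	/* Dead arm, removed by the patch.  */
}

static int
ncopies_after (void)
{
  return 1;			/* Only the SLP arm survives.  */
}

int
main ()
{
  /* Both compute the same NCOPIES once slp is invariantly true.  */
  assert (ncopies_before (true, 4) == ncopies_after ());
  return 0;
}
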
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 92739903754..c5fe7879d5a 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9850,7 +9850,6 @@ vectorizable_load (vec_info *vinfo, bool compute_in_loop = false; class loop *at_loop; int vec_num; - bool slp = true; bool slp_perm = false; bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); poly_uint64 vf; @@ -9909,7 +9908,7 @@ vectorizable_load (vec_info *vinfo, return false; mask_index = internal_fn_mask_index (ifn); - if (mask_index >= 0 && 1) + if (mask_index >= 0) mask_index = vect_slp_child_index_for_operand (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (mask_index >= 0 @@ -9918,7 +9917,7 @@ vectorizable_load (vec_info *vinfo, return false; els_index = internal_fn_else_index (ifn); - if (els_index >= 0 && 1) + if (els_index >= 0) els_index = vect_slp_child_index_for_operand (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (els_index >= 0 @@ -9942,16 +9941,13 @@ vectorizable_load (vec_info *vinfo, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (1) - ncopies = 1; - else - ncopies = vect_get_num_copies (loop_vinfo, vectype); + ncopies = 1; gcc_assert (ncopies >= 1); /* FORNOW. This restriction should be relaxed. */ if (nested_in_vect_loop - && (ncopies > 1 || (1 && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))) + && (ncopies > 1 || SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -9997,15 +9993,6 @@ vectorizable_load (vec_info *vinfo, first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); group_size = DR_GROUP_SIZE (first_stmt_info); - /* Refuse non-SLP vectorization of SLP-only groups. */ - if (0 && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "cannot vectorize load in non-SLP mode.\n"); - return false; - } - /* Invalidate assumptions made by dependence analysis when vectorization on the unrolled body effectively re-orders stmts. */ if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 @@ -10046,8 +10033,7 @@ vectorizable_load (vec_info *vinfo, /* ??? The following checks should really be part of get_group_load_store_type. */ - if (1 - && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () + if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists () && !((memory_access_type == VMAT_ELEMENTWISE || memory_access_type == VMAT_GATHER_SCATTER) && SLP_TREE_LANES (slp_node) == 1)) @@ -10090,8 +10076,7 @@ vectorizable_load (vec_info *vinfo, } } - if (1 - && slp_node->ldst_lanes + if (slp_node->ldst_lanes && memory_access_type != VMAT_LOAD_STORE_LANES) { if (dump_enabled_p ()) @@ -10142,8 +10127,7 @@ vectorizable_load (vec_info *vinfo, if (costing_p) /* transformation not required. 
*/ { - if (1 - && mask + if (mask && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype)) { @@ -10153,10 +10137,7 @@ vectorizable_load (vec_info *vinfo, return false; } - if (0) - STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; - else - SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; + SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) @@ -10210,12 +10191,7 @@ vectorizable_load (vec_info *vinfo, if (elsvals.length ()) maskload_elsval = *elsvals.begin (); - if (0) - gcc_assert (memory_access_type - == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); - else - gcc_assert (memory_access_type - == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); + gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); if (dump_enabled_p () && !costing_p) dump_printf_loc (MSG_NOTE, vect_location, @@ -10289,15 +10265,8 @@ vectorizable_load (vec_info *vinfo, vectype, &gsi2); } gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); - if (1) - for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) - slp_node->push_vec_def (new_stmt); - else - { - for (j = 0; j < ncopies; ++j) - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); - *vec_stmt = new_stmt; - } + for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) + slp_node->push_vec_def (new_stmt); return true; } @@ -10615,12 +10584,11 @@ vectorizable_load (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER - || (0 && memory_access_type == VMAT_CONTIGUOUS)) + if (memory_access_type == VMAT_GATHER_SCATTER) grouped_load = false; if (grouped_load - || (1 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())) + || SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) { if (grouped_load) { @@ -10634,7 +10602,7 @@ vectorizable_load (vec_info *vinfo, } /* For SLP vectorization we directly vectorize a subchain without permutation. */ - if (1 && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) + if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; /* For BB vectorization always use the first stmt to base the data ref pointer on. */ @@ -10642,60 +10610,39 @@ vectorizable_load (vec_info *vinfo, first_stmt_info_for_drptr = vect_find_first_scalar_stmt_in_slp (slp_node); - /* Check if the chain of loads is already vectorized. */ - if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists () - /* For SLP we would need to copy over SLP_TREE_VEC_DEFS. - ??? But we can only do so if there is exactly one - as we have no way to get at the rest. Leave the CSE - opportunity alone. - ??? With the group load eventually participating - in multiple different permutations (having multiple - slp nodes which refer to the same group) the CSE - is even wrong code. See PR56270. */ - && 0) - { - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; - return true; - } first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); group_gap_adj = 0; /* VEC_NUM is the number of vect stmts to be created for this group. */ - if (1) - { - grouped_load = false; - /* If an SLP permutation is from N elements to N elements, - and if one vector holds a whole number of N, we can load - the inputs to the permutation in the same way as an - unpermuted sequence. In other cases we need to load the - whole group, not only the number of vector stmts the - permutation result fits in. 
*/ - unsigned scalar_lanes = SLP_TREE_LANES (slp_node); - if (nested_in_vect_loop) - /* We do not support grouped accesses in a nested loop, - instead the access is contiguous but it might be - permuted. No gap adjustment is needed though. */ - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - else if (slp_perm - && (group_size != scalar_lanes - || !multiple_p (nunits, group_size))) - { - /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for - variable VF; see vect_transform_slp_perm_load. */ - unsigned int const_vf = vf.to_constant (); - unsigned int const_nunits = nunits.to_constant (); - vec_num = CEIL (group_size * const_vf, const_nunits); - group_gap_adj = vf * group_size - nunits * vec_num; - } - else - { - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - group_gap_adj - = group_size - scalar_lanes; - } - } + grouped_load = false; + /* If an SLP permutation is from N elements to N elements, + and if one vector holds a whole number of N, we can load + the inputs to the permutation in the same way as an + unpermuted sequence. In other cases we need to load the + whole group, not only the number of vector stmts the + permutation result fits in. */ + unsigned scalar_lanes = SLP_TREE_LANES (slp_node); + if (nested_in_vect_loop) + /* We do not support grouped accesses in a nested loop, + instead the access is contiguous but it might be + permuted. No gap adjustment is needed though. */ + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + else if (slp_perm + && (group_size != scalar_lanes + || !multiple_p (nunits, group_size))) + { + /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for + variable VF; see vect_transform_slp_perm_load. */ + unsigned int const_vf = vf.to_constant (); + unsigned int const_nunits = nunits.to_constant (); + vec_num = CEIL (group_size * const_vf, const_nunits); + group_gap_adj = vf * group_size - nunits * vec_num; + } else - vec_num = group_size; + { + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + group_gap_adj = group_size - scalar_lanes; + } ref_type = get_group_alias_ptr_type (first_stmt_info); } @@ -10706,8 +10653,7 @@ vectorizable_load (vec_info *vinfo, group_size = vec_num = 1; group_gap_adj = 0; ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); - if (1) - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); } gcc_assert (alignment_support_scheme); @@ -10909,14 +10855,8 @@ vectorizable_load (vec_info *vinfo, auto_vec<tree> vec_offsets; auto_vec<tree> vec_masks; if (mask && !costing_p) - { - if (1) - vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], - &vec_masks); - else - vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask, - &vec_masks, mask_vectype); - } + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], + &vec_masks); tree vec_mask = NULL_TREE; tree vec_els = NULL_TREE; @@ -10929,8 +10869,7 @@ vectorizable_load (vec_info *vinfo, /* For costing some adjacent vector loads, we'd like to cost with the total number of them once instead of cost each one by one. */ unsigned int n_adjacent_loads = 0; - if (1) - ncopies = slp_node->vec_stmts_size / group_size; + ncopies = slp_node->vec_stmts_size / group_size; for (j = 0; j < ncopies; j++) { if (costing_p) @@ -11053,32 +10992,17 @@ vectorizable_load (vec_info *vinfo, gimple_call_set_nothrow (call, true); vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); - if (0) - dr_chain.create (group_size); /* Extract each vector into an SSA_NAME. 
*/ for (unsigned i = 0; i < group_size; i++) { new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest, vec_array, i, need_zeroing, final_mask); - if (1) - slp_node->push_vec_def (new_temp); - else - dr_chain.quick_push (new_temp); + slp_node->push_vec_def (new_temp); } - if (0) - /* Record the mapping between SSA_NAMEs and statements. */ - vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain); - /* Record that VEC_ARRAY is now dead. */ vect_clobber_variable (vinfo, stmt_info, gsi, vec_array); - - if (0) - dr_chain.release (); - - if (0) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; } if (costing_p) @@ -11453,17 +11377,10 @@ vectorizable_load (vec_info *vinfo, } /* Store vector loads in the corresponding SLP_NODE. */ - if (1) - slp_node->push_vec_def (new_stmt); + slp_node->push_vec_def (new_stmt); } - - if (0 && !costing_p) - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); } - if (0 && !costing_p) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; - if (costing_p && dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_load_cost: inside_cost = %u, " @@ -12052,7 +11969,7 @@ vectorizable_load (vec_info *vinfo, dr_chain.quick_push (new_temp); /* Store vector loads in the corresponding SLP_NODE. */ - if (!costing_p && 1 && !slp_perm) + if (!costing_p && !slp_perm) slp_node->push_vec_def (new_stmt); /* With SLP permutation we load the gaps as well, without @@ -12090,7 +12007,7 @@ vectorizable_load (vec_info *vinfo, stmt_info, bump); } - if (1 && !slp_perm) + if (!slp_perm) continue; if (slp_perm) @@ -12152,8 +12069,6 @@ vectorizable_load (vec_info *vinfo, } dr_chain.release (); } - if (0 && !costing_p) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; if (costing_p) { -- 2.43.0
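
A stand-alone check of the VEC_NUM computation kept in the load-permutation
path above, vec_num = CEIL (group_size * vf, nunits); the concrete numbers
are assumed, for illustration only:

/* Illustrative only; names mirror the patch but this is not GCC code.  */
#include <cstdio>

static unsigned
ceil_div (unsigned a, unsigned b)
{
  return (a + b - 1) / b;
}

int
main ()
{
  /* Assumed example: group of 3 loads, VF 4, 4 elements per vector.  */
  unsigned group_size = 3, vf = 4, nunits = 4;
  /* Load the whole group across VF iterations: CEIL (3 * 4, 4) = 3.  */
  unsigned vec_num = ceil_div (group_size * vf, nunits);
  /* group_gap_adj is the difference vf * group_size - nunits * vec_num;
     here it is 12 - 12 = 0, i.e. no gap adjustment is needed.  */
  int group_gap_adj = (int) (vf * group_size) - (int) (nunits * vec_num);
  printf ("vec_num = %u, group_gap_adj = %d\n", vec_num, group_gap_adj);
  return 0;
}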