On Fri, 24 Nov 2023, Tamar Christina wrote: > Good morning, > > This is a respun patch with a fix for VLA. > > This adds support to vectorizable_live_reduction to handle multiple exits by > doing a search for which exit the live value should be materialized in. > > Additionally which value in the index we're after depends on whether the exit > it's materialized in is an early exit or whether the loop's main exit is > different from the loop's natural one (i.e. the one with the same src block as > the latch). > > In those two cases we want the first rather than the last value as we're going > to restart the iteration in the scalar loop. For VLA this means we need to > reverse both the mask and vector since there's only a way to get the last > active element and not the first. > > For inductions and multiple exits: > - we test if the target will support vectorizing the induction > - mark all inductions in the loop as relevant > - for codegen of non-live inductions during codegen > - induction during an early exit gets the first element rather than last. > > For reductions and multiple exits: > - Reductions for early exits reduces the reduction definition statement > rather than the reduction step. This allows us to get the value at the > start of the iteration. > - The peeling layout means that we just have to update one block, the merge > block. We expect all the reductions to be the same but we leave it up to > the value numbering to clean up any duplicate code as we iterate over all > edges. > > These two changes fix the reduction codegen given before which has been added > to the testsuite for early vect. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * tree-vect-loop.cc (vectorizable_live_operation): Support early exits. > (vect_analyze_loop_operations): Check if target supports vectorizing IV. 
> (vect_transform_loop): Call vectorizable_live_operation for non-live > inductions or reductions. > (find_connected_edge, vectorizable_live_operation_1): New. > (vect_create_epilog_for_reduction): Support reductions in early break. > * tree-vect-stmts.cc (perm_mask_for_reverse): Expose. > (vect_stmt_relevant_p): Mark all inductions when early break as being > relevant. > * tree-vectorizer.h (perm_mask_for_reverse): Expose. > (vect_iv_increment_position): New. > * tree-vect-loop-manip.cc (vect_iv_increment_position): Expose. > > --- inline copy of patch --- > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > index > 476be8a0bb6da2d06c4ca7052cb07bacecca60b1..1a4ba349fb6ae39c79401aecd4e7eaaaa9e2b8a0 > 100644 > --- a/gcc/tree-vect-loop-manip.cc > +++ b/gcc/tree-vect-loop-manip.cc > @@ -453,7 +453,7 @@ vect_adjust_loop_lens_control (tree iv_type, gimple_seq > *seq, > INSERT_AFTER is set to true if the increment should be inserted after > *BSI. */ > > -static void > +void > vect_iv_increment_position (edge loop_exit, gimple_stmt_iterator *bsi, > bool *insert_after) > { > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index > 8a50380de49bc12105be47ea1d8ee3cf1f2bdab4..b42318b2999e6a27e6983382190792602cb25af1 > 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -2163,6 +2163,15 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) > ok = vectorizable_live_operation (loop_vinfo, stmt_info, NULL, NULL, > -1, false, &cost_vec); > > + /* Check if we can perform the operation for early break if we force > + the live operation. */ > + if (ok > + && LOOP_VINFO_EARLY_BREAKS (loop_vinfo) > + && !STMT_VINFO_LIVE_P (stmt_info) > + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def) > + ok = vectorizable_live_operation (loop_vinfo, stmt_info, NULL, NULL, > + -1, false, &cost_vec);
can you add && !PURE_SLP_STMT? > + > if (!ok) > return opt_result::failure_at (phi, > "not vectorized: relevant phi not " > @@ -5842,6 +5851,10 @@ vect_create_partial_epilog (tree vec_def, tree > vectype, code_helper code, > SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE > REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi > (counting from 0) > + LOOP_EXIT is the edge to update in the merge block. In the case of a > single > + exit this edge is always the main loop exit. > + MAIN_EXIT_P indicates whether we are updating the main exit or an > alternate > + exit. This determines whether we use the final or original value. > > This function: > 1. Completes the reduction def-use cycles. > @@ -5882,7 +5895,9 @@ static void > vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, > stmt_vec_info stmt_info, > slp_tree slp_node, > - slp_instance slp_node_instance) > + slp_instance slp_node_instance, > + edge loop_exit, > + bool main_exit_p = true) isn't main_exit_p computable from 'loop_exit' by comparing that to the one recorded in loop_vinfo? If so please do that instead of passing in another argument. > { > stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); > gcc_assert (reduc_info->is_reduc_info); > @@ -6053,7 +6068,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > /* Create an induction variable. */ > gimple_stmt_iterator incr_gsi; > bool insert_after; > - standard_iv_increment_position (loop, &incr_gsi, &insert_after); > + vect_iv_increment_position (loop_exit, &incr_gsi, &insert_after); > create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, > &incr_gsi, > insert_after, &indx_before_incr, &indx_after_incr); > > @@ -6132,23 +6147,30 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > Store them in NEW_PHIS. */ > if (double_reduc) > loop = outer_loop; > - exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; > + /* We need to reduce values in all exits. 
*/ > + exit_bb = loop_exit->dest; > exit_gsi = gsi_after_labels (exit_bb); > reduc_inputs.create (slp_node ? vec_num : ncopies); > + vec <gimple *> vec_stmts; > + if (main_exit_p) > + vec_stmts = STMT_VINFO_VEC_STMTS (rdef_info); > + else > + vec_stmts = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (rdef_info)); both would be wrong for SLP, also I think you need to look at STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))? For SLP the PHI SLP node is reached via slp_node_instance->reduc_phis. I think an overall better structure would be to add a vect_get_vect_def (stmt_vec_info, slp_tree, unsigned); abstracting SLP and non-SLP and doing for (unsigned i = 0; i < vec_num * ncopies; ++i) { def = vect_get_vect_def (stmt_info, slp_node, i); ... } and then adjusting stmt_info/slp_node according to main_exit_p? (would be nice to transition stmt_info->vec_stmts to stmt_info->vec_defs) That said, wherever possible please think of SLP ;) > + > for (unsigned i = 0; i < vec_num; i++) > { > gimple_seq stmts = NULL; > if (slp_node) > def = vect_get_slp_vect_def (slp_node, i); > else > - def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); > + def = gimple_get_lhs (vec_stmts[0]); > for (j = 0; j < ncopies; j++) > { > tree new_def = copy_ssa_name (def); > phi = create_phi_node (new_def, exit_bb); > if (j) > - def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); > - SET_PHI_ARG_DEF (phi, LOOP_VINFO_IV_EXIT (loop_vinfo)->dest_idx, def); > + def = gimple_get_lhs (vec_stmts[j]); > + SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def); > new_def = gimple_convert (&stmts, vectype, new_def); > reduc_inputs.quick_push (new_def); > } > @@ -6885,7 +6907,20 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) > { > FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) > - SET_USE (use_p, scalar_result); > + { > + gimple *stmt = USE_STMT (use_p); > + if (main_exit_p) > + SET_USE (use_p, scalar_result); > + else if (is_a <gphi *> 
(stmt)) > + { > + /* If an early exit only update usages in the merge > + block. */ Shouldn't that be the only use at this point anyway? By the way, you only update uses in PHI nodes, and you can use SET_USE; maybe you wanted to check that gimple_phi_arg_edge (stmt, phi_arg_index_from_use (use_p)) == merge_e instead? That said, the comment could be more precise. Are we calling vect_create_epilog_for_reduction for each early exit? I suppose not? > + edge merge_e = single_succ_edge (loop_exit->dest); > + if (gimple_bb (stmt) != merge_e->dest) > + continue; > + SET_PHI_ARG_DEF (stmt, merge_e->dest_idx, scalar_result); > + } > + } > update_stmt (use_stmt); > } > } > @@ -10481,6 +10516,156 @@ vectorizable_induction (loop_vec_info loop_vinfo, > return true; > } > > +/* Function vectorizable_live_operation_1. > + > + helper function for vectorizable_live_operation. */ > + > +tree > +vectorizable_live_operation_1 (loop_vec_info loop_vinfo, > + stmt_vec_info stmt_info, edge exit_e, > + tree vectype, int ncopies, slp_tree slp_node, > + tree bitsize, tree bitstart, tree vec_lhs, > + tree lhs_type, bool restart_loop, > + gimple_stmt_iterator *exit_gsi) > +{ > + basic_block exit_bb = exit_e->dest; > + gcc_assert (single_pred_p (exit_bb) || LOOP_VINFO_EARLY_BREAKS > (loop_vinfo)); > + > + tree vec_lhs_phi = copy_ssa_name (vec_lhs); > + gimple *phi = create_phi_node (vec_lhs_phi, exit_bb); > + for (unsigned i = 0; i < gimple_phi_num_args (phi); i++) > + SET_PHI_ARG_DEF (phi, i, vec_lhs); > + > + gimple_seq stmts = NULL; > + tree new_tree; > + if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) > + { > + /* Emit: > + > + SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1> > + > + where VEC_LHS is the vectorized live-out result and MASK is > + the loop mask for the final iteration. 
*/ > + gcc_assert (ncopies == 1 && !slp_node); > + gimple_seq tem = NULL; > + gimple_stmt_iterator gsi = gsi_last (tem); > + tree len = vect_get_loop_len (loop_vinfo, &gsi, > + &LOOP_VINFO_LENS (loop_vinfo), > + 1, vectype, 0, 0); > + > + /* BIAS - 1. */ > + signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); > + tree bias_minus_one > + = int_const_binop (MINUS_EXPR, > + build_int_cst (TREE_TYPE (len), biasval), > + build_one_cst (TREE_TYPE (len))); > + > + /* LAST_INDEX = LEN + (BIAS - 1). */ > + tree last_index = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (len), > + len, bias_minus_one); > + > + /* This needs to implement extraction of the first index, but not sure > + how the LEN stuff works. At the moment we shouldn't get here since > + there's no LEN support for early breaks. But guard this so there's > + no incorrect codegen. */ > + gcc_assert (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)); > + > + /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>. */ > + tree scalar_res > + = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype), > + vec_lhs_phi, last_index); > + > + /* Convert the extracted vector element to the scalar type. */ > + new_tree = gimple_convert (&stmts, lhs_type, scalar_res); > + } > + else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) > + { > + /* Emit: > + > + SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK> > + > + where VEC_LHS is the vectorized live-out result and MASK is > + the loop mask for the final iteration. */ > + gcc_assert (!slp_node); > + tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info)); > + gimple_seq tem = NULL; > + gimple_stmt_iterator gsi = gsi_last (tem); > + tree mask = vect_get_loop_mask (loop_vinfo, &gsi, > + &LOOP_VINFO_MASKS (loop_vinfo), > + 1, vectype, 0); > + tree scalar_res; > + > + /* For an inverted control flow with early breaks we want EXTRACT_FIRST > + instead of EXTRACT_LAST. Emulate by reversing the vector and mask. 
*/ > + if (restart_loop && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > + { > + /* First create the permuted mask. */ > + tree perm_mask = perm_mask_for_reverse (TREE_TYPE (mask)); > + tree perm_dest = copy_ssa_name (mask); > + gimple *perm_stmt > + = gimple_build_assign (perm_dest, VEC_PERM_EXPR, mask, > + mask, perm_mask); > + vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt, > + &gsi); > + mask = perm_dest; > + > + /* Then permute the vector contents. */ > + tree perm_elem = perm_mask_for_reverse (vectype); > + perm_dest = copy_ssa_name (vec_lhs_phi); > + perm_stmt > + = gimple_build_assign (perm_dest, VEC_PERM_EXPR, vec_lhs_phi, > + vec_lhs_phi, perm_elem); > + vect_finish_stmt_generation (loop_vinfo, stmt_info, perm_stmt, > + &gsi); > + vec_lhs_phi = perm_dest; > + } > + > + gimple_seq_add_seq (&stmts, tem); > + > + scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type, > + mask, vec_lhs_phi); > + > + /* Convert the extracted vector element to the scalar type. */ > + new_tree = gimple_convert (&stmts, lhs_type, scalar_res); > + } > + else > + { > + tree bftype = TREE_TYPE (vectype); > + if (VECTOR_BOOLEAN_TYPE_P (vectype)) > + bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); > + new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs_phi, bitsize, > bitstart); > + new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), > + &stmts, true, NULL_TREE); > + } > + > + *exit_gsi = gsi_after_labels (exit_bb); > + if (stmts) > + gsi_insert_seq_before (exit_gsi, stmts, GSI_SAME_STMT); > + > + return new_tree; > +} > + > +/* Find the edge that's the final one in the path from SRC to DEST and > + return it. This edge must exist in at most one forwarder edge between. 
> */ > + > +static edge > +find_connected_edge (edge src, basic_block dest) > +{ > + if (src->dest == dest) > + return src; > + > + edge e; > + edge_iterator ei; > + > + FOR_EACH_EDGE (e, ei, dest->preds) > + { > + if (src->dest == e->src) > + return e; > + } Isn't that just find_edge (src->dest, dest)? > + return NULL; > +} > + > /* Function vectorizable_live_operation. > > STMT_INFO computes a value that is used outside the loop. Check if > @@ -10505,7 +10690,8 @@ vectorizable_live_operation (vec_info *vinfo, > stmt_vec_info stmt_info, > int vec_entry = 0; > poly_uint64 vec_index = 0; > > - gcc_assert (STMT_VINFO_LIVE_P (stmt_info)); > + gcc_assert (STMT_VINFO_LIVE_P (stmt_info) > + || LOOP_VINFO_EARLY_BREAKS (loop_vinfo)); > > /* If a stmt of a reduction is live, vectorize it via > vect_create_epilog_for_reduction. vectorizable_reduction assessed > @@ -10530,8 +10716,22 @@ vectorizable_live_operation (vec_info *vinfo, > stmt_vec_info stmt_info, > if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION > || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION) > return true; > + > + /* If early break we only have to materialize the reduction on the > merge > + block, but we have to find an alternate exit first. */ > + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > + { > + for (auto exit : get_loop_exit_edges (LOOP_VINFO_LOOP (loop_vinfo))) > + if (exit != LOOP_VINFO_IV_EXIT (loop_vinfo)) > + vect_create_epilog_for_reduction (loop_vinfo, stmt_info, > + slp_node, slp_node_instance, > + exit, false); Hmm, so it is called once for each alternate exit. But don't we only need a single reduction epilogue, in the merge block? 
> + } > + > vect_create_epilog_for_reduction (loop_vinfo, stmt_info, slp_node, > - slp_node_instance); > + slp_node_instance, > + LOOP_VINFO_IV_EXIT (loop_vinfo)); > + > return true; > } > > @@ -10683,103 +10883,63 @@ vectorizable_live_operation (vec_info *vinfo, > stmt_vec_info stmt_info, > lhs' = new_tree; */ > > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > - basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; > - gcc_assert (single_pred_p (exit_bb)); > - > - tree vec_lhs_phi = copy_ssa_name (vec_lhs); > - gimple *phi = create_phi_node (vec_lhs_phi, exit_bb); > - SET_PHI_ARG_DEF (phi, LOOP_VINFO_IV_EXIT (loop_vinfo)->dest_idx, > vec_lhs); > - > - gimple_seq stmts = NULL; > - tree new_tree; > - if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) > - { > - /* Emit: > - > - SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1> > - > - where VEC_LHS is the vectorized live-out result and MASK is > - the loop mask for the final iteration. */ > - gcc_assert (ncopies == 1 && !slp_node); > - gimple_seq tem = NULL; > - gimple_stmt_iterator gsi = gsi_last (tem); > - tree len > - = vect_get_loop_len (loop_vinfo, &gsi, > - &LOOP_VINFO_LENS (loop_vinfo), > - 1, vectype, 0, 0); > - > - /* BIAS - 1. */ > - signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); > - tree bias_minus_one > - = int_const_binop (MINUS_EXPR, > - build_int_cst (TREE_TYPE (len), biasval), > - build_one_cst (TREE_TYPE (len))); > - > - /* LAST_INDEX = LEN + (BIAS - 1). */ > - tree last_index = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (len), > - len, bias_minus_one); > - > - /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>. */ > - tree scalar_res > - = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype), > - vec_lhs_phi, last_index); > - > - /* Convert the extracted vector element to the scalar type. 
*/ > - new_tree = gimple_convert (&stmts, lhs_type, scalar_res); > - } > - else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) > - { > - /* Emit: > - > - SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK> > - > - where VEC_LHS is the vectorized live-out result and MASK is > - the loop mask for the final iteration. */ > - gcc_assert (ncopies == 1 && !slp_node); > - tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info)); > - gimple_seq tem = NULL; > - gimple_stmt_iterator gsi = gsi_last (tem); > - tree mask = vect_get_loop_mask (loop_vinfo, &gsi, > - &LOOP_VINFO_MASKS (loop_vinfo), > - 1, vectype, 0); > - gimple_seq_add_seq (&stmts, tem); > - tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, scalar_type, > - mask, vec_lhs_phi); > - > - /* Convert the extracted vector element to the scalar type. */ > - new_tree = gimple_convert (&stmts, lhs_type, scalar_res); > - } > - else > - { > - tree bftype = TREE_TYPE (vectype); > - if (VECTOR_BOOLEAN_TYPE_P (vectype)) > - bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); > - new_tree = build3 (BIT_FIELD_REF, bftype, > - vec_lhs_phi, bitsize, bitstart); > - new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), > - &stmts, true, NULL_TREE); > - } > + /* Check if we have a loop where the chosen exit is not the main exit, > + in these cases for an early break we restart the iteration the vector > code > + did. For the live values we want the value at the start of the > iteration > + rather than at the end. */ > + edge main_e = LOOP_VINFO_IV_EXIT (loop_vinfo); > + bool restart_loop = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo); > + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) > + if (!is_gimple_debug (use_stmt) > + && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) > + { > + basic_block use_bb = gimple_bb (use_stmt); > + if (!is_a <gphi *> (use_stmt)) > + continue; > + for (auto exit_e : get_loop_exit_edges (loop)) > + { > + /* See if this exit leads to the value. 
*/ > + edge dest_e = find_connected_edge (exit_e, use_bb); > + if (!dest_e || PHI_ARG_DEF_FROM_EDGE (use_stmt, dest_e) != lhs) > + continue; > > - gimple_stmt_iterator exit_gsi = gsi_after_labels (exit_bb); > - if (stmts) > - gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > + gimple *tmp_vec_stmt = vec_stmt; > + tree tmp_vec_lhs = vec_lhs; > + tree tmp_bitstart = bitstart; > + /* For early exit where the exit is not in the BB that leads > + to the latch then we're restarting the iteration in the > + scalar loop. So get the first live value. */ > + restart_loop = restart_loop || exit_e != main_e; > + if (restart_loop) > + { > + tmp_vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; > + tmp_vec_lhs = gimple_get_lhs (tmp_vec_stmt); > + tmp_bitstart = build_zero_cst (TREE_TYPE (bitstart)); > + } > > - /* Remove existing phis that copy from lhs and create copies > - from new_tree. */ > - gimple_stmt_iterator gsi; > - for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi);) > - { > - gimple *phi = gsi_stmt (gsi); > - if ((gimple_phi_arg_def (phi, 0) == lhs)) > - { > - remove_phi_node (&gsi, false); > - tree lhs_phi = gimple_phi_result (phi); > - gimple *copy = gimple_build_assign (lhs_phi, new_tree); > - gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT); > - } > - else > - gsi_next (&gsi); > - } > + gimple_stmt_iterator exit_gsi; > + tree new_tree > + = vectorizable_live_operation_1 (loop_vinfo, stmt_info, > + exit_e, vectype, ncopies, > + slp_node, bitsize, > + tmp_bitstart, tmp_vec_lhs, > + lhs_type, restart_loop, > + &exit_gsi); > + > + /* Use the empty block on the exit to materialize the new stmts > + so we can use update the PHI here. 
*/ > + if (gimple_phi_num_args (use_stmt) == 1) > + { > + auto gsi = gsi_for_stmt (use_stmt); > + remove_phi_node (&gsi, false); > + tree lhs_phi = gimple_phi_result (use_stmt); > + gimple *copy = gimple_build_assign (lhs_phi, new_tree); > + gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT); > + } > + else > + SET_PHI_ARG_DEF (use_stmt, dest_e->dest_idx, new_tree); > + } > + } It's difficult to see what changed because of the split-out, but I guess it'll be OK. > /* There a no further out-of-loop uses of lhs by LC-SSA construction. > */ > FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) > @@ -11797,6 +11957,21 @@ vect_transform_loop (loop_vec_info loop_vinfo, > gimple *loop_vectorized_call) > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n"); > vect_transform_stmt (loop_vinfo, stmt_info, NULL, NULL, NULL); > + /* If vectorizing early break we must also vectorize the use of > + the PHIs as a live operation. */ > + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) > + && !STMT_VINFO_LIVE_P (stmt_info) > + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "----> vectorizing early break reduc or induc phi: %G", > + (gimple *) phi); > + bool done > + = vectorizable_live_operation (loop_vinfo, stmt_info, NULL, > + NULL, -1, true, NULL); You should be able to amend can_vectorize_live_stmts instead by adding || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) && vect_induction_def); then we keep it in one place, which is also where we'd handle the SLP case. > + gcc_assert (done); > + } > } > } > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index > fe38beb4fa1d9f8593445354f56ba52e10a040cd..f1b6a13395f286f9997530bbe57cda3a00502f8f > 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -342,6 +342,7 @@ is_simple_and_all_uses_invariant (stmt_vec_info stmt_info, > - it has uses outside the loop. > - it has vdefs (it alters memory). 
> - control stmts in the loop (except for the exit condition). > + - it is an induction and we have multiple exits. > > CHECKME: what other side effects would the vectorizer allow? */ > > @@ -399,6 +400,19 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, > loop_vec_info loop_vinfo, > } > } > > + /* Check if it's an induction and multiple exits. In this case there will > be > + a usage later on after peeling which is needed for the alternate exit. > */ > + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) > + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "vec_stmt_relevant_p: induction forced for " > + "early break.\n"); > + *relevant = vect_used_in_scope; > + I think you should instead set *live_p? > + } > + > if (*live_p && *relevant == vect_unused_in_scope > && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo)) > { > @@ -1774,7 +1788,7 @@ compare_step_with_zero (vec_info *vinfo, stmt_vec_info > stmt_info) > /* If the target supports a permute mask that reverses the elements in > a vector of type VECTYPE, return that mask, otherwise return null. */ > > -static tree > +tree > perm_mask_for_reverse (tree vectype) > { > poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index > 076a698eb4288f68e81f91923f7e3e8d181ad685..de673ae56eac455c9560a29d7f3792b6c3c49f3b > 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2227,6 +2227,7 @@ extern bool vect_can_advance_ivs_p (loop_vec_info); > extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); > extern edge vec_init_loop_exit_info (class loop *); > extern bool vect_is_loop_exit_latch_pred (edge, class loop *); > +extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool > *); > > /* In tree-vect-stmts.cc. 
*/ > extern tree get_related_vectype_for_scalar_type (machine_mode, tree, > @@ -2248,6 +2249,7 @@ extern bool vect_is_simple_use (vec_info *, > stmt_vec_info, slp_tree, > enum vect_def_type *, > tree *, stmt_vec_info * = NULL); > extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree); > +extern tree perm_mask_for_reverse (tree); > extern bool supportable_widening_operation (vec_info*, code_helper, > stmt_vec_info, tree, tree, > code_helper*, code_helper*, > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)