The following makes sure to mark even unsupported PHIs live when doing
early-break vectorization, since otherwise we fail to validate that we
can vectorize them and instead generate wrong code based on the scalar
PHIs, which is only correct with a vectorization factor of one.
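As a hypothetical reduction of the problem (not the committed testcase,
which follows below): a scalar PHI that is neither a supported induction
nor a reduction, but whose value is needed after the loop, must be marked
live so early-break peeling validates it and takes it from the vector
loop rather than from the scalar PHI:

  /* Hypothetical sketch of the pattern: the conditional extra step
     means p's PHI is no recognized induction, yet its final value is
     used after the early-break loop; materializing it from the scalar
     PHI is only correct for a vectorization factor of one.  */
  int *
  step_pointer (int *p, unsigned n, unsigned long t, int flag)
  {
    for (unsigned i = 0; i < n; ++i)
      {
        if (i > t)            /* early break */
          __builtin_trap ();
        p += 2;
        if (flag)
          p += 2;             /* conditional extra step */
      }
    return p;                 /* PHI value live after the loop */
  }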
Bootstrapped and tested on x86_64-unknown-linux-gnu.

It's been a bit of a whack-a-mole, and this is an area that needs
improvement, but I wanted to have something that can be backported
and survive the mixed loop/SLP world.

I'll wait for the CI results and then plan to push, unless you have
any comments.

Thanks,
Richard.

	PR tree-optimization/120089
	* tree-vect-stmts.cc (vect_stmt_relevant_p): Mark all PHIs
	live when not already so and doing early-break vectorization.
	(vect_mark_stmts_to_be_vectorized): Skip virtual PHIs.
	* tree-vect-slp.cc (vect_analyze_slp): Robustify handling of
	early-break forced IVs.

	* gcc.dg/vect/vect-early-break_134-pr120089.c: New testcase.
---
 .../vect/vect-early-break_134-pr120089.c   | 66 +++++++++++++++++++
 gcc/tree-vect-slp.cc                       | 17 ++---
 gcc/tree-vect-stmts.cc                     | 26 ++++++--
 3 files changed, 94 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c
new file mode 100644
index 00000000000..4d8199ca637
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c
@@ -0,0 +1,66 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-funswitch-loops" } */
+
+#include "tree-vect.h"
+
+typedef int type;
+typedef type Vec2[2];
+
+struct BytesVec {
+    type d[100];
+};
+
+__attribute__((noipa)) struct BytesVec
+buildVertexBufferData(const Vec2 *origVertices, bool needsZW,
+                      unsigned paddingSize, unsigned long t) {
+    const unsigned vertexCount = t;
+    struct BytesVec data = (struct BytesVec){.d = {0}};
+    type *nextVertexPtr = data.d;
+
+    for (unsigned vertexIdx = 0u; vertexIdx < vertexCount; ++vertexIdx) {
+
+        if (vertexIdx > t)
+            __builtin_trap();
+        __builtin_memcpy(nextVertexPtr, &origVertices[vertexIdx],
+                         2 * sizeof(type));
+        nextVertexPtr += 2;
+
+        if (needsZW) {
+            nextVertexPtr += 2;
+        }
+
+        nextVertexPtr += paddingSize;
+    }
+
+    return data;
+}
+Vec2 origVertices[] = {
+    {0, 1},   {2, 3},   {4, 5},   {6, 7},
+    {8, 9},   {10, 11}, {12, 13}, {14, 15},
+    {16, 17}, {18, 19}, {20, 21}, {22, 23},
+    {24, 25}, {26, 27}, {27, 28}, {29, 30},
+};
+
+int main()
+{
+  check_vect ();
+  struct BytesVec vec
+    = buildVertexBufferData(origVertices, false, 0,
+                            sizeof(origVertices) / sizeof(origVertices[0]));
+
+  int errors = 0;
+  for (unsigned i = 0; i < 100; i++) {
+    if (i / 2 < sizeof(origVertices) / sizeof(origVertices[0])) {
+      int ii = i;
+      int e = origVertices[ii / 2][ii % 2];
+      if (vec.d[i] != e)
+        errors++;
+    } else {
+      if (vec.d[i] != 0)
+        errors++;
+    }
+  }
+  if (errors)
+    __builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 87d9ab9355a..6d5824a97bf 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5044,14 +5044,17 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
       vec<stmt_vec_info> roots = vNULL;
       vec<tree> remain = vNULL;
       gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
-      stmts.create (1);
       tree def = gimple_phi_arg_def_from_edge (phi, latch_e);
       stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
-      stmts.quick_push (vect_stmt_to_vectorize (lc_info));
-      vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
-                               stmts, roots, remain,
-                               max_tree_size, &limit,
-                               bst_map, NULL, force_single_lane);
+      if (lc_info)
+        {
+          stmts.create (1);
+          stmts.quick_push (vect_stmt_to_vectorize (lc_info));
+          vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+                                   stmts, roots, remain,
+                                   max_tree_size, &limit,
+                                   bst_map, NULL, force_single_lane);
+        }
       /* When the latch def is from a different cycle this can only
          be an induction.  Build a simple instance for this.
          ???  We should be able to start discovery from the PHI
@@ -5061,8 +5064,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
           tem.quick_push (stmt_info);
           if (!bst_map->get (tem))
             {
-              gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info)
-                          == vect_induction_def);
               stmts.create (1);
               stmts.quick_push (stmt_info);
               vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 95da855e5b3..8755fa86a5c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -419,18 +419,28 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
         }
     }
 
-  /* Check if it's an induction and multiple exits.  In this case there will be
-     a usage later on after peeling which is needed for the alternate exit.  */
+  /* Check if it's a not live PHI and multiple exits.  In this case
+     there will be a usage later on after peeling which is needed for the
+     alternate exit.  */
   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-      && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
+      && is_a <gphi *> (stmt)
+      && ((! VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))
+           && ! *live_p)
+          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
     {
       if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location,
-                         "vec_stmt_relevant_p: induction forced for "
-                         "early break.\n");
+        {
+          if (is_a <gphi *> (stmt))
+            dump_printf_loc (MSG_NOTE, vect_location,
+                             "vec_stmt_relevant_p: PHI forced live for "
+                             "early break.\n");
+          else
+            dump_printf_loc (MSG_NOTE, vect_location,
+                             "vec_stmt_relevant_p: induction forced for "
+                             "early break.\n");
+        }
       LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info);
       *live_p = true;
-
     }
 
   if (*live_p && *relevant == vect_unused_in_scope
@@ -714,6 +724,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
       bb = bbs[i];
       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
         {
+          if (virtual_operand_p (gimple_phi_result (gsi_stmt (si))))
+            continue;
           stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
           if (dump_enabled_p ())
             dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
-- 
2.43.0