On Tue, 6 May 2025, Tamar Christina wrote: > > -----Original Message----- > > From: Richard Biener <rguent...@suse.de> > > Sent: Tuesday, May 6, 2025 9:51 AM > > To: gcc-patches@gcc.gnu.org > > Cc: Tamar Christina <tamar.christ...@arm.com>; RISC-V CI <patchworks- > > c...@rivosinc.com> > > Subject: [PATCH] tree-optimization/120089 - force all PHIs live for > > early-break vect > > > > The following makes sure to even mark unsupported PHIs live when > > doing early-break vectorization since otherwise we fail to validate > > we can vectorize those and generate wrong code based on the scalar > > PHIs which would only work with a vectorization factor of one. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > It's been a bit of a whack-a-mole, and this is an area that needs > > improvement but I wanted to have sth that can be backported and > > survive the mixed loop/SLP world. > > > > I'll wait for the CI results and then plan to push, unless you > > have any comments. > > Thanks, > Makes sense to me, just small question, > > > > > Thanks, > > Richard. > > > > PR tree-optimization/120089 > > * tree-vect-stmts.cc (vect_stmt_relevant_p): Mark all > > PHIs live when not already so and doing early-break > > vectorization. > > (vect_mark_stmts_to_be_vectorized): Skip virtual PHIs. > > * tree-vect-slp.cc (vect_analyze_slp): Robustify handling > > of early-break forced IVs. > > > > * gcc.dg/vect/vect-early-break_134-pr120089.c: New testcase. > > --- > > .../vect/vect-early-break_134-pr120089.c | 66 +++++++++++++++++++ > > gcc/tree-vect-slp.cc | 17 ++--- > > gcc/tree-vect-stmts.cc | 26 ++++++-- > > 3 files changed, 94 insertions(+), 15 deletions(-) > > create mode 100644 > > gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c > > > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c > > b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c > > new file mode 100644 > > index 00000000000..4d8199ca637 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c > > @@ -0,0 +1,66 @@ > > +/* { dg-add-options vect_early_break } */ > > +/* { dg-additional-options "-funswitch-loops" } */ > > + > > +#include "tree-vect.h" > > + > > +typedef int type; > > +typedef type Vec2[2]; > > + > > +struct BytesVec { > > + type d[100]; > > +}; > > + > > +__attribute__((noipa)) struct BytesVec > > +buildVertexBufferData(const Vec2 *origVertices, bool needsZW, > > + unsigned paddingSize, unsigned long t) { > > + const unsigned vertexCount = t; > > + struct BytesVec data = (struct BytesVec){.d = {0}}; > > + type *nextVertexPtr = data.d; > > + > > + for (unsigned vertexIdx = 0u; vertexIdx < vertexCount; ++vertexIdx) { > > + > > + if (vertexIdx > t) > > + __builtin_trap(); > > + __builtin_memcpy(nextVertexPtr, &origVertices[vertexIdx], > > + 2 * sizeof(type)); > > + nextVertexPtr += 2; > > + > > + if (needsZW) { > > + nextVertexPtr += 2; > > + } > > + > > + nextVertexPtr += paddingSize; > > + } > > + > > + return data; > > +} > > +Vec2 origVertices[] = { > > + {0, 1}, {2, 3}, {4, 5}, {6, 7}, > > + {8, 9}, {10, 11}, {12, 13}, {14, 15}, > > + {16, 17}, {18, 19}, {20, 21}, {22, 23}, > > + {24, 25}, {26, 27}, {27, 28}, {29, 30}, > > +}; > > + > > +int main() > > +{ > > + check_vect (); > > + struct BytesVec vec > > + = buildVertexBufferData(origVertices, false, 0, > > + sizeof(origVertices) / sizeof(origVertices[0])); > > + > > + int errors = 0; > > + for (unsigned i = 0; i < 100; i++) { > > + if (i / 2 < sizeof(origVertices) / sizeof(origVertices[0])) { > > + int ii = i; > > + int e = origVertices[ii / 2][ii % 2]; > > + if (vec.d[i] != e) > > + errors++; > > + } else { > > + if (vec.d[i] != 0) > > + errors++; > > + } > > + } > > + if (errors) > > + __builtin_abort(); > > + return 0; > > +} > > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > > index 87d9ab9355a..6d5824a97bf 100644 > > --- a/gcc/tree-vect-slp.cc > > +++ b/gcc/tree-vect-slp.cc > > @@ -5044,14 +5044,17 @@ vect_analyze_slp (vec_info *vinfo, unsigned > > max_tree_size, > > vec<stmt_vec_info> roots = vNULL; > > vec<tree> remain = vNULL; > > gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info)); > > - stmts.create (1); > > tree def = gimple_phi_arg_def_from_edge (phi, latch_e); > > stmt_vec_info lc_info = loop_vinfo->lookup_def (def); > > - stmts.quick_push (vect_stmt_to_vectorize (lc_info)); > > - vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, > > - stmts, roots, remain, > > - max_tree_size, &limit, > > - bst_map, NULL, force_single_lane); > > + if (lc_info) > > + { > > + stmts.create (1); > > + stmts.quick_push (vect_stmt_to_vectorize (lc_info)); > > + vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, > > + stmts, roots, remain, > > + max_tree_size, &limit, > > + bst_map, NULL, force_single_lane); > > + } > > /* When the latch def is from a different cycle this can only > > be a induction. Build a simple instance for this. > > ??? We should be able to start discovery from the PHI > > @@ -5061,8 +5064,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned > > max_tree_size, > > tem.quick_push (stmt_info); > > if (!bst_map->get (tem)) > > { > > - gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) > > - == vect_induction_def); > > stmts.create (1); > > stmts.quick_push (stmt_info); > > vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > > index 95da855e5b3..8755fa86a5c 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -419,18 +419,28 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, > > loop_vec_info loop_vinfo, > > } > > } > > > > - /* Check if it's an induction and multiple exits. In this case there > > will be > > - a usage later on after peeling which is needed for the alternate > > exit. */ > > + /* Check if it's a not live PHI and multiple exits. In this case > > + there will be a usage later on after peeling which is needed for the > > + alternate exit. */ > > if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) > > - && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def) > > + && is_a <gphi *> (stmt) > > + && ((! VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)) > > + && ! *live_p) > > + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)) > > { > > if (dump_enabled_p ()) > > - dump_printf_loc (MSG_NOTE, vect_location, > > - "vec_stmt_relevant_p: induction forced for " > > - "early break.\n"); > > + { > > + if (is_a <gphi *> (stmt)) > > + dump_printf_loc (MSG_NOTE, vect_location, > > + "vec_stmt_relevant_p: PHI forced live for " > > + "early break.\n"); > > Isn't this always true since the condition above requires everything to > always be a phi? > Should it have been STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def ?
Ah, true - this was from a previous whack-a-mole attempt ... I'll fix that up. Richard. > Thanks, > Tamar > > > + else > > + dump_printf_loc (MSG_NOTE, vect_location, > > + "vec_stmt_relevant_p: induction forced for " > > + "early break.\n"); > > + } > > LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info); > > *live_p = true; > > - > > } > > > > if (*live_p && *relevant == vect_unused_in_scope > > @@ -714,6 +724,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info > > loop_vinfo, bool *fatal) > > bb = bbs[i]; > > for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) > > { > > + if (virtual_operand_p (gimple_phi_result (gsi_stmt (si)))) > > + continue; > > stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); > > if (dump_enabled_p ()) > > dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G", > > -- > > 2.43.0 > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)