https://gcc.gnu.org/g:c1926449cad66b7e7875f214109950efdb8bb97b
commit r16-7913-gc1926449cad66b7e7875f214109950efdb8bb97b Author: Richard Biener <[email protected]> Date: Thu Mar 5 11:39:38 2026 +0100 Fix overly restrictive live-lane extraction replacement The following fixes a regression introduced by r11-5542, which restricts replacing uses of live original defs of now vectorized stmts to cases where that does not require new loop-closed PHIs to be inserted. That restriction keeps the original scalar definition live, which is sub-optimal and also not reflected in costing. The particular case fixed here, which can be seen in gcc.dg/vect/bb-slp-57.c, is the one where we are replacing an existing loop-closed PHI argument. PR tree-optimization/98064 * tree-vect-loop.cc (vectorizable_live_operation): Do not restrict replacing uses in a LC PHI. * gcc.dg/vect/bb-slp-57.c: Verify we do not keep original stmts live. Diff: --- gcc/testsuite/gcc.dg/vect/bb-slp-57.c | 1 + gcc/tree-vect-loop.cc | 45 +++++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-57.c b/gcc/testsuite/gcc.dg/vect/bb-slp-57.c index 6f13507fd67e..6633a3092ad7 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-57.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-57.c @@ -36,3 +36,4 @@ void l() /* { dg-final { scan-tree-dump-times "transform load" 1 "slp1" { target { { x86_64-*-* i?86-*-* } && lp64 } } } } */ /* { dg-final { scan-tree-dump "optimized: basic block" "slp1" { target { { x86_64-*-* i?86-*-* } && lp64 } } } } */ +/* { dg-final { scan-tree-dump-not "missed: Using original scalar computation" "slp1" } } */ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 670a03ea06b3..4818a8e88a19 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -10441,26 +10441,35 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, "def\n"); continue; } - /* ??? 
It can also happen that we end up pulling a def into - a loop where replacing out-of-loop uses would require - a new LC SSA PHI node. Retain the original scalar in - those cases as well. PR98064. */ - if (TREE_CODE (new_tree) == SSA_NAME - && !SSA_NAME_IS_DEFAULT_DEF (new_tree) - && (gimple_bb (use_stmt)->loop_father - != gimple_bb (vec_stmt)->loop_father) - && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father, - gimple_bb (use_stmt)->loop_father)) + FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Using original scalar computation for " - "live lane because there is an out-of-loop " - "definition for it\n"); - continue; + /* ??? It can also happen that we end up pulling a def into + a loop where replacing out-of-loop uses would require + a new LC SSA PHI node. Retain the original scalar in + those cases as well. PR98064. */ + edge e; + if (TREE_CODE (new_tree) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (new_tree) + && (gimple_bb (use_stmt)->loop_father + != gimple_bb (vec_stmt)->loop_father) + /* But a replacemend in a LC PHI is OK. This happens + in gcc.dg/vect/bb-slp-57.c for example. */ + && (gimple_code (use_stmt) != GIMPLE_PHI + || (((e = phi_arg_edge_from_use (use_p)), true) + && !loop_exit_edge_p + (gimple_bb (vec_stmt)->loop_father, e))) + && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father, + gimple_bb (use_stmt)->loop_father)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Using original scalar computation for " + "live lane because there is an " + "out-of-loop definition for it\n"); + continue; + } + SET_USE (use_p, new_tree); } - FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) - SET_USE (use_p, new_tree); update_stmt (use_stmt); } }
