https://gcc.gnu.org/g:b5c64db0a49d4653418457a520648c4305e01e75
commit r16-6104-gb5c64db0a49d4653418457a520648c4305e01e75 Author: Andrew Pinski <[email protected]> Date: Tue Nov 18 23:02:36 2025 -0800 ch: Improve copy header when the bbs are predicated as non-executed [PR122734] This is version based on https://gcc.gnu.org/pipermail/gcc-patches/2025-November/701533.html review. Though the only thing is we still need an extra check for the edge probability like it is done in single_likely_exit because probabilities are still not being tracked correctly it seems. I also added copy-headers-12.c which we fail by duplicating too much. But I think that is ok, as this pattern of being the "correct" part of the loop header leading to a noreturn function does not happen that often and when it does the header would have had a statically figured out conditional which is already being checked. Bootstrapped and tested on x86_64-linux-gnu. PR tree-optimization/122734 gcc/ChangeLog: * tree-ssa-loop-ch.cc (should_duplicate_loop_header_p): Add new argument, canbe_neverexecuted. When canbe_neverexecuted is true, return if a loop exit is "never executed" like we are doing an invariant conditional. (ch_base::copy_headers): Update call to should_duplicate_loop_header_p. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/20030711-1.c: Update. * gcc.dg/tree-ssa/copy-headers-10.c: New test. * gcc.dg/tree-ssa/copy-headers-11.c: New test. * gcc.dg/tree-ssa/copy-headers-12.c: New test. Signed-off-by: Andrew Pinski <[email protected]> Diff: --- gcc/testsuite/gcc.dg/tree-ssa/20030711-1.c | 8 ++--- gcc/testsuite/gcc.dg/tree-ssa/copy-headers-10.c | 25 +++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/copy-headers-11.c | 25 +++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/copy-headers-12.c | 32 +++++++++++++++++++ gcc/tree-ssa-loop-ch.cc | 42 ++++++++++++++++++++++--- 5 files changed, 124 insertions(+), 8 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20030711-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20030711-1.c index c3f75eff29ef..29e1df7a707e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/20030711-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/20030711-1.c @@ -44,12 +44,12 @@ record_component_aliases (type) /* The call to blah can not be eliminated. */ /* { dg-final { scan-tree-dump-times "blah \\(\\)" 1 "dom2" } } */ -/* There should be three IF conditionals. */ -/* { dg-final { scan-tree-dump-times "if " 3 "dom2"} } */ +/* There should be four IF conditionals. */ +/* { dg-final { scan-tree-dump-times "if " 4 "dom2"} } */ /* There should be two loads of type.binfo. */ /* { dg-final { scan-tree-dump-times "type\\.binfo" 2 "dom2"} } */ -/* There should be three loads of vec.length. */ -/* { dg-final { scan-tree-dump-times "vec.length" 3 "dom2"} } */ +/* There should be four loads of vec.length. */ +/* { dg-final { scan-tree-dump-times "vec.length" 4 "dom2"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-10.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-10.c new file mode 100644 index 000000000000..5641a9a1d3a2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-10.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ch-details -fdump-tree-ldist-details" } */ + +/* PR tree-optimization/122734 */ +/* We want to duplicate the block after the one containing the condition going to unreachable. + Since later on we will be removing the condition going to unreachable anyways. */ +/* So in the end ldist can generate a memset. */ + +static inline int size(int *a) +{ + int t = *a; + if (t < 0) __builtin_unreachable(); + return t; +} + +void f(int *l, short *d) +{ + for(int i = 0; i < size(l); i++) + d[i] = 0; +} + +/* { dg-final { scan-tree-dump-times "Duplicating bb . is a win" 1 "ch2" } } */ +/* { dg-final { scan-tree-dump-times "Will duplicate bb" 2 "ch2" } } */ +/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */ +/* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-11.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-11.c new file mode 100644 index 000000000000..2223a72f0238 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-11.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ch-details -fdump-tree-ldist-details" } */ + +/* PR tree-optimization/122734 */ +/* We want to duplicate the block after the one containing the condition going to unreachable. + Since later on we will be removing the condition going to unreachable anyways. */ +/* So in the end ldist can generate a memset. */ + +static inline int size(int *a) +{ + int t = *a; + if (t < 0) __builtin_trap(); + return t; +} + +void f(int *l, short *d) +{ + for(int i = 0; i < size(l); i++) + d[i] = 0; +} + +/* { dg-final { scan-tree-dump-times "Duplicating bb . is a win" 1 "ch2" } } */ +/* { dg-final { scan-tree-dump-times "Will duplicate bb" 2 "ch2" } } */ +/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */ +/* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-12.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-12.c new file mode 100644 index 000000000000..99a8b419a2ea --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-12.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ch-details" } */ + +/* PR tree-optimization/122734 */ +/* We want to duplicate the block after the one containing the + condition since it is not the "true" header. */ + +static inline int size(int *a) +{ + int t = *a; + if (t < 0) __builtin_unreachable(); + return t; +} + +void f(int *l, short *d) +{ + for(int i = 0; i < size(l); i++) + { + if (d[i] < 0) + return; + d[i]--; + } + __builtin_abort (); +} + +/* Not only we duplicate the true header but also duplicate the condition + `d[i] < 0` but we should not. This structure does not show up enough to + make a difference but we record it just in case we improve the heurstics + some more. */ +/* { dg-final { scan-tree-dump-times "Duplicating bb . is a win" 1 "ch2" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "Will duplicate bb" 2 "ch2" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */ diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc index 91a61dd4e80b..feecf91cf700 100644 --- a/gcc/tree-ssa-loop-ch.cc +++ b/gcc/tree-ssa-loop-ch.cc @@ -185,19 +185,23 @@ enum ch_decision /* We want to copy. */ ch_win, /* We want to copy and we will eliminate loop exit. */ - ch_win_invariant_exit + ch_win_invariant_exit, + }; /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT instructions should be duplicated, limit is decreased by the actual - amount. */ + amount. In the case of *CANBE_NEVEREXECUTED, if there is a exit edge + of the HEADER that is most likely never executed then consider that + as invariant and continue. Set *CANBE_NEVEREXECUTED to false otherwise. */ static ch_decision should_duplicate_loop_header_p (basic_block header, class loop *loop, gimple_ranger *ranger, int *limit, hash_set <edge> *invariant_exits, - hash_set <edge> *static_exits) + hash_set <edge> *static_exits, + bool *canbe_neverexecuted) { gimple_stmt_iterator bsi; @@ -460,6 +464,34 @@ should_duplicate_loop_header_p (basic_block header, class loop *loop, } return ch_win_invariant_exit; } + if (!static_exit && *canbe_neverexecuted) + { + /* See if one of the edges are an exit edge that is probable + never executed. + If so treat it as invariant exit win. */ + edge e; + edge_iterator ei; + bool hasone = false; + FOR_EACH_EDGE (e, ei, header->succs) + if (loop_exit_edge_p (loop, e) + && (probably_never_executed_edge_p (cfun, e) + /* We want to rule out paths to noreturns but not + low probabilities resulting from adjustments + or combining. + FIXME: once we have better quality tracking, + make this more robust. */ + || e->probability <= profile_probability::very_unlikely ())) + { + hasone = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + " `never executed` exit %i->%i\n", + e->src->index, e->dest->index); + } + if (hasone) + return ch_win_invariant_exit; + } + *canbe_neverexecuted = false; /* If the static exit fully optimize out, it is win to "duplicate" it. @@ -846,10 +878,12 @@ ch_base::copy_headers (function *fun) auto_vec <ch_decision, 32> decision; hash_set <edge> *invariant_exits = new hash_set <edge>; hash_set <edge> *static_exits = new hash_set <edge>; + bool canbe_neverexecuted = true; while ((ret = should_duplicate_loop_header_p (header, loop, ranger, &remaining_limit, invariant_exits, - static_exits)) + static_exits, + &canbe_neverexecuted)) != ch_impossible) { nheaders++;
