Richard Biener <richard.guent...@gmail.com> writes:
> On Tue, Aug 22, 2023 at 12:42 PM Szabolcs Nagy via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>>
>> From: Richard Sandiford <richard.sandif...@arm.com>
>>
>> The prologue/epilogue pass allows the prologue sequence
>> to contain jumps.  The sequence is then partitioned into
>> basic blocks using find_many_sub_basic_blocks.
>>
>> This patch treats epilogues in the same way.  It's needed for
>> a follow-on aarch64 patch that adds conditional code to both
>> the prologue and the epilogue.
>>
>> Tested on aarch64-linux-gnu (including with a follow-on patch)
>> and x86_64-linux-gnu.  OK to install?
>>
>> Richard
>>
>> gcc/
>>         * function.cc (thread_prologue_and_epilogue_insns): Handle
>>         epilogues that contain jumps.
>> ---
>>
>> This is a previously approved patch that was not committed
>> because it was not needed at the time, but i'd like to commit
>> it as it is needed for the followup aarch64 eh_return changes:
>>
>> https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605769.html
>>
>> ---
>>  gcc/function.cc | 10 ++++++++++
>>  1 file changed, 10 insertions(+)
>>
>> diff --git a/gcc/function.cc b/gcc/function.cc
>> index dd2c1136e07..70d1cd65303 100644
>> --- a/gcc/function.cc
>> +++ b/gcc/function.cc
>> @@ -6120,6 +6120,11 @@ thread_prologue_and_epilogue_insns (void)
>>                   && returnjump_p (BB_END (e->src)))
>>                 e->flags &= ~EDGE_FALLTHRU;
>>             }
>> +
>> +         auto_sbitmap blocks (last_basic_block_for_fn (cfun));
>> +         bitmap_clear (blocks);
>> +           bitmap_set_bit (blocks, BLOCK_FOR_INSN (epilogue_seq)->index);
>> +         find_many_sub_basic_blocks (blocks);
>>         }
>>        else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
>>         {
>> @@ -6218,6 +6223,11 @@ thread_prologue_and_epilogue_insns (void)
>>           set_insn_locations (seq, epilogue_location);
>>
>>           emit_insn_before (seq, insn);
>> +
>> +         auto_sbitmap blocks (last_basic_block_for_fn (cfun));
>> +         bitmap_clear (blocks);
>> +         bitmap_set_bit (blocks, BLOCK_FOR_INSN (insn)->index);
>> +         find_many_sub_basic_blocks (blocks);
>
> I'll note that clearing a full sbitmap to pass down a single basic block
> to find_many_sub_basic_blocks is a quite expensive operation.  May I suggest
> to add an overload operating on a single basic block?  It's only
>
>   FOR_EACH_BB_FN (bb, cfun)
>     SET_STATE (bb,
>                bitmap_bit_p (blocks, bb->index) ? BLOCK_TO_SPLIT :
> BLOCK_ORIGINAL);
>
> using the bitmap, so factoring the rest of the function and customizing this
> walk would do the trick.  Note that the whole function could be refactored to
> handle single blocks more efficiently.

Sorry for the late reply, but does this look OK?  Tested on
aarch64-linux-gnu and x86_64-linux-gnu.

Thanks,
Richard

---

The prologue/epilogue pass allows the prologue sequence to contain
jumps.  The sequence is then partitioned into basic blocks using
find_many_sub_basic_blocks.

This patch treats epilogues in a similar way.  Since only one block
might need to be split, the patch (re)introduces a find_sub_basic_blocks
routine to handle a single block.

The new routine hard-codes the assumption that split_block will chain
the new block immediately after the original block.  The routine doesn't
try to replicate the fix for PR81030, since that was specific to
gimple->rtl expansion.

The patch is needed for follow-on aarch64 patches that add conditional
code to the epilogue.  The tests are part of those patches.

gcc/
        * cfgbuild.h (find_sub_basic_blocks): Declare.
        * cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
        split out from...
        (find_many_sub_basic_blocks): ...here.
        (find_sub_basic_blocks): New function.
        * function.cc (thread_prologue_and_epilogue_insns): Handle
        epilogues that contain jumps.
---
 gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
 gcc/cfgbuild.h  |  1 +
 gcc/function.cc |  4 +++
 3 files changed, 70 insertions(+), 30 deletions(-)

diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
index 15ed4deb5f7..9a6b34fb4b1 100644
--- a/gcc/cfgbuild.cc
+++ b/gcc/cfgbuild.cc
@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
     }
 }
 
+/* Update the profile information for BB, which was created by splitting
+   an RTL block that had a non-final jump.  */
+
+static void
+update_profile_for_new_sub_basic_block (basic_block bb)
+{
+  edge e;
+  edge_iterator ei;
+
+  bool initialized_src = false, uninitialized_src = false;
+  bb->count = profile_count::zero ();
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      if (e->count ().initialized_p ())
+       {
+         bb->count += e->count ();
+         initialized_src = true;
+       }
+      else
+       uninitialized_src = true;
+    }
+  /* When some edges are missing with read profile, this is
+     most likely because RTL expansion introduced loop.
+     When profile is guessed we may have BB that is reachable
+     from unlikely path as well as from normal path.
+
+     TODO: We should handle loops created during BB expansion
+     correctly here.  For now we assume all those loop to cycle
+     precisely once.  */
+  if (!initialized_src
+      || (uninitialized_src
+          && profile_status_for_fn (cfun) < PROFILE_GUESSED))
+    bb->count = profile_count::uninitialized ();
+
+  compute_outgoing_frequencies (bb);
+}
+
 /* Assume that some pass has inserted labels or control flow
    instructions within a basic block.  Split basic blocks as needed
    and create edges.  */
@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
   if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
     FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
       {
-       edge e;
-       edge_iterator ei;
-
        if (STATE (bb) == BLOCK_ORIGINAL)
          continue;
        if (STATE (bb) == BLOCK_NEW)
          {
-           bool initialized_src = false, uninitialized_src = false;
-           bb->count = profile_count::zero ();
-           FOR_EACH_EDGE (e, ei, bb->preds)
-             {
-               if (e->count ().initialized_p ())
-                 {
-                   bb->count += e->count ();
-                   initialized_src = true;
-                 }
-               else
-                 uninitialized_src = true;
-             }
-           /* When some edges are missing with read profile, this is
-              most likely because RTL expansion introduced loop.
-              When profile is guessed we may have BB that is reachable
-              from unlikely path as well as from normal path.
-
-              TODO: We should handle loops created during BB expansion
-              correctly here.  For now we assume all those loop to cycle
-              precisely once.  */
-           if (!initialized_src
-               || (uninitialized_src
-                    && profile_status_for_fn (cfun) < PROFILE_GUESSED))
-             bb->count = profile_count::uninitialized ();
+           update_profile_for_new_sub_basic_block (bb);
+           continue;
          }
-       /* If nothing changed, there is no need to create new BBs.  */
-       else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
+       /* If nothing changed, there is no need to create new BBs.  */
+       if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
          {
            /* In rare occassions RTL expansion might have mistakely assigned
               a probabilities different from what is in CFG.  This happens
@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
              update_br_prob_note (bb);
            continue;
          }
-
        compute_outgoing_frequencies (bb);
       }
 
   FOR_EACH_BB_FN (bb, cfun)
     SET_STATE (bb, 0);
 }
+
+/* Like find_many_sub_basic_blocks, but look only within BB.  */
+
+void
+find_sub_basic_blocks (basic_block bb)
+{
+  basic_block end_bb = bb->next_bb;
+  find_bb_boundaries (bb);
+  if (bb->next_bb == end_bb)
+    return;
+
+  /* Re-scan and wire in all edges.  This expects simple (conditional)
+     jumps at the end of each new basic blocks.  */
+  make_edges (bb, end_bb->prev_bb, 1);
+
+  /* Update branch probabilities.  Expect only (un)conditional jumps
+     to be created with only the forward edges.  */
+  if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
+    {
+      compute_outgoing_frequencies (bb);
+      for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
+       update_profile_for_new_sub_basic_block (bb);
+    }
+}
diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
index 51d3eccb1ae..4191fb3fcba 100644
--- a/gcc/cfgbuild.h
+++ b/gcc/cfgbuild.h
@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
 extern bool control_flow_insn_p (const rtx_insn *);
 extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
 extern void find_many_sub_basic_blocks (sbitmap);
+extern void find_sub_basic_blocks (basic_block);
 
 #endif /* GCC_CFGBUILD_H */
diff --git a/gcc/function.cc b/gcc/function.cc
index 336af28fb22..afb0b33da9e 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -6112,6 +6112,8 @@ thread_prologue_and_epilogue_insns (void)
                  && returnjump_p (BB_END (e->src)))
                e->flags &= ~EDGE_FALLTHRU;
            }
+
+         find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
        }
       else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
        {
@@ -6210,6 +6212,8 @@ thread_prologue_and_epilogue_insns (void)
          set_insn_locations (seq, epilogue_location);
 
          emit_insn_before (seq, insn);
+
+         find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
        }
     }
 
-- 
2.25.1

Reply via email to