In this PR, the SME mode-switching code needs to insert a stack-probe
loop for an alloca.  This patch allows the target to do that.

There are two parts to it: allowing loops for insertions in blocks,
and allowing them for insertions on edges.  The former can be handled
entirely within mode-switching itself, by recording which blocks have
had new branches inserted.  The latter requires an extension to
commit_one_edge_insertion.

I think the extension to commit_one_edge_insertion makes logical sense,
since it already explicitly allows internal loops during RTL expansion.
The single-block find_sub_basic_blocks is a relatively recent addition,
so wouldn't have been available when the code was originally written.

The patch also has a small and obvious fix to make the aarch64 emit
hook cope with labels.

I've added specific -fstack-clash-protection versions of all
aarch64-sme.exp tests that previously failed because of this bug.
I've also added -fno-stack-clash-protection to the original versions
of these tests if they contain scans that assume no protection.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/
        PR target/113220
        * cfgrtl.cc (commit_one_edge_insertion): Handle sequences that
        contain jumps even if called after initial RTL expansion.
        * mode-switching.cc: Include cfgbuild.h.
        (optimize_mode_switching): Allow the sequence returned by the
        emit hook to contain internal jumps.  Record which blocks
        contain such jumps and split the blocks at the end.
        * config/aarch64/aarch64.cc (aarch64_mode_emit): Check for
        non-debug insns when scanning the sequence.

gcc/testsuite/
        PR target/113220
        * gcc.target/aarch64/sme/call_sm_switch_5.c: Add
        -fno-stack-clash-protection.
        * gcc.target/aarch64/sme/call_sm_switch_5_scp.c: New test.
        * gcc.target/aarch64/sme/sibcall_6_scp.c: New test.
        * gcc.target/aarch64/sme/za_state_4.c: Add
        -fno-stack-clash-protection.
        * gcc.target/aarch64/sme/za_state_4_scp.c: New test.
        * gcc.target/aarch64/sme/za_state_5.c: Add
        -fno-stack-clash-protection.
        * gcc.target/aarch64/sme/za_state_5_scp.c: New test.
---
 gcc/cfgrtl.cc                                 | 27 ++++++++++++++-----
 gcc/config/aarch64/aarch64.cc                 |  2 ++
 gcc/mode-switching.cc                         | 15 +++++++++++
 .../gcc.target/aarch64/sme/call_sm_switch_5.c |  2 +-
 .../aarch64/sme/call_sm_switch_5_scp.c        |  3 +++
 .../gcc.target/aarch64/sme/sibcall_6_scp.c    |  3 +++
 .../gcc.target/aarch64/sme/za_state_4.c       |  2 +-
 .../gcc.target/aarch64/sme/za_state_4_scp.c   |  3 +++
 .../gcc.target/aarch64/sme/za_state_5.c       |  2 +-
 .../gcc.target/aarch64/sme/za_state_5_scp.c   |  3 +++
 10 files changed, 53 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c

diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc
index 15259c5e984..304c429c99b 100644
--- a/gcc/cfgrtl.cc
+++ b/gcc/cfgrtl.cc
@@ -2018,6 +2018,21 @@ commit_one_edge_insertion (edge e)
   insns = e->insns.r;
   e->insns.r = NULL;
 
+  /* Allow the sequence to contain internal jumps, such as a memcpy loop
+     or an allocation loop.  If such a sequence is emitted during RTL
+     expansion, we'll create the appropriate basic blocks later,
+     at the end of the pass.  But if such a sequence is emitted after
+     initial expansion, we'll need to find the subblocks ourselves.  */
+  bool contains_jump = false;
+  if (!currently_expanding_to_rtl)
+    for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
+      if (JUMP_P (insn))
+       {
+         rebuild_jump_labels_chain (insns);
+         contains_jump = true;
+         break;
+       }
+
   /* Figure out where to put these insns.  If the destination has
      one predecessor, insert there.  Except for the exit block.  */
   if (single_pred_p (e->dest) && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
@@ -2112,13 +2127,13 @@ commit_one_edge_insertion (edge e)
        delete_insn (before);
     }
   else
-    /* Some builtin expanders, such as those for memset and memcpy,
-       may generate loops and conditionals, and those may get emitted
-       into edges.  That's ok while expanding to rtl, basic block
-       boundaries will be identified and split afterwards.  ???  Need
-       we check whether the destination labels of any inserted jumps
-       are also part of the inserted sequence?  */
+    /* Sequences inserted after RTL expansion are expected to be SESE,
+       with only internal branches allowed.  If the sequence jumps outside
+       itself then we do not know how to add the associated edges here.  */
     gcc_assert (!JUMP_P (last) || currently_expanding_to_rtl);
+
+  if (contains_jump)
+    find_sub_basic_blocks (bb);
 }
 
 /* Update the CFG for all queued instructions.  */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 76f1230d81a..104f7e1831e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29513,6 +29513,8 @@ aarch64_mode_emit (int entity, int mode, int prev_mode, 
HARD_REG_SET live)
   HARD_REG_SET clobbers = {};
   for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
     {
+      if (!NONDEBUG_INSN_P (insn))
+       continue;
       vec_rtx_properties properties;
       properties.add_insn (insn, false);
       for (rtx_obj_reference ref : properties.refs ())
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 01754e9eed4..583929184ce 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "lcm.h"
 #include "cfgcleanup.h"
 #include "tree-pass.h"
+#include "cfgbuild.h"
 
 /* We want target macros for the mode switching code to be able to refer
    to instruction attribute values.  */
@@ -1105,6 +1106,8 @@ optimize_mode_switching (void)
   edge_list = pre_edge_lcm_avs (n_entities * max_num_modes, transp, comp, 
antic,
                                kill, avin, avout, &insert, &del);
 
+  auto_sbitmap jumping_blocks (last_basic_block_for_fn (cfun));
+  bitmap_clear (jumping_blocks);
   for (j = n_entities - 1; j >= 0; j--)
     {
       int no_mode = num_modes[entity_map[j]];
@@ -1215,6 +1218,13 @@ optimize_mode_switching (void)
                  /* Insert MODE_SET only if it is nonempty.  */
                  if (mode_set != NULL_RTX)
                    {
+                     for (auto insn = mode_set; insn; insn = NEXT_INSN (insn))
+                       if (JUMP_P (insn))
+                         {
+                           rebuild_jump_labels_chain (mode_set);
+                           bitmap_set_bit (jumping_blocks, bb->index);
+                           break;
+                         }
                      emitted = true;
                      if (NOTE_INSN_BASIC_BLOCK_P (ptr->insn_ptr))
                        /* We need to emit the insns in a FIFO-like manner,
@@ -1252,6 +1262,11 @@ optimize_mode_switching (void)
   sbitmap_vector_free (avin);
   sbitmap_vector_free (avout);
 
+  gcc_assert (SBITMAP_SIZE ((sbitmap) jumping_blocks)
+             == (unsigned int) last_basic_block_for_fn (cfun));
+  if (!bitmap_empty_p (jumping_blocks))
+    find_many_sub_basic_blocks (jumping_blocks);
+
   if (need_commit)
     commit_edge_insertions ();
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
index 6238ab80da2..d31b6b91f1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c
@@ -1,4 +1,4 @@
-// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls 
-funwind-tables" }
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls 
-funwind-tables -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 #include <arm_sve.h>
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c 
b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c
new file mode 100644
index 00000000000..09c64464907
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5_scp.c
@@ -0,0 +1,3 @@
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls 
-funwind-tables" }
+
+#include "call_sm_switch_5.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c 
b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c
new file mode 100644
index 00000000000..4f2e5746ecc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6_scp.c
@@ -0,0 +1,3 @@
+/* { dg-options "-O2 -fstack-clash-protection" } */
+
+#include "sibcall_6.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c 
b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
index cec0abf0ea9..f27470bb8a0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
@@ -1,4 +1,4 @@
-// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-options "-O -fno-optimize-sibling-calls -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 void private_za();
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c 
b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c
new file mode 100644
index 00000000000..1620f881d36
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4_scp.c
@@ -0,0 +1,3 @@
+// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
+
+#include "za_state_4.c"
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c 
b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
index d54840d3d77..52c96b3c32c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
@@ -1,4 +1,4 @@
-// { dg-options "-O2 -fno-optimize-sibling-calls" }
+// { dg-options "-O2 -fno-optimize-sibling-calls -fno-stack-clash-protection" }
 // { dg-final { check-function-bodies "**" "" } }
 
 void private_za();
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c 
b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c
new file mode 100644
index 00000000000..8b4d6e12949
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5_scp.c
@@ -0,0 +1,3 @@
+// { dg-options "-O -fno-optimize-sibling-calls -fstack-clash-protection" }
+
+#include "za_state_5.c"
-- 
2.25.1

Reply via email to