From: Ross Burton <ross.bur...@arm.com>

This series of patches fixes deficiencies in GCC's -fstack-protector
implementation for AArch64 when using dynamically allocated stack space.
This is CVE-2023-4039.  See:

https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf

for more details.

Signed-off-by: Ross Burton <ross.bur...@arm.com>
Signed-off-by: Steve Sakoman <st...@sakoman.com>
---
 meta/recipes-devtools/gcc/gcc-9.5.inc         |    1 +
 .../gcc/gcc-9.5/CVE-2023-4039.patch           | 1506 +++++++++++++++++
 2 files changed, 1507 insertions(+)
 create mode 100644 meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch

diff --git a/meta/recipes-devtools/gcc/gcc-9.5.inc 
b/meta/recipes-devtools/gcc/gcc-9.5.inc
index 23bfb1a9db..9bb41bbe24 100644
--- a/meta/recipes-devtools/gcc/gcc-9.5.inc
+++ b/meta/recipes-devtools/gcc/gcc-9.5.inc
@@ -70,6 +70,7 @@ SRC_URI = "\
            
file://0038-gentypes-genmodes-Do-not-use-__LINE__-for-maintainin.patch \
            
file://0039-process_alt_operands-Don-t-match-user-defined-regs-o.patch \
            
file://0002-libstdc-Fix-inconsistent-noexcept-specific-for-valar.patch \
+           file://CVE-2023-4039.patch \
 "
 S = "${TMPDIR}/work-shared/gcc-${PV}-${PR}/gcc-${PV}"
 SRC_URI[sha256sum] = 
"27769f64ef1d4cd5e2be8682c0c93f9887983e6cfd1a927ce5a0a2915a95cf8f"
diff --git a/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch 
b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch
new file mode 100644
index 0000000000..56d229066f
--- /dev/null
+++ b/meta/recipes-devtools/gcc/gcc-9.5/CVE-2023-4039.patch
@@ -0,0 +1,1506 @@
+From: Richard Sandiford <richard.sandif...@arm.com>
+Subject: [PATCH 00/19] aarch64: Fix -fstack-protector issue
+Date: Tue, 12 Sep 2023 16:25:10 +0100
+
+This series of patches fixes deficiencies in GCC's -fstack-protector
+implementation for AArch64 when using dynamically allocated stack space.
+This is CVE-2023-4039.  See:
+
+https://developer.arm.com/Arm%20Security%20Center/GCC%20Stack%20Protector%20Vulnerability%20AArch64
+https://github.com/metaredteam/external-disclosures/security/advisories/GHSA-x7ch-h5rf-w2mf
+
+for more details.
+
+The fix is to put the saved registers above the locals area when
+-fstack-protector is used.
+
+The series also fixes a stack-clash problem that I found while working
+on the CVE.  In unpatched sources, the stack-clash problem would only
+trigger for unrealistic numbers of arguments (8K 64-bit arguments, or an
+equivalent).  But it would be a more significant issue with the new
+-fstack-protector frame layout.  It's therefore important that both
+problems are fixed together.
+
+Some reorganisation of the code seemed necessary to fix the problems in a
+cleanish way.  The series is therefore quite long, but only a handful of
+patches should have any effect on code generation.
+
+See the individual patches for a detailed description.
+
+Tested on aarch64-linux-gnu. Pushed to trunk and to all active branches.
+I've also pushed backports to GCC 7+ to vendors/ARM/heads/CVE-2023-4039.
+
+CVE: CVE-2023-4039
+Upstream-Status: Submitted
+Signed-off-by: Ross Burton <ross.bur...@arm.com>
+  
+  
+From 78ebdb7b12d5e258b9811bab715734454268fd0c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Fri, 16 Jun 2023 17:00:51 +0100
+Subject: [PATCH 01/10] aarch64: Explicitly handle frames with no saved
+ registers
+
+If a frame has no saved registers, it can be allocated in one go.
+There is no need to treat the areas below and above the saved
+registers as separate.
+
+And if we allocate the frame in one go, it should be allocated
+as the initial_adjust rather than the final_adjust.  This allows the
+frame size to grow to guard_size - guard_used_by_caller before a stack
+probe is needed.  (A frame with no register saves is necessarily a
+leaf frame.)
+
+This is a no-op as things stand, since a leaf function will have
+no outgoing arguments, and so all the frame will be above where
+the saved registers normally go.
+
+gcc/
+       * config/aarch64/aarch64.c (aarch64_layout_frame): Explicitly
+       allocate the frame in one go if there are no saved registers.
+---
+ gcc/config/aarch64/aarch64.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index a35dceab9fc..e9dad682738 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4771,9 +4771,11 @@ aarch64_layout_frame (void)
+     max_push_offset = 256;
+ 
+   HOST_WIDE_INT const_size, const_fp_offset;
+-  if (cfun->machine->frame.frame_size.is_constant (&const_size)
+-      && const_size < max_push_offset
+-      && known_eq (crtl->outgoing_args_size, 0))
++  if (cfun->machine->frame.saved_regs_size == 0)
++    cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
++  else if (cfun->machine->frame.frame_size.is_constant (&const_size)
++         && const_size < max_push_offset
++         && known_eq (crtl->outgoing_args_size, 0))
+     {
+       /* Simple, small frame with no outgoing arguments:
+        stp reg1, reg2, [sp, -frame_size]!
+-- 
+2.34.1
+
+
+From 347487fffa0266d43bf18f1f91878410881f596e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Fri, 16 Jun 2023 16:55:12 +0100
+Subject: [PATCH 02/10] aarch64: Add bytes_below_hard_fp to frame info
+
+The frame layout code currently hard-codes the assumption that
+the number of bytes below the saved registers is equal to the
+size of the outgoing arguments.  This patch abstracts that
+value into a new field of aarch64_frame.
+
+gcc/
+       * config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
+       field.
+       * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize it,
+       and use it instead of crtl->outgoing_args_size.
+       (aarch64_get_separate_components): Use bytes_below_hard_fp instead
+       of outgoing_args_size.
+       (aarch64_process_components): Likewise.
+---
+ gcc/config/aarch64/aarch64.c | 50 +++++++++++++++++++-----------------
+ gcc/config/aarch64/aarch64.h |  6 ++++-
+ 2 files changed, 32 insertions(+), 24 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index e9dad682738..25cf10cc4b9 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4684,6 +4684,8 @@ aarch64_layout_frame (void)
+       last_fp_reg = regno;
+       }
+ 
++  cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
++
+   if (cfun->machine->frame.emit_frame_chain)
+     {
+       /* FP and LR are placed in the linkage record.  */
+@@ -4751,11 +4753,11 @@ aarch64_layout_frame (void)
+                          STACK_BOUNDARY / BITS_PER_UNIT);
+ 
+   /* Both these values are already aligned.  */
+-  gcc_assert (multiple_p (crtl->outgoing_args_size,
++  gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp,
+                         STACK_BOUNDARY / BITS_PER_UNIT));
+   cfun->machine->frame.frame_size
+     = (cfun->machine->frame.hard_fp_offset
+-       + crtl->outgoing_args_size);
++       + cfun->machine->frame.bytes_below_hard_fp);
+ 
+   cfun->machine->frame.locals_offset = 
cfun->machine->frame.saved_varargs_size;
+ 
+@@ -4775,23 +4777,23 @@ aarch64_layout_frame (void)
+     cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
+   else if (cfun->machine->frame.frame_size.is_constant (&const_size)
+          && const_size < max_push_offset
+-         && known_eq (crtl->outgoing_args_size, 0))
++         && known_eq (cfun->machine->frame.bytes_below_hard_fp, 0))
+     {
+-      /* Simple, small frame with no outgoing arguments:
++      /* Simple, small frame with no data below the saved registers.
+        stp reg1, reg2, [sp, -frame_size]!
+        stp reg3, reg4, [sp, 16]  */
+       cfun->machine->frame.callee_adjust = const_size;
+     }
+-  else if (known_lt (crtl->outgoing_args_size
++  else if (known_lt (cfun->machine->frame.bytes_below_hard_fp
+                    + cfun->machine->frame.saved_regs_size, 512)
+          && !(cfun->calls_alloca
+               && known_lt (cfun->machine->frame.hard_fp_offset,
+                            max_push_offset)))
+     {
+-      /* Frame with small outgoing arguments:
++      /* Frame with small area below the saved registers:
+        sub sp, sp, frame_size
+-       stp reg1, reg2, [sp, outgoing_args_size]
+-       stp reg3, reg4, [sp, outgoing_args_size + 16]  */
++       stp reg1, reg2, [sp, bytes_below_hard_fp]
++       stp reg3, reg4, [sp, bytes_below_hard_fp + 16]  */
+       cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
+       cfun->machine->frame.callee_offset
+       = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
+@@ -4799,22 +4801,23 @@ aarch64_layout_frame (void)
+   else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
+          && const_fp_offset < max_push_offset)
+     {
+-      /* Frame with large outgoing arguments but a small local area:
++      /* Frame with large area below the saved registers, but with a
++       small area above:
+        stp reg1, reg2, [sp, -hard_fp_offset]!
+        stp reg3, reg4, [sp, 16]
+-       sub sp, sp, outgoing_args_size  */
++       sub sp, sp, bytes_below_hard_fp  */
+       cfun->machine->frame.callee_adjust = const_fp_offset;
+       cfun->machine->frame.final_adjust
+       = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
+     }
+   else
+     {
+-      /* Frame with large local area and outgoing arguments using frame 
pointer:
++      /* General case:
+        sub sp, sp, hard_fp_offset
+        stp x29, x30, [sp, 0]
+        add x29, sp, 0
+        stp reg3, reg4, [sp, 16]
+-       sub sp, sp, outgoing_args_size  */
++       sub sp, sp, bytes_below_hard_fp  */
+       cfun->machine->frame.initial_adjust = 
cfun->machine->frame.hard_fp_offset;
+       cfun->machine->frame.final_adjust
+       = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
+@@ -5243,9 +5246,11 @@ aarch64_get_separate_components (void)
+     if (aarch64_register_saved_on_entry (regno))
+       {
+       poly_int64 offset = cfun->machine->frame.reg_offset[regno];
++
++      /* Get the offset relative to the register we'll use.  */
+       if (!frame_pointer_needed)
+-        offset += cfun->machine->frame.frame_size
+-                  - cfun->machine->frame.hard_fp_offset;
++        offset += cfun->machine->frame.bytes_below_hard_fp;
++
+       /* Check that we can access the stack slot of the register with one
+          direct load with no adjustments needed.  */
+       if (offset_12bit_unsigned_scaled_p (DImode, offset))
+@@ -5367,8 +5372,8 @@ aarch64_process_components (sbitmap components, bool 
prologue_p)
+       rtx reg = gen_rtx_REG (mode, regno);
+       poly_int64 offset = cfun->machine->frame.reg_offset[regno];
+       if (!frame_pointer_needed)
+-      offset += cfun->machine->frame.frame_size
+-                - cfun->machine->frame.hard_fp_offset;
++      offset += cfun->machine->frame.bytes_below_hard_fp;
++
+       rtx addr = plus_constant (Pmode, ptr_reg, offset);
+       rtx mem = gen_frame_mem (mode, addr);
+ 
+@@ -5410,8 +5415,7 @@ aarch64_process_components (sbitmap components, bool 
prologue_p)
+       /* REGNO2 can be saved/restored in a pair with REGNO.  */
+       rtx reg2 = gen_rtx_REG (mode, regno2);
+       if (!frame_pointer_needed)
+-      offset2 += cfun->machine->frame.frame_size
+-                - cfun->machine->frame.hard_fp_offset;
++      offset2 += cfun->machine->frame.bytes_below_hard_fp;
+       rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+       rtx mem2 = gen_frame_mem (mode, addr2);
+       rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
+@@ -5478,10 +5482,10 @@ aarch64_stack_clash_protection_alloca_probe_range 
(void)
+    registers.  If POLY_SIZE is not large enough to require a probe this 
function
+    will only adjust the stack.  When allocating the stack space
+    FRAME_RELATED_P is then used to indicate if the allocation is frame 
related.
+-   FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
+-   arguments.  If we are then we ensure that any allocation larger than the 
ABI
+-   defined buffer needs a probe so that the invariant of having a 1KB buffer 
is
+-   maintained.
++   FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
++   the saved registers.  If we are then we ensure that any allocation
++   larger than the ABI defined buffer needs a probe so that the
++   invariant of having a 1KB buffer is maintained.
+ 
+    We emit barriers after each stack adjustment to prevent optimizations from
+    breaking the invariant that we never drop the stack more than a page.  This
+@@ -5671,7 +5675,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
+   /* Handle any residuals.  Residuals of at least MIN_PROBE_THRESHOLD have to
+      be probed.  This maintains the requirement that each page is probed at
+      least once.  For initial probing we probe only if the allocation is
+-     more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
++     more than GUARD_SIZE - buffer, and below the saved registers we probe
+      if the amount is larger than buffer.  GUARD_SIZE - buffer + buffer ==
+      GUARD_SIZE.  This works that for any allocation that is large enough to
+      trigger a probe here, we'll have at least one, and if they're not large
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index af0bc3f1881..95831637ba7 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -712,9 +712,13 @@ struct GTY (()) aarch64_frame
+   HOST_WIDE_INT saved_varargs_size;
+ 
+   /* The size of the saved callee-save int/FP registers.  */
+-
+   HOST_WIDE_INT saved_regs_size;
+ 
++  /* The number of bytes between the bottom of the static frame (the bottom
++     of the outgoing arguments) and the hard frame pointer.  This value is
++     always a multiple of STACK_BOUNDARY.  */
++  poly_int64 bytes_below_hard_fp;
++
+   /* Offset from the base of the frame (incomming SP) to the
+      top of the locals area.  This value is always a multiple of
+      STACK_BOUNDARY.  */
+-- 
+2.34.1
+
+
+From 4604c4cd0a6c4c26d6594ec9a0383b4d9197d9df Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Tue, 27 Jun 2023 11:25:40 +0100
+Subject: [PATCH 03/10] aarch64: Rename locals_offset to bytes_above_locals
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+locals_offset was described as:
+
+  /* Offset from the base of the frame (incomming SP) to the
+     top of the locals area.  This value is always a multiple of
+     STACK_BOUNDARY.  */
+
+This is implicitly an “upside down” view of the frame: the incoming
+SP is at offset 0, and anything N bytes below the incoming SP is at
+offset N (rather than -N).
+
+However, reg_offset instead uses a “right way up” view; that is,
+it views offsets in address terms.  Something above X is at a
+positive offset from X and something below X is at a negative
+offset from X.
+
+Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
+target-independent code views offsets in address terms too:
+locals are allocated at negative offsets to virtual_stack_vars.
+
+It seems confusing to have *_offset fields of the same structure
+using different polarities like this.  This patch tries to avoid
+that by renaming locals_offset to bytes_above_locals.
+
+gcc/
+       * config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
+       (aarch64_frame::bytes_above_locals): ...this.
+       * config/aarch64/aarch64.c (aarch64_layout_frame)
+       (aarch64_initial_elimination_offset): Update accordingly.
+---
+ gcc/config/aarch64/aarch64.c | 9 +++++----
+ gcc/config/aarch64/aarch64.h | 6 +++---
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 25cf10cc4b9..dcaf491af42 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4759,7 +4759,8 @@ aarch64_layout_frame (void)
+     = (cfun->machine->frame.hard_fp_offset
+        + cfun->machine->frame.bytes_below_hard_fp);
+ 
+-  cfun->machine->frame.locals_offset = 
cfun->machine->frame.saved_varargs_size;
++  cfun->machine->frame.bytes_above_locals
++    = cfun->machine->frame.saved_varargs_size;
+ 
+   cfun->machine->frame.initial_adjust = 0;
+   cfun->machine->frame.final_adjust = 0;
+@@ -8566,14 +8567,14 @@ aarch64_initial_elimination_offset (unsigned from, 
unsigned to)
+ 
+       if (from == FRAME_POINTER_REGNUM)
+       return cfun->machine->frame.hard_fp_offset
+-             - cfun->machine->frame.locals_offset;
++             - cfun->machine->frame.bytes_above_locals;
+     }
+ 
+   if (to == STACK_POINTER_REGNUM)
+     {
+       if (from == FRAME_POINTER_REGNUM)
+-        return cfun->machine->frame.frame_size
+-               - cfun->machine->frame.locals_offset;
++      return cfun->machine->frame.frame_size
++             - cfun->machine->frame.bytes_above_locals;
+     }
+ 
+   return cfun->machine->frame.frame_size;
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index 95831637ba7..a079a88b4f4 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -719,10 +719,10 @@ struct GTY (()) aarch64_frame
+      always a multiple of STACK_BOUNDARY.  */
+   poly_int64 bytes_below_hard_fp;
+ 
+-  /* Offset from the base of the frame (incomming SP) to the
+-     top of the locals area.  This value is always a multiple of
++  /* The number of bytes between the top of the locals area and the top
++     of the frame (the incomming SP).  This value is always a multiple of
+      STACK_BOUNDARY.  */
+-  poly_int64 locals_offset;
++  poly_int64 bytes_above_locals;
+ 
+   /* Offset from the base of the frame (incomming SP) to the
+      hard_frame_pointer.  This value is always a multiple of
+-- 
+2.34.1
+
+
+From 16016465ff28a75f5e0540cbaeb4eb102fdc3230 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Tue, 27 Jun 2023 11:28:11 +0100
+Subject: [PATCH 04/10] aarch64: Rename hard_fp_offset to bytes_above_hard_fp
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Similarly to the previous locals_offset patch, hard_fp_offset
+was described as:
+
+  /* Offset from the base of the frame (incomming SP) to the
+     hard_frame_pointer.  This value is always a multiple of
+     STACK_BOUNDARY.  */
+  poly_int64 hard_fp_offset;
+
+which again took an “upside-down” view: higher offsets meant lower
+addresses.  This patch renames the field to bytes_above_hard_fp instead.
+
+gcc/
+       * config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename
+       to...
+       (aarch64_frame::bytes_above_hard_fp): ...this.
+       * config/aarch64/aarch64.c (aarch64_layout_frame)
+       (aarch64_expand_prologue): Update accordingly.
+       (aarch64_initial_elimination_offset): Likewise.
+---
+ gcc/config/aarch64/aarch64.c | 21 +++++++++++----------
+ gcc/config/aarch64/aarch64.h |  6 +++---
+ 2 files changed, 14 insertions(+), 13 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index dcaf491af42..2681e0c2bb9 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4747,7 +4747,7 @@ aarch64_layout_frame (void)
+   HOST_WIDE_INT varargs_and_saved_regs_size
+     = offset + cfun->machine->frame.saved_varargs_size;
+ 
+-  cfun->machine->frame.hard_fp_offset
++  cfun->machine->frame.bytes_above_hard_fp
+     = aligned_upper_bound (varargs_and_saved_regs_size
+                          + get_frame_size (),
+                          STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -4756,7 +4756,7 @@ aarch64_layout_frame (void)
+   gcc_assert (multiple_p (cfun->machine->frame.bytes_below_hard_fp,
+                         STACK_BOUNDARY / BITS_PER_UNIT));
+   cfun->machine->frame.frame_size
+-    = (cfun->machine->frame.hard_fp_offset
++    = (cfun->machine->frame.bytes_above_hard_fp
+        + cfun->machine->frame.bytes_below_hard_fp);
+ 
+   cfun->machine->frame.bytes_above_locals
+@@ -4788,7 +4788,7 @@ aarch64_layout_frame (void)
+   else if (known_lt (cfun->machine->frame.bytes_below_hard_fp
+                    + cfun->machine->frame.saved_regs_size, 512)
+          && !(cfun->calls_alloca
+-              && known_lt (cfun->machine->frame.hard_fp_offset,
++              && known_lt (cfun->machine->frame.bytes_above_hard_fp,
+                            max_push_offset)))
+     {
+       /* Frame with small area below the saved registers:
+@@ -4797,14 +4797,14 @@ aarch64_layout_frame (void)
+        stp reg3, reg4, [sp, bytes_below_hard_fp + 16]  */
+       cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
+       cfun->machine->frame.callee_offset
+-      = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
++      = cfun->machine->frame.frame_size - 
cfun->machine->frame.bytes_above_hard_fp;
+     }
+-  else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
++  else if (cfun->machine->frame.bytes_above_hard_fp.is_constant 
(&const_fp_offset)
+          && const_fp_offset < max_push_offset)
+     {
+       /* Frame with large area below the saved registers, but with a
+        small area above:
+-       stp reg1, reg2, [sp, -hard_fp_offset]!
++       stp reg1, reg2, [sp, -bytes_above_hard_fp]!
+        stp reg3, reg4, [sp, 16]
+        sub sp, sp, bytes_below_hard_fp  */
+       cfun->machine->frame.callee_adjust = const_fp_offset;
+@@ -4814,12 +4814,13 @@ aarch64_layout_frame (void)
+   else
+     {
+       /* General case:
+-       sub sp, sp, hard_fp_offset
++       sub sp, sp, bytes_above_hard_fp
+        stp x29, x30, [sp, 0]
+        add x29, sp, 0
+        stp reg3, reg4, [sp, 16]
+        sub sp, sp, bytes_below_hard_fp  */
+-      cfun->machine->frame.initial_adjust = 
cfun->machine->frame.hard_fp_offset;
++      cfun->machine->frame.initial_adjust
++      = cfun->machine->frame.bytes_above_hard_fp;
+       cfun->machine->frame.final_adjust
+       = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
+     }
+@@ -8563,10 +8564,10 @@ aarch64_initial_elimination_offset (unsigned from, 
unsigned to)
+   if (to == HARD_FRAME_POINTER_REGNUM)
+     {
+       if (from == ARG_POINTER_REGNUM)
+-      return cfun->machine->frame.hard_fp_offset;
++      return cfun->machine->frame.bytes_above_hard_fp;
+ 
+       if (from == FRAME_POINTER_REGNUM)
+-      return cfun->machine->frame.hard_fp_offset
++      return cfun->machine->frame.bytes_above_hard_fp
+              - cfun->machine->frame.bytes_above_locals;
+     }
+ 
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index a079a88b4f4..eab6da84a02 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -724,10 +724,10 @@ struct GTY (()) aarch64_frame
+      STACK_BOUNDARY.  */
+   poly_int64 bytes_above_locals;
+ 
+-  /* Offset from the base of the frame (incomming SP) to the
+-     hard_frame_pointer.  This value is always a multiple of
++  /* The number of bytes between the hard_frame_pointer and the top of
++     the frame (the incomming SP).  This value is always a multiple of
+      STACK_BOUNDARY.  */
+-  poly_int64 hard_fp_offset;
++  poly_int64 bytes_above_hard_fp;
+ 
+   /* The size of the frame.  This value is the offset from base of the
+      frame (incomming SP) to the stack_pointer.  This value is always
+-- 
+2.34.1
+
+
+From eb2271eb6bb68ec3c9aa9ae4746ea1ee5f18874a Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Thu, 22 Jun 2023 22:26:30 +0100
+Subject: [PATCH 05/10] aarch64: Tweak frame_size comment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This patch fixes another case in which a value was described with
+an “upside-down” view.
+
+gcc/
+       * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
+---
+ gcc/config/aarch64/aarch64.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
+index eab6da84a02..7c4b65ec55b 100644
+--- a/gcc/config/aarch64/aarch64.h
++++ b/gcc/config/aarch64/aarch64.h
+@@ -729,8 +729,8 @@ struct GTY (()) aarch64_frame
+      STACK_BOUNDARY.  */
+   poly_int64 bytes_above_hard_fp;
+ 
+-  /* The size of the frame.  This value is the offset from base of the
+-     frame (incomming SP) to the stack_pointer.  This value is always
++  /* The size of the frame, i.e. the number of bytes between the bottom
++     of the outgoing arguments and the incoming SP.  This value is always
+      a multiple of STACK_BOUNDARY.  */
+   poly_int64 frame_size;
+ 
+-- 
+2.34.1
+
+
+From cfed3b87e9351edff1568ade4ef666edc9887639 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Tue, 15 Aug 2023 19:05:30 +0100
+Subject: [PATCH 06/10] Backport check-function-bodies support
+
+---
+ gcc/testsuite/lib/scanasm.exp | 191 ++++++++++++++++++++++++++++++++++
+ 1 file changed, 191 insertions(+)
+
+diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
+index 35ccbc86fc0..c9af27bf47a 100644
+--- a/gcc/testsuite/lib/scanasm.exp
++++ b/gcc/testsuite/lib/scanasm.exp
+@@ -546,3 +546,194 @@ proc scan-lto-assembler { args } {
+     verbose "output_file: $output_file"
+     dg-scan "scan-lto-assembler" 1 $testcase $output_file $args
+ }
++
++# Read assembly file FILENAME and store a mapping from function names
++# to function bodies in array RESULT.  FILENAME has already been uploaded
++# locally where necessary and is known to exist.
++
++proc parse_function_bodies { filename result } {
++    upvar $result up_result
++
++    # Regexp for the start of a function definition (name in \1).
++    set label {^([a-zA-Z_]\S+):$}
++
++    # Regexp for the end of a function definition.
++    set terminator {^\s*\.size}
++
++    # Regexp for lines that aren't interesting.
++    set fluff {^\s*(?:\.|//|@|$)}
++
++    set fd [open $filename r]
++    set in_function 0
++    while { [gets $fd line] >= 0 } {
++      if { [regexp $label $line dummy function_name] } {
++          set in_function 1
++          set function_body ""
++      } elseif { $in_function } {
++          if { [regexp $terminator $line] } {
++              set up_result($function_name) $function_body
++              set in_function 0
++          } elseif { ![regexp $fluff $line] } {
++              append function_body $line "\n"
++          }
++      }
++    }
++    close $fd
++}
++
++# FUNCTIONS is an array that maps function names to function bodies.
++# Return true if it contains a definition of function NAME and if
++# that definition matches BODY_REGEXP.
++
++proc check_function_body { functions name body_regexp } {
++    upvar $functions up_functions
++
++    if { ![info exists up_functions($name)] } {
++      return 0
++    }
++    set fn_res [regexp "^$body_regexp\$" $up_functions($name)]
++    if { !$fn_res } {
++      verbose -log "body: $body_regexp"
++      verbose -log "against: $up_functions($name)"
++    }
++    return $fn_res
++}
++
++# Check the implementations of functions against expected output.  Used as:
++#
++# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } }
++#
++# See sourcebuild.texi for details.
++
++proc check-function-bodies { args } {
++    if { [llength $args] < 2 } {
++      error "too few arguments to check-function-bodies"
++    }
++    if { [llength $args] > 4 } {
++      error "too many arguments to check-function-bodies"
++    }
++
++    if { [llength $args] >= 3 } {
++      set required_flags [lindex $args 2]
++
++      upvar 2 dg-extra-tool-flags extra_tool_flags
++      set flags $extra_tool_flags
++
++      global torture_current_flags
++      if { [info exists torture_current_flags] } {
++          append flags " " $torture_current_flags
++      }
++      foreach required_flag $required_flags {
++          switch -- $required_flag {
++              target -
++              xfail {
++                  error "misplaced $required_flag in check-function-bodies"
++              }
++          }
++      }
++      foreach required_flag $required_flags {
++          if { ![regexp " $required_flag " $flags] } {
++              return
++          }
++      }
++    }
++
++    set xfail_all 0
++    if { [llength $args] >= 4 } {
++      switch [dg-process-target [lindex $args 3]] {
++          "S" { }
++          "N" { return }
++          "F" { set xfail_all 1 }
++          "P" { }
++      }
++    }
++
++    set testcase [testname-for-summary]
++    # The name might include a list of options; extract the file name.
++    set filename [lindex $testcase 0]
++
++    global srcdir
++    set input_filename "$srcdir/$filename"
++    set output_filename "[file rootname [file tail $filename]].s"
++
++    set prefix [lindex $args 0]
++    set prefix_len [string length $prefix]
++    set terminator [lindex $args 1]
++    if { [string equal $terminator ""] } {
++      set terminator "*/"
++    }
++    set terminator_len [string length $terminator]
++
++    set have_bodies 0
++    if { [is_remote host] } {
++      remote_upload host "$filename"
++    }
++    if { [file exists $output_filename] } {
++      parse_function_bodies $output_filename functions
++      set have_bodies 1
++    } else {
++      verbose -log "$testcase: output file does not exist"
++    }
++
++    set count 0
++    set function_regexp ""
++    set label {^(\S+):$}
++
++    set lineno 1
++    set fd [open $input_filename r]
++    set in_function 0
++    while { [gets $fd line] >= 0 } {
++      if { [string equal -length $prefix_len $line $prefix] } {
++          set line [string trim [string range $line $prefix_len end]]
++          if { !$in_function } {
++              if { [regexp "^(.*?\\S)\\s+{(.*)}\$" $line dummy \
++                        line selector] } {
++                  set selector [dg-process-target $selector]
++              } else {
++                  set selector "P"
++              }
++              if { ![regexp $label $line dummy function_name] } {
++                  close $fd
++                  error "check-function-bodies: line $lineno does not have a 
function label"
++              }
++              set in_function 1
++              set function_regexp ""
++          } elseif { [string equal $line "("] } {
++              append function_regexp "(?:"
++          } elseif { [string equal $line "|"] } {
++              append function_regexp "|"
++          } elseif { [string equal $line ")"] } {
++              append function_regexp ")"
++          } elseif { [string equal $line "..."] } {
++              append function_regexp ".*"
++          } else {
++              append function_regexp "\t" $line "\n"
++          }
++      } elseif { [string equal -length $terminator_len $line $terminator] } {
++          if { ![string equal $selector "N"] } {
++              if { $xfail_all || [string equal $selector "F"] } {
++                  setup_xfail "*-*-*"
++              }
++              set testname "$testcase check-function-bodies $function_name"
++              if { !$have_bodies } {
++                  unresolved $testname
++              } elseif { [check_function_body functions $function_name \
++                              $function_regexp] } {
++                  pass $testname
++              } else {
++                  fail $testname
++              }
++          }
++          set in_function 0
++          incr count
++      }
++      incr lineno
++    }
++    close $fd
++    if { $in_function } {
++      error "check-function-bodies: missing \"$terminator\""
++    }
++    if { $count == 0 } {
++      error "check-function-bodies: no matches found"
++    }
++}
+-- 
+2.34.1
+
+
+From 4dd8925d95d3d6d89779b494b5f4cfadcf9fa96e Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Tue, 27 Jun 2023 15:11:44 +0100
+Subject: [PATCH 07/10] aarch64: Tweak stack clash boundary condition
+
+The AArch64 ABI says that, when stack clash protection is used,
+there can be a maximum of 1KiB of unprobed space at sp on entry
+to a function.  Therefore, we need to probe when allocating
+>= guard_size - 1KiB of data (>= rather than >).  This is what
+GCC does.
+
+If an allocation is exactly guard_size bytes, it is enough to allocate
+those bytes and probe once at offset 1024.  It isn't possible to use a
+single probe at any other offset: higher would complicate later code,
+by leaving more unprobed space than usual, while lower would risk
+leaving an entire page unprobed.  For simplicity, the code probes all
+allocations at offset 1024.
+
+Some register saves also act as probes.  If we need to allocate
+more space below the last such register save probe, we need to
+probe the allocation if it is > 1KiB.  Again, this allocation is
+then sometimes (but not always) probed at offset 1024.  This sort of
+allocation is currently only used for outgoing arguments, which are
+rarely this big.
+
+However, the code also probed if this final outgoing-arguments
+allocation was == 1KiB, rather than just > 1KiB.  This isn't
+necessary, since the register save then probes at offset 1024
+as required.  Continuing to probe allocations of exactly 1KiB
+would complicate later patches.
+
+gcc/
+       * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
+       Don't probe final allocations that are exactly 1KiB in size (after
+       unprobed space above the final allocation has been deducted).
+
+gcc/testsuite/
+       * gcc.target/aarch64/stack-check-prologue-17.c: New test.
+---
+ gcc/config/aarch64/aarch64.c                  |  6 +-
+ .../aarch64/stack-check-prologue-17.c         | 55 +++++++++++++++++++
+ 2 files changed, 60 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 2681e0c2bb9..4c9e11cd7cf 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -5506,6 +5506,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
+   HOST_WIDE_INT guard_size
+     = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
+   HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
++  HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
++  gcc_assert (multiple_p (poly_size, byte_sp_alignment));
+   /* When doing the final adjustment for the outgoing argument size we can't
+      assume that LR was saved at position 0.  So subtract it's offset from the
+      ABI safe buffer so that we don't accidentally allow an adjustment that
+@@ -5513,7 +5515,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
+      probing.  */
+   HOST_WIDE_INT min_probe_threshold
+     = final_adjustment_p
+-      ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM]
++      ? (guard_used_by_caller
++       + byte_sp_alignment
++       - cfun->machine->frame.reg_offset[LR_REGNUM])
+       : guard_size - guard_used_by_caller;
+ 
+   poly_int64 frame_size = cfun->machine->frame.frame_size;
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+new file mode 100644
+index 00000000000..0d8a25d73a2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -0,0 +1,55 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param 
stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
++**    ...
++**    str     x30, \[sp\]
++**    sub     sp, sp, #1024
++**    cbnz    w0, .*
++**    bl      g
++**    ...
++*/
++int test1(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++    }
++  g();
++  return 1;
++}
++
++/*
++** test2:
++**    ...
++**    str     x30, \[sp\]
++**    sub     sp, sp, #1040
++**    str     xzr, \[sp\]
++**    cbnz    w0, .*
++**    bl      g
++**    ...
++*/
++int test2(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x);
++    }
++  g();
++  return 1;
++}
+-- 
+2.34.1
+
+
+From 12517baf6c88447e3bda3a459ac4c29d61f84e6c Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Tue, 27 Jun 2023 15:12:55 +0100
+Subject: [PATCH 08/10] aarch64: Put LR save probe in first 16 bytes
+
+-fstack-clash-protection uses the save of LR as a probe for the next
+allocation.  The next allocation could be:
+
+* another part of the static frame, e.g. when allocating SVE save slots
+  or outgoing arguments
+
+* an alloca in the same function
+
+* an allocation made by a callee function
+
+However, when -fomit-frame-pointer is used, the LR save slot is placed
+above the other GPR save slots.  It could therefore be up to 80 bytes
+above the base of the GPR save area (which is also the hard fp address).
+
+aarch64_allocate_and_probe_stack_space took this into account when
+deciding how much subsequent space could be allocated without needing
+a probe.  However, it interacted badly with:
+
+      /* If doing a small final adjustment, we always probe at offset 0.
+        This is done to avoid issues when LR is not at position 0 or when
+        the final adjustment is smaller than the probing offset.  */
+      else if (final_adjustment_p && rounded_size == 0)
+       residual_probe_offset = 0;
+
+which forces any allocation that is smaller than the guard page size
+to be probed at offset 0 rather than the usual offset 1024.  It was
+therefore possible to construct cases in which we had:
+
+* a probe using LR at SP + 80 bytes (or some other value >= 16)
+* an allocation of the guard page size - 16 bytes
+* a probe at SP + 0
+
+which allocates guard page size + 64 consecutive unprobed bytes.
+
+This patch requires the LR probe to be in the first 16 bytes of the
+save area when stack clash protection is active.  Doing it
+unconditionally would cause code-quality regressions.
+
+gcc/
+       * config/aarch64/aarch64.c (aarch64_layout_frame): Ensure that
+       the LR save slot is in the first 16 bytes of the register save area.
+       (aarch64_allocate_and_probe_stack_space): Remove workaround for
+       when LR was not in the first 16 bytes.
+
+gcc/testsuite/
+       * gcc.target/aarch64/stack-check-prologue-18.c: New test.
+---
+ gcc/config/aarch64/aarch64.c                  |  50 +++++----
+ .../aarch64/stack-check-prologue-18.c         | 100 ++++++++++++++++++
+ 2 files changed, 127 insertions(+), 23 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 4c9e11cd7cf..1e8467fdd03 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4686,15 +4686,31 @@ aarch64_layout_frame (void)
+ 
+   cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
+ 
++#define ALLOCATE_GPR_SLOT(REGNO)                                      \
++  do                                                                  \
++    {                                                                 \
++      cfun->machine->frame.reg_offset[REGNO] = offset;                        
\
++      if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)               
\
++      cfun->machine->frame.wb_candidate1 = (REGNO);                   \
++      else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)  \
++      cfun->machine->frame.wb_candidate2 = (REGNO);                   \
++      offset += UNITS_PER_WORD;                                               
\
++    }                                                                 \
++  while (0)
++
+   if (cfun->machine->frame.emit_frame_chain)
+     {
+       /* FP and LR are placed in the linkage record.  */
+-      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
+-      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
+-      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
+-      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
+-      offset = 2 * UNITS_PER_WORD;
++      ALLOCATE_GPR_SLOT (R29_REGNUM);
++      ALLOCATE_GPR_SLOT (R30_REGNUM);
+     }
++  else if (flag_stack_clash_protection
++         && cfun->machine->frame.reg_offset[R30_REGNUM] == SLOT_REQUIRED)
++    /* Put the LR save slot first, since it makes a good choice of probe
++       for stack clash purposes.  The idea is that the link register usually
++       has to be saved before a call anyway, and so we lose little by
++       stopping it from being individually shrink-wrapped.  */
++    ALLOCATE_GPR_SLOT (R30_REGNUM);
+ 
+   /* With stack-clash, LR must be saved in non-leaf functions.  */
+   gcc_assert (crtl->is_leaf
+@@ -4704,14 +4720,9 @@ aarch64_layout_frame (void)
+   /* Now assign stack slots for them.  */
+   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
+     if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
+-      {
+-      cfun->machine->frame.reg_offset[regno] = offset;
+-      if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
+-        cfun->machine->frame.wb_candidate1 = regno;
+-      else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
+-        cfun->machine->frame.wb_candidate2 = regno;
+-      offset += UNITS_PER_WORD;
+-      }
++      ALLOCATE_GPR_SLOT (regno);
++
++#undef ALLOCATE_GPR_SLOT
+ 
+   HOST_WIDE_INT max_int_offset = offset;
+   offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+@@ -5508,16 +5519,9 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
+   HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
+   HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
+   gcc_assert (multiple_p (poly_size, byte_sp_alignment));
+-  /* When doing the final adjustment for the outgoing argument size we can't
+-     assume that LR was saved at position 0.  So subtract it's offset from the
+-     ABI safe buffer so that we don't accidentally allow an adjustment that
+-     would result in an allocation larger than the ABI buffer without
+-     probing.  */
+   HOST_WIDE_INT min_probe_threshold
+     = final_adjustment_p
+-      ? (guard_used_by_caller
+-       + byte_sp_alignment
+-       - cfun->machine->frame.reg_offset[LR_REGNUM])
++      ? guard_used_by_caller + byte_sp_alignment
+       : guard_size - guard_used_by_caller;
+ 
+   poly_int64 frame_size = cfun->machine->frame.frame_size;
+@@ -5697,8 +5701,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
+       if (final_adjustment_p && rounded_size != 0)
+       min_probe_threshold = 0;
+       /* If doing a small final adjustment, we always probe at offset 0.
+-       This is done to avoid issues when LR is not at position 0 or when
+-       the final adjustment is smaller than the probing offset.  */
++       This is done to avoid issues when the final adjustment is smaller
++       than the probing offset.  */
+       else if (final_adjustment_p && rounded_size == 0)
+       residual_probe_offset = 0;
+ 
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+new file mode 100644
+index 00000000000..82447d20fff
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -0,0 +1,100 @@
++/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param 
stack-clash-protection-guard-size=12" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void f(int, ...);
++void g();
++
++/*
++** test1:
++**    ...
++**    str     x30, \[sp\]
++**    sub     sp, sp, #4064
++**    str     xzr, \[sp\]
++**    cbnz    w0, .*
++**    bl      g
++**    ...
++**    str     x26, \[sp, #?4128\]
++**    ...
++*/
++int test1(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      asm volatile ("" :::
++                  "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++    }
++  g();
++  return 1;
++}
++
++/*
++** test2:
++**    ...
++**    str     x30, \[sp\]
++**    sub     sp, sp, #1040
++**    str     xzr, \[sp\]
++**    cbnz    w0, .*
++**    bl      g
++**    ...
++*/
++int test2(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      asm volatile ("" :::
++                  "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x);
++    }
++  g();
++  return 1;
++}
++
++/*
++** test3:
++**    ...
++**    str     x30, \[sp\]
++**    sub     sp, sp, #1024
++**    cbnz    w0, .*
++**    bl      g
++**    ...
++*/
++int test3(int z) {
++  __uint128_t x = 0;
++  int y[0x400];
++  if (z)
++    {
++      asm volatile ("" :::
++                  "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
++      f(0, 0, 0, 0, 0, 0, 0, &y,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
++      x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
++    }
++  g();
++  return 1;
++}
+-- 
+2.34.1
+
+
+From f2684e63652bb251d22c79e40081c646df1f36b6 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Tue, 8 Aug 2023 01:57:26 +0100
+Subject: [PATCH 09/10] aarch64: Simplify probe of final frame allocation
+
+Previous patches ensured that the final frame allocation only needs
+a probe when the size is strictly greater than 1KiB.  It's therefore
+safe to use the normal 1024 probe offset in all cases.
+
+The main motivation for doing this is to simplify the code and
+remove the number of special cases.
+
+gcc/
+       * config/aarch64/aarch64.c (aarch64_allocate_and_probe_stack_space):
+       Always probe the residual allocation at offset 1024, asserting
+       that that is in range.
+
+gcc/testsuite/
+       * gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
+       to be at offset 1024 rather than offset 0.
+       * gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.c                         | 12 ++++--------
+ .../gcc.target/aarch64/stack-check-prologue-17.c     |  2 +-
+ .../gcc.target/aarch64/stack-check-prologue-18.c     |  7 +++++--
+ 3 files changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 1e8467fdd03..705f719a2ea 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -5695,16 +5695,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
+      are still safe.  */
+   if (residual)
+     {
+-      HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
++      gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
++
+       /* If we're doing final adjustments, and we've done any full page
+        allocations then any residual needs to be probed.  */
+       if (final_adjustment_p && rounded_size != 0)
+       min_probe_threshold = 0;
+-      /* If doing a small final adjustment, we always probe at offset 0.
+-       This is done to avoid issues when the final adjustment is smaller
+-       than the probing offset.  */
+-      else if (final_adjustment_p && rounded_size == 0)
+-      residual_probe_offset = 0;
+ 
+       aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
+       if (residual >= min_probe_threshold)
+@@ -5715,8 +5711,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx 
temp2,
+                    HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
+                    "\n", residual);
+ 
+-          emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+-                                           residual_probe_offset));
++        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
++                                         guard_used_by_caller));
+         emit_insn (gen_blockage ());
+       }
+     }
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+index 0d8a25d73a2..f0ec1389771 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+@@ -33,7 +33,7 @@ int test1(int z) {
+ **    ...
+ **    str     x30, \[sp\]
+ **    sub     sp, sp, #1040
+-**    str     xzr, \[sp\]
++**    str     xzr, \[sp, #?1024\]
+ **    cbnz    w0, .*
+ **    bl      g
+ **    ...
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+index 82447d20fff..71d33ba34e9 100644
+--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
++++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+@@ -8,8 +8,9 @@ void g();
+ ** test1:
+ **    ...
+ **    str     x30, \[sp\]
++**    ...
+ **    sub     sp, sp, #4064
+-**    str     xzr, \[sp\]
++**    str     xzr, \[sp, #?1024\]
+ **    cbnz    w0, .*
+ **    bl      g
+ **    ...
+@@ -49,8 +50,9 @@ int test1(int z) {
+ ** test2:
+ **    ...
+ **    str     x30, \[sp\]
++**    ...
+ **    sub     sp, sp, #1040
+-**    str     xzr, \[sp\]
++**    str     xzr, \[sp, #?1024\]
+ **    cbnz    w0, .*
+ **    bl      g
+ **    ...
+@@ -77,6 +79,7 @@ int test2(int z) {
+ ** test3:
+ **    ...
+ **    str     x30, \[sp\]
++**    ...
+ **    sub     sp, sp, #1024
+ **    cbnz    w0, .*
+ **    bl      g
+-- 
+2.34.1
+
+
+From bf3eeaa0182a92987570d9c787bd45079eebf528 Mon Sep 17 00:00:00 2001
+From: Richard Sandiford <richard.sandif...@arm.com>
+Date: Thu, 15 Jun 2023 19:16:52 +0100
+Subject: [PATCH 10/10] aarch64: Make stack smash canary protect saved
+ registers
+
+AArch64 normally puts the saved registers near the bottom of the frame,
+immediately above any dynamic allocations.  But this means that a
+stack-smash attack on those dynamic allocations could overwrite the
+saved registers without needing to reach as far as the stack smash
+canary.
+
+The same thing could also happen for variable-sized arguments that are
+passed by value, since those are allocated before a call and popped on
+return.
+
+This patch avoids that by putting the locals (and thus the canary) below
+the saved registers when stack smash protection is active.
+
+The patch fixes CVE-2023-4039.
+
+gcc/
+       * config/aarch64/aarch64.c (aarch64_save_regs_above_locals_p):
+       New function.
+       (aarch64_layout_frame): Use it to decide whether locals should
+       go above or below the saved registers.
+       (aarch64_expand_prologue): Update stack layout comment.
+       Emit a stack tie after the final adjustment.
+
+gcc/testsuite/
+       * gcc.target/aarch64/stack-protector-8.c: New test.
+       * gcc.target/aarch64/stack-protector-9.c: Likewise.
+---
+ gcc/config/aarch64/aarch64.c                  | 46 +++++++++++++--
+ .../gcc.target/aarch64/stack-protector-8.c    | 58 +++++++++++++++++++
+ .../gcc.target/aarch64/stack-protector-9.c    | 33 +++++++++++
+ 3 files changed, 133 insertions(+), 4 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+
+diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
+index 705f719a2ea..3d094214fac 100644
+--- a/gcc/config/aarch64/aarch64.c
++++ b/gcc/config/aarch64/aarch64.c
+@@ -4622,6 +4622,20 @@ aarch64_needs_frame_chain (void)
+   return aarch64_use_frame_pointer;
+ }
+ 
++/* Return true if the current function should save registers above
++   the locals area, rather than below it.  */
++
++static bool
++aarch64_save_regs_above_locals_p ()
++{
++  /* When using stack smash protection, make sure that the canary slot
++     comes between the locals and the saved registers.  Otherwise,
++     it would be possible for a carefully sized smash attack to change
++     the saved registers (particularly LR and FP) without reaching the
++     canary.  */
++  return crtl->stack_protect_guard;
++}
++
+ /* Mark the registers that need to be saved by the callee and calculate
+    the size of the callee-saved registers area and frame record (both FP
+    and LR may be omitted).  */
+@@ -4686,6 +4700,16 @@ aarch64_layout_frame (void)
+ 
+   cfun->machine->frame.bytes_below_hard_fp = crtl->outgoing_args_size;
+ 
++  bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
++
++  if (regs_at_top_p)
++    {
++      cfun->machine->frame.bytes_below_hard_fp += get_frame_size ();
++      cfun->machine->frame.bytes_below_hard_fp
++      = aligned_upper_bound (cfun->machine->frame.bytes_below_hard_fp,
++                             STACK_BOUNDARY / BITS_PER_UNIT);
++    }
++
+ #define ALLOCATE_GPR_SLOT(REGNO)                                      \
+   do                                                                  \
+     {                                                                 \
+@@ -4758,9 +4782,11 @@ aarch64_layout_frame (void)
+   HOST_WIDE_INT varargs_and_saved_regs_size
+     = offset + cfun->machine->frame.saved_varargs_size;
+ 
++  cfun->machine->frame.bytes_above_hard_fp = varargs_and_saved_regs_size;
++  if (!regs_at_top_p)
++    cfun->machine->frame.bytes_above_hard_fp += get_frame_size ();
+   cfun->machine->frame.bytes_above_hard_fp
+-    = aligned_upper_bound (varargs_and_saved_regs_size
+-                         + get_frame_size (),
++    = aligned_upper_bound (cfun->machine->frame.bytes_above_hard_fp,
+                          STACK_BOUNDARY / BITS_PER_UNIT);
+ 
+   /* Both these values are already aligned.  */
+@@ -4772,6 +4798,9 @@ aarch64_layout_frame (void)
+ 
+   cfun->machine->frame.bytes_above_locals
+     = cfun->machine->frame.saved_varargs_size;
++  if (regs_at_top_p)
++    cfun->machine->frame.bytes_above_locals
++      += cfun->machine->frame.saved_regs_size;
+ 
+   cfun->machine->frame.initial_adjust = 0;
+   cfun->machine->frame.final_adjust = 0;
+@@ -5764,10 +5793,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned 
int reg,
+       |  for register varargs         |
+       |                               |
+       +-------------------------------+
+-      |  local variables              | <-- frame_pointer_rtx
++      |  local variables (1)          | <-- frame_pointer_rtx
+       |                               |
+       +-------------------------------+
+-      |  padding                      | \
++      |  padding (1)                  | \
+       +-------------------------------+  |
+       |  callee-saved registers       |  | frame.saved_regs_size
+       +-------------------------------+  |
+@@ -5775,6 +5804,10 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned 
int reg,
+       +-------------------------------+  |
+       |  FP'                          | / <- hard_frame_pointer_rtx (aligned)
+         +-------------------------------+
++      |  local variables (2)          |
++      +-------------------------------+
++      |  padding (2)                  |
++      +-------------------------------+
+       |  dynamic allocation           |
+       +-------------------------------+
+       |  padding                      |
+@@ -5784,6 +5817,9 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int 
reg,
+       +-------------------------------+
+       |                               | <-- stack_pointer_rtx (aligned)
+ 
++   The regions marked (1) and (2) are mutually exclusive.  (2) is used
++   when aarch64_save_regs_above_locals_p is true.
++
+    Dynamic stack allocations via alloca() decrease stack_pointer_rtx
+    but leave frame_pointer_rtx and hard_frame_pointer_rtx
+    unchanged.
+@@ -5937,6 +5973,8 @@ aarch64_expand_prologue (void)
+      that is assumed by the called.  */
+   aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
+                                         !frame_pointer_needed, true);
++  if (emit_frame_chain && maybe_ne (final_adjust, 0))
++    emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
+ }
+ 
+ /* Return TRUE if we can use a simple_return insn.
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c 
b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+new file mode 100644
+index 00000000000..c5e7deef6c1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
+@@ -0,0 +1,58 @@
++/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg 
-mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++void g(void *);
++
++/*
++** test1:
++**    sub     sp, sp, #288
++**    stp     x29, x30, \[sp, #?272\]
++**    add     x29, sp, #?272
++**    mrs     (x[0-9]+), tpidr2_el0
++**    ldr     (x[0-9]+), \[\1, #?16\]
++**    str     \2, \[sp, #?264\]
++**    mov     \2, *0
++**    add     x0, sp, #?8
++**    bl      g
++**    ...
++**    mrs     .*
++**    ...
++**    bne     .*
++**    ...
++**    ldp     x29, x30, \[sp, #?272\]
++**    add     sp, sp, #?288
++**    ret
++**    bl      __stack_chk_fail
++*/
++int test1() {
++  int y[0x40];
++  g(y);
++  return 1;
++}
++
++/*
++** test2:
++**    stp     x29, x30, \[sp, #?-16\]!
++**    mov     x29, sp
++**    sub     sp, sp, #1040
++**    mrs     (x[0-9]+), tpidr2_el0
++**    ldr     (x[0-9]+), \[\1, #?16\]
++**    str     \2, \[sp, #?1032\]
++**    mov     \2, *0
++**    add     x0, sp, #?8
++**    bl      g
++**    ...
++**    mrs     .*
++**    ...
++**    bne     .*
++**    ...
++**    add     sp, sp, #?1040
++**    ldp     x29, x30, \[sp\], #?16
++**    ret
++**    bl      __stack_chk_fail
++*/
++int test2() {
++  int y[0x100];
++  g(y);
++  return 1;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c 
b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+new file mode 100644
+index 00000000000..58f322aa480
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
+@@ -0,0 +1,33 @@
++/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++/*
++** main:
++**    ...
++**    stp     x29, x30, \[sp, #?-[0-9]+\]!
++**    ...
++**    sub     sp, sp, #[0-9]+
++**    ...
++**    str     x[0-9]+, \[x29, #?-8\]
++**    ...
++*/
++int f(const char *);
++void g(void *);
++int main(int argc, char* argv[])
++{
++  int a;
++  int b;
++  char c[2+f(argv[1])];
++  int d[0x100];
++  char y;
++
++  y=42; a=4; b=10;
++  c[0] = 'h'; c[1] = '\0';
++
++  c[f(argv[2])] = '\0';
++
++  __builtin_printf("%d %d\n%s\n", a, b, c);
++  g(d);
++
++  return 0;
++}
+-- 
+2.34.1
+
-- 
2.34.1

-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#188251): 
https://lists.openembedded.org/g/openembedded-core/message/188251
Mute This Topic: https://lists.openembedded.org/mt/101596334/21656
Group Owner: openembedded-core+ow...@lists.openembedded.org
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[arch...@mail-archive.com]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to