https://gcc.gnu.org/g:d5a22a53f58403c888a43c75c5983ba3cb5023ae

commit d5a22a53f58403c888a43c75c5983ba3cb5023ae
Author: Szabolcs Nagy <szabolcs.n...@arm.com>
Date:   Fri Apr 14 18:23:52 2023 +0100

    aarch64: Add GCS support for nonlocal stack save
    
    Nonlocal stack save and restore has to also save and restore the GCS
    pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto.
    
    The GCS specific code is only emitted if GCS branch-protection is
    enabled and the code always checks at runtime if GCS is enabled.
    
    The new -mbranch-protection=gcs and old -mbranch-protection=none code
    are ABI compatible: jmpbuf for __builtin_setjmp has space for 5
    pointers, the layout is
    
      old layout: fp, pc, sp, unused, unused
      new layout: fp, pc, sp, gcsp, unused
    
    Note: the ILP32 code generation is wrong, as it saves the pointers with
    Pmode (i.e. 8 bytes per pointer) while the user-supplied buffer is sized
    for 5 pointers (4 bytes per pointer); this is not fixed here.
    
    The nonlocal goto has no ABI compatibility issues as the goto and its
    destination are in the same translation unit.
    
    TODO:
    - can we simplify the define_expand rtls?
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs.
            * config/aarch64/aarch64.md (save_stack_nonlocal): New.
            (restore_stack_nonlocal): New.

Diff:
---
 gcc/config/aarch64/aarch64.h  |  7 ++++
 gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 45e901cda64..3238452f53f 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1294,6 +1294,13 @@ typedef struct
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 
+/* Use a TImode save area (two Pmode-sized slots) for SAVE_NONLOCAL so
+   both SP and GCSPR fit; this is the area used by emit_stack_save and by
+   __builtin_setjmp, __builtin_longjmp and __builtin_nonlocal_goto.
+   Note: On ILP32 the documented buf size is not enough, see PR84150.  */
+#define STACK_SAVEAREA_MODE(LEVEL)                     \
+  ((LEVEL) == SAVE_NONLOCAL ? TImode : Pmode)
+
 #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
 
 #define RETURN_ADDR_RTX aarch64_return_addr
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8defd6e0582..2d36af12cfb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1183,6 +1183,88 @@
                      (const_int 1)))]
 )
 
+(define_expand "save_stack_nonlocal"
+  [(set (match_operand 0 "memory_operand")
+        (match_operand 1 "register_operand"))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[0], Pmode, 0);
+  emit_move_insn (stack_slot, operands[1]);  /* First slot: SP.  */
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* GCSPR goes in the second Pmode slot, only if GCS is on at runtime:
+               mov     x16, 1
+               chkfeat x16
+               tbnz    x16, 0, .L_done   // bit 0 still set: GCS disabled
+               mrs     tmp, gcspr_el0
+               str     tmp, [%0, 8]
+       .L_done:  */
+
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs = gen_reg_rtx (Pmode);  /* Scratch; no zero-init needed.  */
+      emit_insn (gen_aarch64_load_gcspr (gcs));
+      emit_move_insn (gcs_slot, gcs);
+      emit_label (done_label);
+    }
+  DONE;
+})
+
+(define_expand "restore_stack_nonlocal"
+  [(set (match_operand 0 "register_operand" "")
+       (match_operand 1 "memory_operand" ""))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[1], Pmode, 0);
+  emit_move_insn (operands[0], stack_slot);  /* First slot: SP.  */
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* Restore GCS by popping entries until GCSPR equals the saved value:
+               mov     x16, 1
+               chkfeat x16
+               tbnz    x16, 0, .L_done   // bit 0 still set: GCS disabled
+               ldr     tmp1, [%1, 8]     // GCSPR saved in the second slot
+               mrs     tmp2, gcspr_el0
+               subs    tmp2, tmp1, tmp2  // bytes left to unwind
+               b.eq    .L_done
+       .L_loop:
+               gcspopm                   // one pop per 8 bytes of distance
+               subs    tmp2, tmp2, 8
+               b.ne    .L_loop
+       .L_done:  */
+
+      rtx loop_label = gen_label_rtx ();
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs_old = gen_reg_rtx (Pmode);  /* Scratch; loaded just below.  */
+      emit_move_insn (gcs_old, gcs_slot);
+      rtx gcs_now = gen_reg_rtx (Pmode);  /* Scratch; set by the MRS.  */
+      emit_insn (gen_aarch64_load_gcspr (gcs_now));
+      emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
+      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
+      emit_label (loop_label);
+      emit_insn (gen_aarch64_gcspopm_xzr ());
+      emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
+      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
+      emit_label (done_label);
+    }
+  DONE;
+})
+
 ;; -------------------------------------------------------------------
 ;; Subroutine calls and sibcalls
 ;; -------------------------------------------------------------------

Reply via email to