Hi Andrew, A few comments on the implementation, I think it can be simplified a lot:
> +++ b/gcc/config/aarch64/aarch64.h > @@ -700,8 +700,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = > AARCH64_FL_SM_OFF; > #define DWARF2_UNWIND_INFO 1 > > /* Use R0 through R3 to pass exception handling information. */ > +#define EH_RETURN_DATA_REGISTERS_N 4 > #define EH_RETURN_DATA_REGNO(N) \ > - ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM) > + ((N) < EH_RETURN_DATA_REGISTERS_N ? ((unsigned int) R0_REGNUM + (N)) : > INVALID_REGNUM) It would be useful to add a macro IS_EH_RETURN_REGNUM(regnum) that just checks the range R0_REGNUM to R0_REGNUM + EH_RETURN_DATA_REGISTERS_N. > @@ -929,6 +928,7 @@ struct GTY (()) aarch64_frame > outgoing arguments) of each register save slot, or -2 if no save is > needed. */ > poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; > + bool eh_return_allocated[EH_RETURN_DATA_REGISTERS_N]; This doesn't make much sense - besides X0-X3, we also need X5 and X6 for eh_return. If these or any of the other temporaries used by epilog are callee-saved somehow, things are going horribly wrong already... So what do we gain by doing this? > +++ b/gcc/config/aarch64/aarch64.cc > @@ -7792,6 +7792,7 @@ aarch64_layout_frame (void) > > #define SLOT_NOT_REQUIRED (-2) > #define SLOT_REQUIRED (-1) > +#define SLOT_EH_RETURN_REQUIRED (-3) I don't see a need for this. > @@ -7949,6 +7950,18 @@ aarch64_layout_frame (void) > stopping it from being individually shrink-wrapped. */ > allocate_gpr_slot (R30_REGNUM); > > + /* Allocate the eh_return first. */ > + if (crtl->calls_eh_return) > + for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) > + { > + int realregno = EH_RETURN_DATA_REGNO (regno); > + if (known_eq (frame.reg_offset[realregno], SLOT_EH_RETURN_REQUIRED)) > + { > + frame.eh_return_allocated[regno] = true; > + allocate_gpr_slot (realregno); > + } > + } This change is unnecessary if we just mark the slots with SLOT_REQUIRED. > @@ -8035,6 +8048,23 @@ aarch64_layout_frame (void) > frame.wb_pop_candidate1 = frame.wb_push_candidate1; > frame.wb_pop_candidate2 = frame.wb_push_candidate2; > > + /* EH data registers are not pop canidates. */ > + if (crtl->calls_eh_return) > + for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; > regno++)> > + { > + if (frame.eh_return_allocated[regno] > + && frame.wb_pop_candidate1 == EH_RETURN_DATA_REGNO (regno)) > + { > + frame.wb_pop_candidate1 = frame.wb_pop_candidate2; > + frame.wb_pop_candidate2 = INVALID_REGNUM; > + } > + if (frame.eh_return_allocated[regno] > + && frame.wb_pop_candidate2 == EH_RETURN_DATA_REGNO (regno)) > + { > + frame.wb_pop_candidate2 = INVALID_REGNUM; > + } > + } This is unnecessary since we can just avoid making them push candidates if there is no frame chain, eg: if ((!crtl->calls_eh_return || frame.emit_frame_chain) && !push_regs.empty () && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp)) @@ -8681,6 +8712,20 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, if (frame.is_scs_enabled && regno == LR_REGNUM) return true; + /* Skip the eh return data registers if we are + returning normally rather than via eh_return. */ + if (!was_eh_return && crtl->calls_eh_return) + { + for (unsigned ehregno = 0; + EH_RETURN_DATA_REGNO (ehregno) != INVALID_REGNUM; + ehregno++) + { + if (EH_RETURN_DATA_REGNO (ehregno) == regno + && frame.eh_return_allocated[ehregno]) + return true; + } + } + So this could be something like: if (!was_eh_return && crtl->calls_eh_return && IS_EH_RETURN_REGNUM (regno)) return true; Cheers, Wilco