Re: [PATCH][AArch64 - v2] Simplify eh_return implementation

2016-09-01 Thread Ramana Radhakrishnan


On 10/08/16 17:26, Wilco Dijkstra wrote:
> I noticed it would still be a good idea to add an extra barrier in the epilog 
> as the
> scheduler doesn't appear to handle aliases of frame accesses properly.
> 
> This patch simplifies the handling of the EH return value.  We force the use 
> of the
> frame pointer so the return location is always at FP + 8.  This means we can 
> emit
> a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
> patterns, splitters and frame offset calculations.  The new implementation 
> also
> fixes various bugs in aarch64_final_eh_return_addr, which does not work with
> -fomit-frame-pointer, alloca or outgoing arguments.
> 
> Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
> this to GCC6.x?


Can you please file a PR for this and add some testcases ?  This sounds like a 
serious enough problem that needs to be looked at probably going back since the 
dawn of time.



Ramana
> 
> ChangeLog:
> 2016-08-10  Wilco Dijkstra  
> gcc/
>   * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
>   * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
>   (EH_RETURN_HANDLER_RTX): New define.
>   * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
>   Force frame pointer in EH return functions.
>   (aarch64_expand_epilogue): Add barrier for eh_return.
>   (aarch64_final_eh_return_addr): Remove.
>   (aarch64_eh_return_handler_rtx): New function.
>   * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
>   Remove.
>   (aarch64_eh_return_handler_rtx): New prototype.
> --
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 
> 3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070
>  100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
>  int aarch64_hard_regno_nregs (unsigned, machine_mode);
>  int aarch64_uxt_size (int, HOST_WIDE_INT);
>  int aarch64_vec_fpconst_pow_of_2 (rtx);
> -rtx aarch64_final_eh_return_addr (void);
> +rtx aarch64_eh_return_handler_rtx (void);
>  rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>  const char *aarch64_output_move_struct (rtx *operands);
>  rtx aarch64_return_addr (int, rtx);
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 
> 003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853daf08842955288861ec7e7acca
>  100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
>  #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)   \
>aarch64_declare_function_name (STR, NAME, DECL)
>  
> -/* The register that holds the return address in exception handlers.  */
> -#define AARCH64_EH_STACKADJ_REGNUM   (R0_REGNUM + 4)
> -#define EH_RETURN_STACKADJ_RTX   gen_rtx_REG (Pmode, 
> AARCH64_EH_STACKADJ_REGNUM)
> +/* For EH returns X4 contains the stack adjustment.  */
> +#define EH_RETURN_STACKADJ_RTX   gen_rtx_REG (Pmode, R4_REGNUM)
> +#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
>  
>  /* Don't use __builtin_setjmp until we've defined it.  */
>  #undef DONT_USE_BUILTIN_SETJMP
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
>&& (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
>  return true;
>  
> +  /* Force a frame pointer for EH returns so the return address is at FP+8.  
> */
> +  if (crtl->calls_eh_return)
> +return true;
> +
>return false;
>  }
>  
> @@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
>+ cfun->machine->frame.saved_varargs_size) != 0;
>  
>/* Emit a barrier to prevent loads from a deallocated stack.  */
> -  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
> +  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
> +  || crtl->calls_eh_return)
>  {
>emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
>need_barrier_p = false;
> @@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
>  emit_jump_insn (ret_rtx);
>  }
>  
> -/* Return the place to copy the exception unwinding return address to.
> -   This will probably be a stack slot, but could (in theory be the
> -   return register).  */
> +/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
> +   The access needs to be volatile to prevent it from being removed.  */
>  rtx
> -aarch64_final_eh_return_addr (void)
> +aarch64_eh_return_handler_rtx (void)
>  {
> -  

Re: [PATCH][AArch64 - v2] Simplify eh_return implementation

2016-09-01 Thread Wilco Dijkstra
 
Ping

I noticed it would still be a good idea to add an extra barrier in the epilog 
as the
scheduler doesn't appear to handle aliases of frame accesses properly.

This patch simplifies the handling of the EH return value.  We force the use of 
the
frame pointer so the return location is always at FP + 8.  This means we can 
emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-10  Wilco Dijkstra  
gcc/
    * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
    * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
    (EH_RETURN_HANDLER_RTX): New define.
    * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
    Force frame pointer in EH return functions.
    (aarch64_expand_epilogue): Add barrier for eh_return.
    (aarch64_final_eh_return_addr): Remove.
    (aarch64_eh_return_handler_rtx): New function.
    * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
    Remove.
    (aarch64_eh_return_handler_rtx): New prototype.
--

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853daf08842955288861ec7e7acca
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)  \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
   && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
 return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
  + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+  || crtl->calls_eh_return)
 {
   emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
   need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
 emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
- - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-    return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
- result in a store to save LR introduced by builtin_eh_return () being
- incorrectly 

Re: [PATCH][AArch64 - v2] Simplify eh_return implementation

2016-08-26 Thread Wilco Dijkstra
Jiong Wang  wrote:
> The -fomit-frame-pointer is really broken on aarch64_find_eh_return_addr

Yes, that's a good conclusion. However even with the frame pointer there are 
cases that fail, for example it will access LR off SP even after alloca. In fact
we're lucky it works at all sometimes...

> I think the correct offset when -fomit-frame-pointer should be:
>
>  "cfun->machine->frame.reg_offset[LR_REGNUM]"

With the offset to the register save area added of course. But then you still
need to expand late and so need the patterns in the MD. By forcing a frame
pointer we avoid all that complexity.

Wilco



Re: [PATCH][AArch64 - v2] Simplify eh_return implementation

2016-08-26 Thread Jiong Wang

Wilco Dijkstra writes:

> Ping
>
> I noticed it would still be a good idea to add an extra barrier in the epilog 
> as the
> scheduler doesn't appear to handle aliases of frame accesses properly.
>
> This patch simplifies the handling of the EH return value.  We force the use 
> of the
> frame pointer so the return location is always at FP + 8.  This means we can 
> emit
> a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
> patterns, splitters and frame offset calculations.  The new implementation 
> also
> fixes various bugs in aarch64_final_eh_return_addr, which does not work with
> -fomit-frame-pointer, alloca or outgoing arguments.

The -fomit-frame-pointer is really broken on aarch64_find_eh_return_addr

-  return gen_frame_mem (DImode,
-   plus_constant (Pmode,
-  stack_pointer_rtx,
-  fp_offset
-  + cfun->machine->frame.saved_regs_size
-  - 2 * UNITS_PER_WORD));

the saved_regs_size includes both general and vector register saving
area, while LR should be saved on top of general register
area. Meanwhile saved_regs_size contains alignment amount.

Given EH unwind code will invoke __builtin_unwind_init which pushes all
callee-saved, both general and vector, the current function will always
get wrong offset.

I think the correct offset when -fomit-frame-pointer should be:

  "cfun->machine->frame.reg_offset[LR_REGNUM]"

I have done a quick check on _Unwind_RaiseException which is the only
code affected by this change.  Without frame pointer, the exception
handler's address is installed in different, thus wrong, stack slot.

...
str x30, [sp, 112]
...
str x19, [sp, 176]

This approach used in this patch looks good to me.

> 2016-08-10  Wilco Dijkstra  
> gcc/
> * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
> * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
> (EH_RETURN_HANDLER_RTX): New define.
> * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
> Force frame pointer in EH return functions.
> (aarch64_expand_epilogue): Add barrier for eh_return.
> (aarch64_final_eh_return_addr): Remove.
> (aarch64_eh_return_handler_rtx): New function.
> * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
> Remove.
> (aarch64_eh_return_handler_rtx): New prototype.

-- 
Regards,
Jiong


Re: [PATCH][AArch64 - v2] Simplify eh_return implementation

2016-08-23 Thread Wilco Dijkstra
Ping

I noticed it would still be a good idea to add an extra barrier in the epilog 
as the
scheduler doesn't appear to handle aliases of frame accesses properly.

This patch simplifies the handling of the EH return value.  We force the use of 
the
frame pointer so the return location is always at FP + 8.  This means we can 
emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-10  Wilco Dijkstra  
gcc/
* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
(EH_RETURN_HANDLER_RTX): New define.
* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
Force frame pointer in EH return functions.
(aarch64_expand_epilogue): Add barrier for eh_return.
(aarch64_final_eh_return_addr): Remove.
(aarch64_eh_return_handler_rtx): New function.
* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
Remove.
(aarch64_eh_return_handler_rtx): New prototype.
--

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853daf08842955288861ec7e7acca
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)  \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
   && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
 return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
  + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+  || crtl->calls_eh_return)
 {
   emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
   need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
 emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
- - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
- result in a store to save LR introduced by builtin_eh_return () being
- incorrectly 

Re: [PATCH][AArch64 - v2] Simplify eh_return implementation

2016-08-10 Thread Wilco Dijkstra
I noticed it would still be a good idea to add an extra barrier in the epilog 
as the
scheduler doesn't appear to handle aliases of frame accesses properly.

This patch simplifies the handling of the EH return value.  We force the use of 
the
frame pointer so the return location is always at FP + 8.  This means we can 
emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-10  Wilco Dijkstra  
gcc/
* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
(EH_RETURN_HANDLER_RTX): New define.
* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
Force frame pointer in EH return functions.
(aarch64_expand_epilogue): Add barrier for eh_return.
(aarch64_final_eh_return_addr): Remove.
(aarch64_eh_return_handler_rtx): New function.
* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
Remove.
(aarch64_eh_return_handler_rtx): New prototype.
--

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853daf08842955288861ec7e7acca
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL) \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
5a25fce17785af9f9dc12e0f2a9609af09af0b35..bb8baff1e7a06942c8b8f51c1d6b341673401ef9
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
   && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
 return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
 + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+  || crtl->calls_eh_return)
 {
   emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
   need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
 emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
- - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
- result in a store to save LR introduced by builtin_eh_return () being
- incorrectly deleted