Re: [PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-16 Thread Takayuki 'January June' Suwa via Gcc-patches
On 2023/02/16 7:18, Max Filippov wrote:
> Hi Suwa-san,

Hi!

> 
> On Thu, Jan 26, 2023 at 7:17 PM Takayuki 'January June' Suwa
>  wrote:
>>
>> In the case of the CALL0 ABI, values that must be retained before and
>> after function calls are placed in the callee-saved registers (A12
>> through A15) and referenced later.  However, it is often the case that
>> the save and the reference are each only once and a simple register-
>> register move (with two exceptions; i. the register saved to/restored
>> from is the stack pointer, ii. the function needs an additional stack
>> pointer adjustment to grow the stack).
>>
>> e.g. in the following example, if there are no other occurrences of
>> register A14:
>>
>> ;; before
>> ; prologue {
>>   ...
>> s32i.n  a14, sp, 16
>>   ...   ;; no frame pointer needed
>> ;; no additional stack growth
>> ; } prologue
>>   ...
>> mov.n   a14, a6 ;; A6 is not SP
>>   ...
>> call0   foo
>>   ...
>> mov.n   a8, a14 ;; A8 is not SP
>>   ...
>> ; epilogue {
>>   ...
>> l32i.n  a14, sp, 16
>>   ...
>> ; } epilogue
>>
>> It can be possible like this:
>>
>> ;; after
>> ; prologue {
>>   ...
>> (no save needed)
>>   ...
>> ; } prologue
>>   ...
>> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>>   ...
>> call0   foo
>>   ...
>> l32i.n  a8, sp, 16  ;; through SP
>>   ...
>> ; epilogue {
>>   ...
>> (no restoration needed)
>>   ...
>> ; } epilogue
>>
>> This patch adds the abovementioned logic to the function prologue/epilogue
>> RTL expander code.
>>
>> gcc/ChangeLog:
>>
>> * config/xtensa/xtensa.cc (machine_function): Add new member
>> 'eliminated_callee_saved_bmp'.
>> (xtensa_can_eliminate_callee_saved_reg_p): New function to
>> determine whether the register can be eliminated or not.
>> (xtensa_expand_prologue): Add invoking the above function and
>> elimination the use of callee-saved register by using its stack
>> slot through the stack pointer (or the frame pointer if needed)
>> directly.
>> (xtensa_expand_prologue): Modify to not emit register restoration
>> insn from its stack slot if the register is already eliminated.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/xtensa/elim_callee_saved.c: New.
>> ---
>>  gcc/config/xtensa/xtensa.cc   | 132 ++
>>  .../gcc.target/xtensa/elim_callee_saved.c |  38 +
>>  2 files changed, 145 insertions(+), 25 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c
> 
> This version passes regression tests, but I still have a couple questions.
> 
>> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
>> index 3e2e22d4cbe..ff59c933d4d 100644
>> --- a/gcc/config/xtensa/xtensa.cc
>> +++ b/gcc/config/xtensa/xtensa.cc
>> @@ -105,6 +105,7 @@ struct GTY(()) machine_function
>>bool epilogue_done;
>>bool inhibit_logues_a1_adjusts;
>>rtx last_logues_a9_content;
>> +  HOST_WIDE_INT eliminated_callee_saved_bmp;
>>  };
>>
>>  static void xtensa_option_override (void);
>> @@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, 
>> int flags)
>>  cfun->machine->last_logues_a9_content = GEN_INT (offset);
>>  }
>>
>> +static bool
>> +xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
>> +rtx_insn **p_insnS,
>> +rtx_insn **p_insnR)
>> +{
>> +  df_ref ref;
>> +  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
>> +  rtx pattern;
>> +
>> +  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
>> +return false;
>> +
>> +  for (ref = DF_REG_DEF_CHAIN (regno);
>> +   ref; ref = DF_REF_NEXT_REG (ref))
>> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
>> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
>> +  continue;
>> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
>> +&& REG_P (SET_DEST (pattern))
>> +&& REGNO (SET_DEST (pattern)) == regno
>> +&& REG_NREGS (SET_DEST (pattern)) == 1
>> +&& REG_P (SET_SRC (pattern))
>> +&& REGNO (SET_SRC (pattern)) != A1_REG)
> 
> Do I understand correctly that the check for A1 here and below is
> for the case when regno is a hard frame pointer and the function
> needs the frame pointer? If so, wouldn't it be better to check
> for it explicitly in the beginning?

I see.  But I can't be sure that the body of the function never saves and 
restores the stack pointer to another register if the function doesn't need the 
frame pointer.
Therefore, I think that the validity depends on the regtest.

> 
>> +  {
>> +   if (insnS)
>> + return false;
>> +   insnS = insn;
>> +   continue;
>> +  }
>> +else
>> +  

Re: [PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-02-15 Thread Max Filippov via Gcc-patches
Hi Suwa-san,

On Thu, Jan 26, 2023 at 7:17 PM Takayuki 'January June' Suwa
 wrote:
>
> In the case of the CALL0 ABI, values that must be retained before and
> after function calls are placed in the callee-saved registers (A12
> through A15) and referenced later.  However, it is often the case that
> the save and the reference are each only once and a simple register-
> register move (with two exceptions; i. the register saved to/restored
> from is the stack pointer, ii. the function needs an additional stack
> pointer adjustment to grow the stack).
>
> e.g. in the following example, if there are no other occurrences of
> register A14:
>
> ;; before
> ; prologue {
>   ...
> s32i.n  a14, sp, 16
>   ...   ;; no frame pointer needed
> ;; no additional stack growth
> ; } prologue
>   ...
> mov.n   a14, a6 ;; A6 is not SP
>   ...
> call0   foo
>   ...
> mov.n   a8, a14 ;; A8 is not SP
>   ...
> ; epilogue {
>   ...
> l32i.n  a14, sp, 16
>   ...
> ; } epilogue
>
> It can be possible like this:
>
> ;; after
> ; prologue {
>   ...
> (no save needed)
>   ...
> ; } prologue
>   ...
> s32i.n  a6, sp, 16  ;; replaced with A14's slot
>   ...
> call0   foo
>   ...
> l32i.n  a8, sp, 16  ;; through SP
>   ...
> ; epilogue {
>   ...
> (no restoration needed)
>   ...
> ; } epilogue
>
> This patch adds the abovementioned logic to the function prologue/epilogue
> RTL expander code.
>
> gcc/ChangeLog:
>
> * config/xtensa/xtensa.cc (machine_function): Add new member
> 'eliminated_callee_saved_bmp'.
> (xtensa_can_eliminate_callee_saved_reg_p): New function to
> determine whether the register can be eliminated or not.
> (xtensa_expand_prologue): Add invoking the above function and
> elimination the use of callee-saved register by using its stack
> slot through the stack pointer (or the frame pointer if needed)
> directly.
> (xtensa_expand_prologue): Modify to not emit register restoration
> insn from its stack slot if the register is already eliminated.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/xtensa/elim_callee_saved.c: New.
> ---
>  gcc/config/xtensa/xtensa.cc   | 132 ++
>  .../gcc.target/xtensa/elim_callee_saved.c |  38 +
>  2 files changed, 145 insertions(+), 25 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

This version passes regression tests, but I still have a couple questions.

> diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
> index 3e2e22d4cbe..ff59c933d4d 100644
> --- a/gcc/config/xtensa/xtensa.cc
> +++ b/gcc/config/xtensa/xtensa.cc
> @@ -105,6 +105,7 @@ struct GTY(()) machine_function
>bool epilogue_done;
>bool inhibit_logues_a1_adjusts;
>rtx last_logues_a9_content;
> +  HOST_WIDE_INT eliminated_callee_saved_bmp;
>  };
>
>  static void xtensa_option_override (void);
> @@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, 
> int flags)
>  cfun->machine->last_logues_a9_content = GEN_INT (offset);
>  }
>
> +static bool
> +xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
> +rtx_insn **p_insnS,
> +rtx_insn **p_insnR)
> +{
> +  df_ref ref;
> +  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
> +  rtx pattern;
> +
> +  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
> +return false;
> +
> +  for (ref = DF_REG_DEF_CHAIN (regno);
> +   ref; ref = DF_REF_NEXT_REG (ref))
> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
> +  continue;
> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
> +&& REG_P (SET_DEST (pattern))
> +&& REGNO (SET_DEST (pattern)) == regno
> +&& REG_NREGS (SET_DEST (pattern)) == 1
> +&& REG_P (SET_SRC (pattern))
> +&& REGNO (SET_SRC (pattern)) != A1_REG)

Do I understand correctly that the check for A1 here and below is
for the case when regno is a hard frame pointer and the function
needs the frame pointer? If so, wouldn't it be better to check
for it explicitly in the beginning?

> +  {
> +   if (insnS)
> + return false;
> +   insnS = insn;
> +   continue;
> +  }
> +else
> +  return false;
> +
> +  for (ref = DF_REG_USE_CHAIN (regno);
> +   ref; ref = DF_REF_NEXT_REG (ref))
> +if (DF_REF_CLASS (ref) != DF_REF_REGULAR
> +   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
> +  continue;
> +else if (GET_CODE (pattern = PATTERN (insn)) == SET
> +&& REG_P (SET_SRC (pattern))
> +&& REGNO (SET_SRC (pattern)) == regno
> +&& REG_NREGS (SET_SRC 

[PATCH v6] xtensa: Eliminate the use of callee-saved register that saves and restores only once

2023-01-26 Thread Takayuki 'January June' Suwa via Gcc-patches
In the case of the CALL0 ABI, values that must be retained before and
after function calls are placed in the callee-saved registers (A12
through A15) and referenced later.  However, it is often the case that
the save and the reference are each only once and a simple register-
register move (with two exceptions; i. the register saved to/restored
from is the stack pointer, ii. the function needs an additional stack
pointer adjustment to grow the stack).

e.g. in the following example, if there are no other occurrences of
register A14:

;; before
; prologue {
  ...
s32i.n  a14, sp, 16
  ...   ;; no frame pointer needed
;; no additional stack growth
; } prologue
  ...
mov.n   a14, a6 ;; A6 is not SP
  ...
call0   foo
  ...
mov.n   a8, a14 ;; A8 is not SP
  ...
; epilogue {
  ...
l32i.n  a14, sp, 16
  ...
; } epilogue

It can be possible like this:

;; after
; prologue {
  ...
(no save needed)
  ...
; } prologue
  ...
s32i.n  a6, sp, 16  ;; replaced with A14's slot
  ...
call0   foo
  ...
l32i.n  a8, sp, 16  ;; through SP
  ...
; epilogue {
  ...
(no restoration needed)
  ...
; } epilogue

This patch adds the abovementioned logic to the function prologue/epilogue
RTL expander code.

gcc/ChangeLog:

* config/xtensa/xtensa.cc (machine_function): Add new member
'eliminated_callee_saved_bmp'.
(xtensa_can_eliminate_callee_saved_reg_p): New function to
determine whether the register can be eliminated or not.
(xtensa_expand_prologue): Add invoking the above function and
elimination the use of callee-saved register by using its stack
slot through the stack pointer (or the frame pointer if needed)
directly.
(xtensa_expand_prologue): Modify to not emit register restoration
insn from its stack slot if the register is already eliminated.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/elim_callee_saved.c: New.
---
 gcc/config/xtensa/xtensa.cc   | 132 ++
 .../gcc.target/xtensa/elim_callee_saved.c |  38 +
 2 files changed, 145 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/xtensa/elim_callee_saved.c

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 3e2e22d4cbe..ff59c933d4d 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -105,6 +105,7 @@ struct GTY(()) machine_function
   bool epilogue_done;
   bool inhibit_logues_a1_adjusts;
   rtx last_logues_a9_content;
+  HOST_WIDE_INT eliminated_callee_saved_bmp;
 };
 
 static void xtensa_option_override (void);
@@ -3343,6 +3344,66 @@ xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int 
flags)
 cfun->machine->last_logues_a9_content = GEN_INT (offset);
 }
 
+static bool
+xtensa_can_eliminate_callee_saved_reg_p (unsigned int regno,
+rtx_insn **p_insnS,
+rtx_insn **p_insnR)
+{
+  df_ref ref;
+  rtx_insn *insn, *insnS = NULL, *insnR = NULL;
+  rtx pattern;
+
+  if (!optimize || !df || call_used_or_fixed_reg_p (regno))
+return false;
+
+  for (ref = DF_REG_DEF_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) == regno
+&& REG_NREGS (SET_DEST (pattern)) == 1
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) != A1_REG)
+  {
+   if (insnS)
+ return false;
+   insnS = insn;
+   continue;
+  }
+else
+  return false;
+
+  for (ref = DF_REG_USE_CHAIN (regno);
+   ref; ref = DF_REF_NEXT_REG (ref))
+if (DF_REF_CLASS (ref) != DF_REF_REGULAR
+   || DEBUG_INSN_P (insn = DF_REF_INSN (ref)))
+  continue;
+else if (GET_CODE (pattern = PATTERN (insn)) == SET
+&& REG_P (SET_SRC (pattern))
+&& REGNO (SET_SRC (pattern)) == regno
+&& REG_NREGS (SET_SRC (pattern)) == 1
+&& REG_P (SET_DEST (pattern))
+&& REGNO (SET_DEST (pattern)) != A1_REG)
+  {
+   if (insnR)
+ return false;
+   insnR = insn;
+   continue;
+  }
+else
+  return false;
+
+  if (!insnS || !insnR)
+return false;
+
+  *p_insnS = insnS, *p_insnR = insnR;
+
+  return true;
+}
+
 /* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits.  */
 #define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@@ -3382,6 +3443,7 @@ xtensa_expand_prologue (void)
   df_ref ref;
   bool stack_pointer_needed =