Re: Re: [PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-05-31 Thread Fei Gao
On 2023-05-31 18:13  Kito Cheng  wrote:
>
>> >[1] 
>> >https://patchwork.sourceware.org/project/gcc/patch/20230406062118.47431-5-jia...@iscas.ac.cn/
>> Thanks for your review.
>>
>> The md file looks verbose with bunch of *_offset_operand and 
>> stack_push_up_to_*_operand, but it significantly
>> simplies implementation of recognizing zmcp push and pop insns and 
>> outputting assembly.  Also, the md file
>> clearly shows and checks the slot that each register is placed(different to 
>> slot order w/o save-restore before
>> zcmp is introduced). So I prefer my patch V2 to V1 or the link you attached. 
>> But ideas are welcome to make
>> it better. Appreciated if you suggest more details for the improvement.
>
>Got your point, and share an idea to simplify that:
>
>struct code_for_push_pop_t {
>   insn_code (*push)(machine_mode);
>   insn_code (*pop)(machine_mode);
>   insn_code (*pop_ret)(machine_mode);
>};
>const code_for_push_pop_t code_for_push_pop [/*ZCMP_MAX_GRP_SLOTS*/2] = {
>    {code_for_gpr_multi_pop_up_to_ra, /*FIXME*/nullptr, /*FIXME*/nullptr},
>    {code_for_gpr_multi_pop_up_to_s0, /*FIXME*/nullptr, /*FIXME*/nullptr}
>};
>
>static rtx
>riscv_gen_multi_push_pop_insn (op_idx op, HOST_WIDE_INT adj_size,
>unsigned int regs_num)
>{
>  rtx stack_adj = GEN_INT (adj_size);
>
>  return GEN_FCN (code_for_push_pop[regs_num].push(Pmode)) (stack_adj);
>}
>
>(define_mode_attr slot0_offset [(SI "0") (DI "0")])
>(define_mode_attr slot1_offset [(SI "4") (DI "8")])
>
>(define_insn "@gpr_multi_pop_up_to_ra"
>  [(set (reg:X SP_REGNUM)
>    (plus:X (reg:X SP_REGNUM)
> (match_operand 0 "stack_pop_up_to_ra_operand" "I")))
>   (set (reg:X RETURN_ADDR_REGNUM)
>    (mem:X (plus:X (reg:X SP_REGNUM)
>   (const_int ]
>  "TARGET_ZCMP"
>  "cm.pop   {ra}, %0"
>)
>
>(define_insn "@gpr_multi_pop_up_to_s0"
>  [(set (reg:X SP_REGNUM)
>    (plus:X (reg:X SP_REGNUM)
> (match_operand 0 "stack_pop_up_to_s0_operand" "I")))
>   (set (reg:X S0_REGNUM)
>    (mem:X (plus:X (reg:X SP_REGNUM)
>   (const_int 
>   (set (reg:X RETURN_ADDR_REGNUM)
>    (mem:X (plus:X (reg:X SP_REGNUM)
>   (const_int ]
>  "TARGET_ZCMP"
>  "cm.pop   {ra, s0}, %0"
>) 

Perfect. 
Working on it. 

>
>
>
>> >> @@ -5620,7 +5977,7 @@ riscv_expand_epilogue (int style)
>> >>    adjust));
>> >>   rtx dwarf = NULL_RTX;
>> >>   rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
>> >> -    GEN_INT (step2));
>> >> +    GEN_INT (step2 + 
>> >> libcall_size + multipop_size));
>> >
>> >Why we need `+ libcall_size` here? or...why we don't need that before?
>> It's a good catch:)
>> I should have  added `+ libcall_size` in
>> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60524be1e3929d83e15fceac6e2aa053c8a6fb20
>>
>> That's why I corrected the cfi issue in save-restore along with zcmp changes 
>> in this patch.
>
>I would like to have a separate patch to fix this bug instead of
>hidden in this patch. 
sure,  I will make  a separate patch. 

Thanks & BR, 
Fei

Re: Re: [PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-05-31 Thread Kito Cheng via Gcc-patches
> >[1] 
> >https://patchwork.sourceware.org/project/gcc/patch/20230406062118.47431-5-jia...@iscas.ac.cn/
> Thanks for your review.
>
> The md file looks verbose with bunch of *_offset_operand and 
> stack_push_up_to_*_operand, but it significantly
> simplies implementation of recognizing zmcp push and pop insns and outputting 
> assembly.  Also, the md file
> clearly shows and checks the slot that each register is placed(different to 
> slot order w/o save-restore before
> zcmp is introduced). So I prefer my patch V2 to V1 or the link you attached. 
> But ideas are welcome to make
> it better. Appreciated if you suggest more details for the improvement.

Got your point, and share an idea to simplify that:

struct code_for_push_pop_t {
   insn_code (*push)(machine_mode);
   insn_code (*pop)(machine_mode);
   insn_code (*pop_ret)(machine_mode);
};
const code_for_push_pop_t code_for_push_pop [/*ZCMP_MAX_GRP_SLOTS*/2] = {
{code_for_gpr_multi_pop_up_to_ra, /*FIXME*/nullptr, /*FIXME*/nullptr},
{code_for_gpr_multi_pop_up_to_s0, /*FIXME*/nullptr, /*FIXME*/nullptr}
};

static rtx
riscv_gen_multi_push_pop_insn (op_idx op, HOST_WIDE_INT adj_size,
unsigned int regs_num)
{
  rtx stack_adj = GEN_INT (adj_size);

  return GEN_FCN (code_for_push_pop[regs_num].push(Pmode)) (stack_adj);
}

(define_mode_attr slot0_offset [(SI "0") (DI "0")])
(define_mode_attr slot1_offset [(SI "4") (DI "8")])

(define_insn "@gpr_multi_pop_up_to_ra"
  [(set (reg:X SP_REGNUM)
(plus:X (reg:X SP_REGNUM)
 (match_operand 0 "stack_pop_up_to_ra_operand" "I")))
   (set (reg:X RETURN_ADDR_REGNUM)
(mem:X (plus:X (reg:X SP_REGNUM)
   (const_int ]
  "TARGET_ZCMP"
  "cm.pop   {ra}, %0"
)

(define_insn "@gpr_multi_pop_up_to_s0"
  [(set (reg:X SP_REGNUM)
(plus:X (reg:X SP_REGNUM)
 (match_operand 0 "stack_pop_up_to_s0_operand" "I")))
   (set (reg:X S0_REGNUM)
(mem:X (plus:X (reg:X SP_REGNUM)
   (const_int 
   (set (reg:X RETURN_ADDR_REGNUM)
(mem:X (plus:X (reg:X SP_REGNUM)
   (const_int ]
  "TARGET_ZCMP"
  "cm.pop   {ra, s0}, %0"
)



> >> @@ -5620,7 +5977,7 @@ riscv_expand_epilogue (int style)
> >>adjust));
> >>   rtx dwarf = NULL_RTX;
> >>   rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
> >> -GEN_INT (step2));
> >> +GEN_INT (step2 + libcall_size 
> >> + multipop_size));
> >
> >Why we need `+ libcall_size` here? or...why we don't need that before?
> It's a good catch:)
> I should have  added `+ libcall_size` in
> https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60524be1e3929d83e15fceac6e2aa053c8a6fb20
>
> That's why I corrected the cfi issue in save-restore along with zcmp changes 
> in this patch.

I would like to have a separate patch to fix this bug instead of
hidden in this patch.


Re: Re: [PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-05-30 Thread Fei Gao
On 2023-05-30 13:26  Sinan  wrote:
>
>>> +/* Return TRUE if Zcmp push and pop insns should be
>>> + avoided. FALSE otherwise.
>>> + Only use multi push & pop if all GPRs masked can be covered,
>>> + and stack access is SP based,
>>> + and GPRs are at top of the stack frame,
>>> + and no conflicts in stack allocation with other features */
>>> +static bool
>>> +riscv_avoid_multi_push(const struct riscv_frame_info *frame)
>>> +{
>>> + if (!TARGET_ZCMP
>>> + || crtl->calls_eh_return
>>> + || frame_pointer_needed
>>> + || cfun->machine->interrupt_handler_p
>>> + || cfun->machine->varargs_size != 0
>>> + || crtl->args.pretend_args_size != 0
>>> + || (frame->mask & ~ MULTI_PUSH_GPR_MASK))
>>> + return true;
>>> +
>>> + return false;
>>> +}
>Any reason to skip generating push/pop in the cases where a frame pointer is 
>needed?
>IIRC, only code compiled with O1 and above will omit frame pointer, if so then 
>code with
>O0 will never generate cm.push/pop. 
without -fomit-frame-pointer in O0, the stack access is s0 based, while 
cm.push/pop is sp based access.
So cm.push/pop will not be generated. Same logic as taken by save-restore.

>Same question for interrupt_handler_p. I think cm.push/pop can handle this 
>case. e.g.
>the test case zc-zcmp-push-pop-6.c from Jiawei's patch. 
Same logic as taken by save-restore. I don't know the exact reason why 
save-restore cannot be used in interrupt. 
In riscv_compute_frame_info, riscv_stack_align (num_save_restore * 
UNITS_PER_WORD) == x_save_size fails in most cases for interrupt.
          if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == 
x_save_size
              && !riscv_avoid_save_libcall ())
            {
              ...
              frame->save_libcall_adjustment = x_save_size;
            }
In my understanding, use save-restore if all regs to be saved can be covered. 
That's why i added (frame->mask & ~ MULTI_PUSH_GPR_MASK)
in riscv_avoid_multi_push.

BR, 
Fei

>BR,
>Sinan 

>--
>Sender:Fei Gao 
>Sent At:2023 May 16 (Tue.) 17:34
>Recipient:sinan.lin ; jiawei 
>; shihua ; lidie 
>
>Cc:Fei Gao 
>Subject:[PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp
>Zcmp can share the same logic as save-restore in stack allocation: 
>pre-allocation
>by cm.push, step 1 and step 2.
>please be noted cm.push pushes ra, s0-s11 in reverse order than what 
>save-restore does.
>So adaption has been done in .cfi directives in my patch.
>gcc/ChangeLog:
> * config/riscv/predicates.md (slot_0_offset_operand): predicates for slot 0 
> offset.
> (slot_1_offset_operand): likewise
> (slot_2_offset_operand): likewise
> (slot_3_offset_operand): likewise
> (slot_4_offset_operand): likewise
> (slot_5_offset_operand): likewise
> (slot_6_offset_operand): likewise
> (slot_7_offset_operand): likewise
> (slot_8_offset_operand): likewise
> (slot_9_offset_operand): likewise
> (slot_10_offset_operand): likewise
> (slot_11_offset_operand): likewise
> (slot_12_offset_operand): likewise
> (stack_push_up_to_ra_operand): predicates for stack adjust of pushing ra
> (stack_push_up_to_s0_operand): predicates for stack adjust of pushing ra, s0
> (stack_push_up_to_s1_operand): likewise
> (stack_push_up_to_s2_operand): likewise
> (stack_push_up_to_s3_operand): likewise
> (stack_push_up_to_s4_operand): likewise
> (stack_push_up_to_s5_operand): likewise
> (stack_push_up_to_s6_operand): likewise
> (stack_push_up_to_s7_operand): likewise
> (stack_push_up_to_s8_operand): likewise
> (stack_push_up_to_s9_operand): likewise
> (stack_push_up_to_s11_operand): likewise
> (stack_pop_up_to_ra_operand): predicates for stack adjust of poping ra
> (stack_pop_up_to_s0_operand): predicates for stack adjust of poping ra, s0
> (stack_pop_up_to_s1_operand): likewise
> (stack_pop_up_to_s2_operand): likewise
> (stack_pop_up_to_s3_operand): likewise
> (stack_pop_up_to_s4_operand): likewise
> (stack_pop_up_to_s5_operand): likewise
> (stack_pop_up_to_s6_operand): likewise
> (stack_pop_up_to_s7_operand): likewise
> (stack_pop_up_to_s8_operand): likewise
> (stack_pop_up_to_s9_operand): likewise
> (stack_pop_up_to_s11_operand): likewise
> * config/riscv/riscv-protos.h (riscv_zcmp_valid_slot_offset_p): declaration
> (riscv_zcmp_valid_stack_adj_bytes_p): declaration
> * config/riscv/riscv.cc (struct riscv_frame_info): comment change
> (riscv_avoid_multi_push): helper function of riscv_use_multi_push
> (riscv_use_multi_push): true if multi push is used
> (riscv_multi_push_sregs_count): num of sregs in multi-push
> (riscv_multi_push_regs_count): num of regs in multi-push
> (riscv_16bytes_align): align to 16 bytes
> (riscv_stack_align): moved to a better place
> (riscv_save_libcall_count): no functional change
> (riscv_compute_frame_info): add zcmp frame info
> (riscv_adjust_multi_push_cfi_prologue): adjust cfi for cm.push
> (get_slot_offset_rtx): get the rtx of slot to push or pop
> (riscv_gen_multi_push_pop_insn): gen function for multi push and pop
> (ris

Re: Re: [PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-05-29 Thread Fei Gao
On 2023-05-29 11:05  Kito Cheng  wrote:
>
>Thanks for this patch, just few minor comment, I think this is pretty
>close to accept :)
>
>Could you reference JiaWei's match_parallel[1] to prevent adding bunch
>of *_offset_operand and stack_push_up_to_*_operand?
>
>
>[1] 
>https://patchwork.sourceware.org/project/gcc/patch/20230406062118.47431-5-jia...@iscas.ac.cn/
Thanks for your review. 

The md file looks verbose with bunch of *_offset_operand and 
stack_push_up_to_*_operand, but it significantly
simplies implementation of recognizing zmcp push and pop insns and outputting 
assembly.  Also, the md file
clearly shows and checks the slot that each register is placed(different to 
slot order w/o save-restore before
zcmp is introduced). So I prefer my patch V2 to V1 or the link you attached. 
But ideas are welcome to make
it better. Appreciated if you suggest more details for the improvement.

>
>
>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
>> index 629e5e45cac..a0a2db1f594 100644
>> --- a/gcc/config/riscv/riscv.cc
>> +++ b/gcc/config/riscv/riscv.cc
>> @@ -117,6 +117,14 @@ struct GTY(())  riscv_frame_info {
>>    /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
>>    unsigned save_libcall_adjustment;
>>
>> +  /* the minimum number of bytes, in multiples of 16-byte address 
>> increments,
>> + required to cover the registers in a multi push & pop.  */
>> +  unsigned multi_push_adj_base;
>> +
>> +  /* the number of additional 16-byte address increments allocated for the 
>> stack frame
>> + in a multi push & pop.  */
>> +  unsigned multi_push_adj_addi;
>> +
>>    /* Offsets of fixed-point and floating-point save areas from frame bottom 
>>*/
>>    poly_int64 gp_sp_offset;
>>    poly_int64 fp_sp_offset;
>> @@ -413,6 +421,21 @@ static const struct riscv_tune_info 
>> riscv_tune_info_table[] = {
>>  #include "riscv-cores.def"
>>  };
>>
>> +typedef enum
>> +{
>> +  SI_IDX = 0,
>> +  DI_IDX,
>> +  MAX_MODE_IDX = DI_IDX
>> +} mode_idx;
>> +
>
>Didn't see any use in this version? 
It's used in defining the array below.
const insn_gen_fn gen_push_pop [MAX_OP_IDX + 1][MAX_MODE_IDX + 
1][ZCMP_MAX_GRP_SLOTS]

>
>> @@ -5574,18 +5924,25 @@ riscv_expand_epilogue (int style)
>>    REG_NOTES (insn) = dwarf;
>>  }
>>
>> -  if (use_restore_libcall)
>> -    frame->mask = 0; /* Temporarily fib for GPRs.  */
>> +  if (use_restore_libcall || use_multi_pop)
>> +    frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
>>
>>    /* If we need to restore registers, deallocate as much stack as
>>   possible in the second step without going out of range.  */
>> -  if ((frame->mask | frame->fmask) != 0)
>> +  if (use_multi_pop)
>> +    {
>> +  if (frame->fmask
>> +  && known_gt (frame->total_size - multipop_size,
>> +  frame->frame_pointer_offset))
>> +    step2 = riscv_first_stack_step (frame, frame->total_size - 
>> multipop_size);
>> +    }
>> +  else if ((frame->mask | frame->fmask) != 0)
>>  step2 = riscv_first_stack_step (frame, frame->total_size - 
>>libcall_size);
>>
>> -  if (use_restore_libcall)
>> +  if (use_restore_libcall || use_multi_pop)
>>  frame->mask = mask; /* Undo the above fib.  */
>>
>> -  poly_int64 step1 = frame->total_size - step2 - libcall_size;
>> +  poly_int64 step1 = frame->total_size - step2 - libcall_size - 
>> multipop_size ;
>>
>>    /* Set TARGET to BASE + STEP1.  */
>>    if (known_gt (step1, 0))
>> @@ -5620,7 +5977,7 @@ riscv_expand_epilogue (int style)
>>    adjust));
>>   rtx dwarf = NULL_RTX;
>>   rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
>> -    GEN_INT (step2));
>> +    GEN_INT (step2 + libcall_size + 
>> multipop_size));
>
>Why we need `+ libcall_size` here? or...why we don't need that before? 
It's a good catch:)  
I should have  added `+ libcall_size` in 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60524be1e3929d83e15fceac6e2aa053c8a6fb20

That's why I corrected the cfi issue in save-restore along with zcmp changes in 
this patch.

>
>>
>>   dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
>>   RTX_FRAME_RELATED_P (insn) = 1;
>> @@ -5635,15 +5992,15 @@ riscv_expand_epilogue (int style)
>>    epilogue_cfa_sp_offset = step2;
>>  }
>>
>> -  if (use_restore_libcall)
>> +  if (use_restore_libcall || use_multi_pop)
>>  frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
>>
>>    /* Restore the registers.  */
>> -  riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
>> +  riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size - 
>> multipop_size,
>> riscv_restore_reg,
>> true, style == EXCEPTION_RETURN);
>>
>> -  if (use_restore_libcall)
>> +  if (use_restore_libcall || use_multi_po

Re: [PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-05-29 Thread Sinan via Gcc-patches
>> +/* Return TRUE if Zcmp push and pop insns should be
>> + avoided. FALSE otherwise.
>> + Only use multi push & pop if all GPRs masked can be covered,
>> + and stack access is SP based,
>> + and GPRs are at top of the stack frame,
>> + and no conflicts in stack allocation with other features */
>> +static bool
>> +riscv_avoid_multi_push(const struct riscv_frame_info *frame)
>> +{
>> + if (!TARGET_ZCMP
>> + || crtl->calls_eh_return
>> + || frame_pointer_needed
>> + || cfun->machine->interrupt_handler_p
>> + || cfun->machine->varargs_size != 0
>> + || crtl->args.pretend_args_size != 0
>> + || (frame->mask & ~ MULTI_PUSH_GPR_MASK))
>> + return true;
>> +
>> + return false;
>> +}
Any reason to skip generating push/pop in the cases where a frame pointer is 
needed?
IIRC, only code compiled with O1 and above will omit frame pointer, if so then 
code with
O0 will never generate cm.push/pop. 
Same question for interrupt_handler_p. I think cm.push/pop can handle this 
case. e.g.
the test case zc-zcmp-push-pop-6.c from Jiawei's patch.
BR,
Sinan
--
Sender:Fei Gao 
Sent At:2023 May 16 (Tue.) 17:34
Recipient:sinan.lin ; jiawei ; 
shihua ; lidie 
Cc:Fei Gao 
Subject:[PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp
Zcmp can share the same logic as save-restore in stack allocation: 
pre-allocation
by cm.push, step 1 and step 2.
please be noted cm.push pushes ra, s0-s11 in reverse order than what 
save-restore does.
So adaption has been done in .cfi directives in my patch.
gcc/ChangeLog:
 * config/riscv/predicates.md (slot_0_offset_operand): predicates for slot 0 
offset.
 (slot_1_offset_operand): likewise
 (slot_2_offset_operand): likewise
 (slot_3_offset_operand): likewise
 (slot_4_offset_operand): likewise
 (slot_5_offset_operand): likewise
 (slot_6_offset_operand): likewise
 (slot_7_offset_operand): likewise
 (slot_8_offset_operand): likewise
 (slot_9_offset_operand): likewise
 (slot_10_offset_operand): likewise
 (slot_11_offset_operand): likewise
 (slot_12_offset_operand): likewise
 (stack_push_up_to_ra_operand): predicates for stack adjust of pushing ra
 (stack_push_up_to_s0_operand): predicates for stack adjust of pushing ra, s0
 (stack_push_up_to_s1_operand): likewise
 (stack_push_up_to_s2_operand): likewise
 (stack_push_up_to_s3_operand): likewise
 (stack_push_up_to_s4_operand): likewise
 (stack_push_up_to_s5_operand): likewise
 (stack_push_up_to_s6_operand): likewise
 (stack_push_up_to_s7_operand): likewise
 (stack_push_up_to_s8_operand): likewise
 (stack_push_up_to_s9_operand): likewise
 (stack_push_up_to_s11_operand): likewise
 (stack_pop_up_to_ra_operand): predicates for stack adjust of poping ra
 (stack_pop_up_to_s0_operand): predicates for stack adjust of poping ra, s0
 (stack_pop_up_to_s1_operand): likewise
 (stack_pop_up_to_s2_operand): likewise
 (stack_pop_up_to_s3_operand): likewise
 (stack_pop_up_to_s4_operand): likewise
 (stack_pop_up_to_s5_operand): likewise
 (stack_pop_up_to_s6_operand): likewise
 (stack_pop_up_to_s7_operand): likewise
 (stack_pop_up_to_s8_operand): likewise
 (stack_pop_up_to_s9_operand): likewise
 (stack_pop_up_to_s11_operand): likewise
 * config/riscv/riscv-protos.h (riscv_zcmp_valid_slot_offset_p): declaration
 (riscv_zcmp_valid_stack_adj_bytes_p): declaration
 * config/riscv/riscv.cc (struct riscv_frame_info): comment change
 (riscv_avoid_multi_push): helper function of riscv_use_multi_push
 (riscv_use_multi_push): true if multi push is used
 (riscv_multi_push_sregs_count): num of sregs in multi-push
 (riscv_multi_push_regs_count): num of regs in multi-push
 (riscv_16bytes_align): align to 16 bytes
 (riscv_stack_align): moved to a better place
 (riscv_save_libcall_count): no functional change
 (riscv_compute_frame_info): add zcmp frame info
 (riscv_adjust_multi_push_cfi_prologue): adjust cfi for cm.push
 (get_slot_offset_rtx): get the rtx of slot to push or pop
 (riscv_gen_multi_push_pop_insn): gen function for multi push and pop
 (riscv_expand_prologue): allocate stack by cm.push
 (riscv_adjust_multi_pop_cfi_epilogue): adjust cfi for cm.pop[ret]
 (riscv_expand_epilogue): allocate stack by cm.pop[ret]
 (zcmp_base_adj): calculate stack adjustment base size
 (zcmp_additional_adj): calculate stack adjustment additional size
 (riscv_zcmp_valid_slot_offset_p): check if offset is valid for a slot
 (riscv_zcmp_valid_stack_adj_bytes_p): check if stack adjustment size is valid
 * config/riscv/riscv.h (RETURN_ADDR_MASK): mask of ra
 (S0_MASK): likewise
 (S1_MASK): likewise
 (S2_MASK): likewise
 (S3_MASK): likewise
 (S4_MASK): likewise
 (S5_MASK): likewise
 (S6_MASK): likewise
 (S7_MASK): likewise
 (S8_MASK): likewise
 (S9_MASK): likewise
 (S10_MASK): likewise
 (S11_MASK): likewise
 (MULTI_PUSH_GPR_MASK): GPR_MASK that cm.push can cover at most
 (ZCMP_MAX_SPIMM): max spimm value
 (ZCMP_SP_INC_STEP): zcmp sp increment step
 (ZCMP_INVALID_S0S10_SREGS_COUNTS): num of s0-s10
 (ZCMP_S0S11_SREGS_COUNTS): num of s0

Re: [PATCH 1/1] [V2] [RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-05-28 Thread Kito Cheng via Gcc-patches
Thanks for this patch, just few minor comment, I think this is pretty
close to accept :)

Could you reference JiaWei's match_parallel[1] to prevent adding bunch
of *_offset_operand and stack_push_up_to_*_operand?


[1] 
https://patchwork.sourceware.org/project/gcc/patch/20230406062118.47431-5-jia...@iscas.ac.cn/


> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 629e5e45cac..a0a2db1f594 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -117,6 +117,14 @@ struct GTY(())  riscv_frame_info {
>/* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
>unsigned save_libcall_adjustment;
>
> +  /* the minimum number of bytes, in multiples of 16-byte address increments,
> + required to cover the registers in a multi push & pop.  */
> +  unsigned multi_push_adj_base;
> +
> +  /* the number of additional 16-byte address increments allocated for the 
> stack frame
> + in a multi push & pop.  */
> +  unsigned multi_push_adj_addi;
> +
>/* Offsets of fixed-point and floating-point save areas from frame bottom 
> */
>poly_int64 gp_sp_offset;
>poly_int64 fp_sp_offset;
> @@ -413,6 +421,21 @@ static const struct riscv_tune_info 
> riscv_tune_info_table[] = {
>  #include "riscv-cores.def"
>  };
>
> +typedef enum
> +{
> +  SI_IDX = 0,
> +  DI_IDX,
> +  MAX_MODE_IDX = DI_IDX
> +} mode_idx;
> +

Didn't see any use in this version?

> @@ -5574,18 +5924,25 @@ riscv_expand_epilogue (int style)
>REG_NOTES (insn) = dwarf;
>  }
>
> -  if (use_restore_libcall)
> -frame->mask = 0; /* Temporarily fib for GPRs.  */
> +  if (use_restore_libcall || use_multi_pop)
> +frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
>
>/* If we need to restore registers, deallocate as much stack as
>   possible in the second step without going out of range.  */
> -  if ((frame->mask | frame->fmask) != 0)
> +  if (use_multi_pop)
> +{
> +  if (frame->fmask
> +  && known_gt (frame->total_size - multipop_size,
> +  frame->frame_pointer_offset))
> +step2 = riscv_first_stack_step (frame, frame->total_size - 
> multipop_size);
> +}
> +  else if ((frame->mask | frame->fmask) != 0)
>  step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
>
> -  if (use_restore_libcall)
> +  if (use_restore_libcall || use_multi_pop)
>  frame->mask = mask; /* Undo the above fib.  */
>
> -  poly_int64 step1 = frame->total_size - step2 - libcall_size;
> +  poly_int64 step1 = frame->total_size - step2 - libcall_size - 
> multipop_size ;
>
>/* Set TARGET to BASE + STEP1.  */
>if (known_gt (step1, 0))
> @@ -5620,7 +5977,7 @@ riscv_expand_epilogue (int style)
>adjust));
>   rtx dwarf = NULL_RTX;
>   rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
> -GEN_INT (step2));
> +GEN_INT (step2 + libcall_size + 
> multipop_size));

Why we need `+ libcall_size` here? or...why we don't need that before?

>
>   dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
>   RTX_FRAME_RELATED_P (insn) = 1;
> @@ -5635,15 +5992,15 @@ riscv_expand_epilogue (int style)
>epilogue_cfa_sp_offset = step2;
>  }
>
> -  if (use_restore_libcall)
> +  if (use_restore_libcall || use_multi_pop)
>  frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
>
>/* Restore the registers.  */
> -  riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
> +  riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size - 
> multipop_size,
> riscv_restore_reg,
> true, style == EXCEPTION_RETURN);
>
> -  if (use_restore_libcall)
> +  if (use_restore_libcall || use_multi_pop)
>frame->mask = mask; /* Undo the above fib.  */
>
>if (need_barrier_p)
> @@ -5657,14 +6014,30 @@ riscv_expand_epilogue (int style)
>
>rtx dwarf = NULL_RTX;
>rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
> -const0_rtx);
> +GEN_INT (libcall_size + 
> multipop_size));

Same question for `libcall_size` part.