Gentle ping. The patch I previously submitted: | Date: Wed, 30 Nov 2022 00:38:08 -0800 | Subject: [PATCH] RISC-V: optimize stack manipulation in save-restore | Message-ID: <gao...@eswincomputing.com>
I split the patches as per Palmer's review comment. BR Fei >The stack that save-restore reserves is not well integrated into stack >allocation and deallocation. >This patch allows fewer instructions to be used in stack allocation and >deallocation if save-restore is enabled. > >before patch: > bar: > call t0,__riscv_save_4 > addi sp,sp,-64 > ... > li t0,-12288 > addi t0,t0,-1968 # optimized out after patch > add sp,sp,t0 # prologue > ... > li t0,12288 # epilogue > addi t0,t0,2000 # optimized out after patch > add sp,sp,t0 > ... > addi sp,sp,32 > tail __riscv_restore_4 > >after patch: > bar: > call t0,__riscv_save_4 > addi sp,sp,-2032 > ... > li t0,-12288 > add sp,sp,t0 # prologue > ... > li t0,12288 # epilogue > add sp,sp,t0 > ... > addi sp,sp,2032 > tail __riscv_restore_4 > >gcc/ChangeLog: > > * config/riscv/riscv.cc (riscv_expand_prologue): Consider save-restore >in stack allocation. > (riscv_expand_epilogue): Consider save-restore in stack deallocation. > >gcc/testsuite/ChangeLog: > > * gcc.target/riscv/stack_save_restore.c: New test. 
>--- > gcc/config/riscv/riscv.cc | 50 ++++++++++--------- > .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++++ > 2 files changed, 66 insertions(+), 24 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c > >diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc >index f0bbcd6d6be..a50f2303032 100644 >--- a/gcc/config/riscv/riscv.cc >+++ b/gcc/config/riscv/riscv.cc >@@ -5010,12 +5010,12 @@ void > riscv_expand_prologue (void) > { > struct riscv_frame_info *frame = &cfun->machine->frame; >- poly_int64 size = frame->total_size; >+ poly_int64 remaining_size = frame->total_size; > unsigned mask = frame->mask; > rtx insn; > > if (flag_stack_usage_info) >- current_function_static_stack_size = constant_lower_bound (size); >+ current_function_static_stack_size = constant_lower_bound >(remaining_size); > > if (cfun->machine->naked_p) > return; >@@ -5026,7 +5026,7 @@ riscv_expand_prologue (void) > rtx dwarf = NULL_RTX; > dwarf = riscv_adjust_libcall_cfi_prologue (); > >- size -= frame->save_libcall_adjustment; >+ remaining_size -= frame->save_libcall_adjustment; > insn = emit_insn (riscv_gen_gpr_save_insn (frame)); > frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ > >@@ -5037,16 +5037,14 @@ riscv_expand_prologue (void) > /* Save the registers. */ > if ((frame->mask | frame->fmask) != 0) > { >- HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); >- if (size.is_constant ()) >- step1 = MIN (size.to_constant(), step1); >+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size); > > insn = gen_add3_insn (stack_pointer_rtx, > stack_pointer_rtx, > GEN_INT (-step1)); > RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; >- size -= step1; >- riscv_for_each_saved_reg (size, riscv_save_reg, false, false); >+ remaining_size -= step1; >+ riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false); > } > > frame->mask = mask; /* Undo the above fib. 
*/ >@@ -5055,29 +5053,29 @@ riscv_expand_prologue (void) > if (frame_pointer_needed) > { > insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, >- GEN_INT ((frame->hard_frame_pointer_offset - size).to_constant ())); >+ GEN_INT ((frame->hard_frame_pointer_offset - >remaining_size).to_constant ())); > RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; > > riscv_emit_stack_tie (); > } > > /* Allocate the rest of the frame. */ >- if (known_gt (size, 0)) >+ if (known_gt (remaining_size, 0)) > { > /* Two step adjustment: > 1.scalable frame. 2.constant frame. */ > poly_int64 scalable_frame (0, 0); >- if (!size.is_constant ()) >+ if (!remaining_size.is_constant ()) > { > /* First for scalable frame. */ >- poly_int64 scalable_frame = size; >- scalable_frame.coeffs[0] = size.coeffs[1]; >+ poly_int64 scalable_frame = remaining_size; >+ scalable_frame.coeffs[0] = remaining_size.coeffs[1]; > riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false); >- size -= scalable_frame; >+ remaining_size -= scalable_frame; > } > > /* Second step for constant frame. */ >- HOST_WIDE_INT constant_frame = size.to_constant (); >+ HOST_WIDE_INT constant_frame = remaining_size.to_constant (); > if (constant_frame == 0) > return; > >@@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style) > HOST_WIDE_INT step2 = 0; > bool use_restore_libcall = ((style == NORMAL_RETURN) > && riscv_use_save_libcall (frame)); >+ unsigned libcall_size = use_restore_libcall ? >+ frame->save_libcall_adjustment : 0; > rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); > rtx insn; > >@@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style) > REG_NOTES (insn) = dwarf; > } > >+ if (use_restore_libcall) >+ frame->mask = 0; /* Temporarily fib for GPRs. */ >+ > /* If we need to restore registers, deallocate as much stack as > possible in the second step without going out of range. 
*/ > if ((frame->mask | frame->fmask) != 0) >- { >- step2 = riscv_first_stack_step (frame, frame->total_size); >- step1 -= step2; >- } >+ step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); >+ >+ if (use_restore_libcall) >+ frame->mask = mask; /* Undo the above fib. */ >+ >+ step1 -= step2 + libcall_size; > > /* Set TARGET to BASE + STEP1. */ > if (known_gt (step1, 0)) >@@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style) > frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ > > /* Restore the registers. */ >- riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, >+ riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size, >+ riscv_restore_reg, > true, style == EXCEPTION_RETURN); > > if (use_restore_libcall) >- { > frame->mask = mask; /* Undo the above fib. */ >- gcc_assert (step2 >= frame->save_libcall_adjustment); >- step2 -= frame->save_libcall_adjustment; >- } > > if (need_barrier_p) > riscv_emit_stack_tie (); >diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c >b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c >new file mode 100644 >index 00000000000..522e706cfbf >--- /dev/null >+++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c >@@ -0,0 +1,40 @@ >+/* { dg-do compile } */ >+/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 >-fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops >-fno-lto" } */ >+/* { dg-final { check-function-bodies "**" "" } } */ >+ >+char my_getchar(); >+float getf(); >+ >+/* >+**bar: >+** call t0,__riscv_save_4 >+** addi sp,sp,-2032 >+** ... >+** li t0,-12288 >+** add sp,sp,t0 >+** ... >+** li t0,12288 >+** add sp,sp,t0 >+** ... 
>+** addi sp,sp,2032 >+** tail __riscv_restore_4 >+*/ >+int bar() >+{ >+ float volatile farray[3568]; >+ >+ float sum = 0; >+ float f1 = getf(); >+ float f2 = getf(); >+ float f3 = getf(); >+ float f4 = getf(); >+ >+ for (int i = 0; i < 3568; i++) >+ { >+ farray[i] = my_getchar() * 1.2; >+ sum += farray[i]; >+ } >+ >+ return sum + f1 + f2 + f3 + f4; >+} >+ >-- >2.17.1