This is an automated email from the ASF dual-hosted git repository. xiaoxiang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nuttx.git
commit d0fbf9883d2e16785829a5d8f75b9d6122ad2b69 Author: Ville Juven <[email protected]> AuthorDate: Wed Jun 7 14:02:48 2023 +0300 riscv/lazyfpu: Add option to disable lazy FPU Adds option to use the old implementation where FPU is stored into the process stack. --- arch/Kconfig | 41 +++++++++++++++++++++++++++++++++ arch/risc-v/include/irq.h | 8 ++++++- arch/risc-v/src/common/riscv_fork.c | 28 ++++++++++++---------- arch/risc-v/src/common/riscv_fpu.S | 4 ++++ arch/risc-v/src/common/riscv_internal.h | 20 ++++++++++++++-- 5 files changed, 86 insertions(+), 15 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 232e32ca10..673ff57771 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -93,6 +93,7 @@ config ARCH_RISCV select ARCH_HAVE_RDWR_MEM_CPU_RUN select ARCH_HAVE_TCBINFO select ARCH_HAVE_THREAD_LOCAL + select ARCH_HAVE_LAZYFPU if ARCH_HAVE_FPU ---help--- RISC-V 32 and 64-bit RV32 / RV64 architectures. @@ -416,6 +417,11 @@ config ARCH_HAVE_DPFPU default n select ARCH_HAVE_FPU +config ARCH_HAVE_LAZYFPU + bool + default n + depends on ARCH_HAVE_FPU + config ARCH_HAVE_MMU bool default n @@ -516,6 +522,41 @@ config ARCH_DPFPU Enable toolchain support for double precision (64-bit) floating point if both the toolchain and the hardware support it. +config ARCH_LAZYFPU + bool "Enable lazy FPU state save / restore" + default n + depends on ARCH_FPU && ARCH_HAVE_LAZYFPU + ---help--- + Enable lazy FPU state save and restore. Normally FPU state is saved + and restored with the integer context registers, if the task is using + FPU. The state is typically saved into the task's user stack upon + exception entry or context switch out, and restored when the + exception returns or context switches back in. + + As the kernel does not use FPU, this can be optimized with the help + of the FPU hardware status and a bit of code logic inside the kernel. + The logic keeps track of the FPU state, which can be "unused", + "dirty" or "clean". 
A clean state means the FPU has not been used + since the last state save, while the dirty state indicates that the + FPU has been used. + + The optimization saves / restores FPU registers only if: + - A context change has happened, save and restore does not happen + during exception entry / return to the same task + - FPU is in use (state is not unused) and + - FPU status is dirty, i.e. FPU has been used after the last state save + - FPU restore happens when status is dirty or clean + + This saves CPU time as the FPU registers do not have to be moved in + and out when handling an exception that does not result in a context + switch. + + The tradeoff with the lazy FPU feature is that it requires a static + memory allocation from the task's TCB to store the FPU registers, + while the non-lazy style can use stack memory for storing the FPU + registers, saving memory as the stack frame for the FPU registers can + be skipped if the FPU is not in use. + config ARCH_USE_MMU bool "Enable MMU" default n diff --git a/arch/risc-v/include/irq.h b/arch/risc-v/include/irq.h index 5941d7057b..54f0febbba 100644 --- a/arch/risc-v/include/irq.h +++ b/arch/risc-v/include/irq.h @@ -247,9 +247,15 @@ #define XCPTCONTEXT_REGS (INT_XCPT_REGS + FPU_XCPT_REGS) +#ifdef CONFIG_ARCH_LAZYFPU /* Save only integer regs. FPU is handled separately */ #define XCPTCONTEXT_SIZE (INT_XCPT_SIZE) +#else +/* Save FPU registers with the integer registers */ + +#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE + FPU_XCPT_SIZE) +#endif /* In assembly language, values have to be referenced as byte address * offsets. 
But in C, it is more convenient to reference registers as @@ -570,7 +576,7 @@ struct xcptcontext /* FPU register save area */ -#ifdef CONFIG_ARCH_FPU +#if defined(CONFIG_ARCH_FPU) && defined(CONFIG_ARCH_LAZYFPU) uintptr_t fregs[FPU_XCPT_REGS]; #endif }; diff --git a/arch/risc-v/src/common/riscv_fork.c b/arch/risc-v/src/common/riscv_fork.c index b1d96598e0..0dd41aecdc 100644 --- a/arch/risc-v/src/common/riscv_fork.c +++ b/arch/risc-v/src/common/riscv_fork.c @@ -109,6 +109,9 @@ pid_t riscv_fork(const struct fork_s *context) uintptr_t newtop; uintptr_t stacktop; uintptr_t stackutil; +#ifdef CONFIG_ARCH_FPU + uintptr_t *fregs; +#endif sinfo("s0:%" PRIxREG " s1:%" PRIxREG " s2:%" PRIxREG " s3:%" PRIxREG "" " s4:%" PRIxREG "\n", @@ -228,18 +231,19 @@ pid_t riscv_fork(const struct fork_s *context) child->cmn.xcp.regs[REG_GP] = newsp; /* Global pointer */ #endif #ifdef CONFIG_ARCH_FPU - child->cmn.xcp.fregs[REG_FS0] = context->fs0; /* Saved register fs1 */ - child->cmn.xcp.fregs[REG_FS1] = context->fs1; /* Saved register fs1 */ - child->cmn.xcp.fregs[REG_FS2] = context->fs2; /* Saved register fs2 */ - child->cmn.xcp.fregs[REG_FS3] = context->fs3; /* Saved register fs3 */ - child->cmn.xcp.fregs[REG_FS4] = context->fs4; /* Saved register fs4 */ - child->cmn.xcp.fregs[REG_FS5] = context->fs5; /* Saved register fs5 */ - child->cmn.xcp.fregs[REG_FS6] = context->fs6; /* Saved register fs6 */ - child->cmn.xcp.fregs[REG_FS7] = context->fs7; /* Saved register fs7 */ - child->cmn.xcp.fregs[REG_FS8] = context->fs8; /* Saved register fs8 */ - child->cmn.xcp.fregs[REG_FS9] = context->fs9; /* Saved register fs9 */ - child->cmn.xcp.fregs[REG_FS10] = context->fs10; /* Saved register fs10 */ - child->cmn.xcp.fregs[REG_FS11] = context->fs11; /* Saved register fs11 */ + fregs = riscv_fpuregs(&child->cmn); + fregs[REG_FS0] = context->fs0; /* Saved register fs1 */ + fregs[REG_FS1] = context->fs1; /* Saved register fs1 */ + fregs[REG_FS2] = context->fs2; /* Saved register fs2 */ + 
fregs[REG_FS3] = context->fs3; /* Saved register fs3 */ + fregs[REG_FS4] = context->fs4; /* Saved register fs4 */ + fregs[REG_FS5] = context->fs5; /* Saved register fs5 */ + fregs[REG_FS6] = context->fs6; /* Saved register fs6 */ + fregs[REG_FS7] = context->fs7; /* Saved register fs7 */ + fregs[REG_FS8] = context->fs8; /* Saved register fs8 */ + fregs[REG_FS9] = context->fs9; /* Saved register fs9 */ + fregs[REG_FS10] = context->fs10; /* Saved register fs10 */ + fregs[REG_FS11] = context->fs11; /* Saved register fs11 */ #endif #ifdef CONFIG_LIB_SYSCALL diff --git a/arch/risc-v/src/common/riscv_fpu.S b/arch/risc-v/src/common/riscv_fpu.S index cc88e769dd..11f24b90ca 100644 --- a/arch/risc-v/src/common/riscv_fpu.S +++ b/arch/risc-v/src/common/riscv_fpu.S @@ -102,7 +102,11 @@ riscv_savefpu: li t1, MSTATUS_FS and t2, t0, t1 li t1, MSTATUS_FS_DIRTY +#ifdef CONFIG_ARCH_LAZYFPU bne t2, t1, 1f +#else + blt t2, t1, 1f +#endif li t1, ~MSTATUS_FS and t0, t0, t1 li t1, MSTATUS_FS_CLEAN diff --git a/arch/risc-v/src/common/riscv_internal.h b/arch/risc-v/src/common/riscv_internal.h index 836d7354f5..ac16c0bd87 100644 --- a/arch/risc-v/src/common/riscv_internal.h +++ b/arch/risc-v/src/common/riscv_internal.h @@ -206,10 +206,26 @@ void riscv_exception_attach(void); void riscv_fpuconfig(void); void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs); void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs); + +/* Get FPU register save area */ + +static inline uintptr_t *riscv_fpuregs(struct tcb_s *tcb) +{ +#ifdef CONFIG_ARCH_LAZYFPU + /* With lazy FPU the registers are simply in tcb */ + + return tcb->xcp.fregs; +#else + /* Otherwise they are after the integer registers */ + + return (uintptr_t *)((uintptr_t)tcb->xcp.regs + INT_XCPT_SIZE); +#endif +} #else # define riscv_fpuconfig() # define riscv_savefpu(regs, fregs) # define riscv_restorefpu(regs, fregs) +# define riscv_fpuregs(tcb) #endif /* Save / restore context of task */ @@ -221,7 +237,7 @@ static inline void 
riscv_savecontext(struct tcb_s *tcb) #ifdef CONFIG_ARCH_FPU /* Save current process FPU state to TCB */ - riscv_savefpu(tcb->xcp.regs, tcb->xcp.fregs); + riscv_savefpu(tcb->xcp.regs, riscv_fpuregs(tcb)); #endif } @@ -232,7 +248,7 @@ static inline void riscv_restorecontext(struct tcb_s *tcb) #ifdef CONFIG_ARCH_FPU /* Restore FPU state for next process */ - riscv_restorefpu(tcb->xcp.regs, tcb->xcp.fregs); + riscv_restorefpu(tcb->xcp.regs, riscv_fpuregs(tcb)); #endif }
