This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit d0fbf9883d2e16785829a5d8f75b9d6122ad2b69
Author: Ville Juven <[email protected]>
AuthorDate: Wed Jun 7 14:02:48 2023 +0300

    riscv/lazyfpu: Add option to disable lazy FPU
    
    Adds option to use the old implementation where FPU is stored into
    the process stack.
---
 arch/Kconfig                            | 41 +++++++++++++++++++++++++++++++++
 arch/risc-v/include/irq.h               |  8 ++++++-
 arch/risc-v/src/common/riscv_fork.c     | 28 ++++++++++++----------
 arch/risc-v/src/common/riscv_fpu.S      |  4 ++++
 arch/risc-v/src/common/riscv_internal.h | 20 ++++++++++++++--
 5 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 232e32ca10..673ff57771 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -93,6 +93,7 @@ config ARCH_RISCV
        select ARCH_HAVE_RDWR_MEM_CPU_RUN
        select ARCH_HAVE_TCBINFO
        select ARCH_HAVE_THREAD_LOCAL
+       select ARCH_HAVE_LAZYFPU if ARCH_HAVE_FPU
        ---help---
                RISC-V 32 and 64-bit RV32 / RV64 architectures.
 
@@ -416,6 +417,11 @@ config ARCH_HAVE_DPFPU
        default n
        select ARCH_HAVE_FPU
 
+config ARCH_HAVE_LAZYFPU
+       bool
+       default n
+       depends on ARCH_HAVE_FPU
+
 config ARCH_HAVE_MMU
        bool
        default n
@@ -516,6 +522,41 @@ config ARCH_DPFPU
                Enable toolchain support for double precision (64-bit) floating
                point if both the toolchain and the hardware support it.
 
+config ARCH_LAZYFPU
+       bool "Enable lazy FPU state save / restore"
+       default n
+       depends on ARCH_FPU && ARCH_HAVE_LAZYFPU
+       ---help---
+               Enable lazy FPU state save and restore. Normally FPU state is saved
+               and restored with the integer context registers, if the task is using
+               FPU. The state is typically saved into the task's user stack upon
+               exception entry or context switch out, and restored when the
+               exception returns or context switches back in.
+
+               As the kernel does not use FPU, this can be optimized with the help
+               of the FPU hardware status and a bit of code logic inside the kernel.
+               The logic keeps track of the FPU state, which can be "unused",
+               "dirty" or "clean". A clean state means the FPU has not been used
+               since the last state save, while the dirty state indicates that the
+               FPU has been used.
+
+               The optimization saves / restores FPU registers only if:
+               - A context change has happened, save and restore does not happen
+                 during exception entry / return to the same task
+               - FPU is in use (state is not unused) and
+               - FPU status is dirty, i.e. FPU has been used after the last state save
+               - FPU restore happens when status is dirty or clean
+
+               This saves CPU time as the FPU registers do not have to be moved in
+               and out when handling an exception that does not result in a context
+               switch.
+
+               The tradeoff with the lazy FPU feature is that it requires a static
+               memory allocation from the task's TCB to store the FPU registers,
+               while the non-lazy style can use stack memory for storing the FPU
+               registers, saving memory as the stack frame for the FPU registers can
+               be skipped if the FPU is not in use.
+
 config ARCH_USE_MMU
        bool "Enable MMU"
        default n
diff --git a/arch/risc-v/include/irq.h b/arch/risc-v/include/irq.h
index 5941d7057b..54f0febbba 100644
--- a/arch/risc-v/include/irq.h
+++ b/arch/risc-v/include/irq.h
@@ -247,9 +247,15 @@
 
 #define XCPTCONTEXT_REGS    (INT_XCPT_REGS + FPU_XCPT_REGS)
 
+#ifdef CONFIG_ARCH_LAZYFPU
 /* Save only integer regs. FPU is handled separately */
 
 #define XCPTCONTEXT_SIZE    (INT_XCPT_SIZE)
+#else
+/* Save FPU registers with the integer registers */
+
+#define XCPTCONTEXT_SIZE    (INT_XCPT_SIZE + FPU_XCPT_SIZE)
+#endif
 
 /* In assembly language, values have to be referenced as byte address
  * offsets.  But in C, it is more convenient to reference registers as
@@ -570,7 +576,7 @@ struct xcptcontext
 
   /* FPU register save area */
 
-#ifdef CONFIG_ARCH_FPU
+#if defined(CONFIG_ARCH_FPU) && defined(CONFIG_ARCH_LAZYFPU)
   uintptr_t fregs[FPU_XCPT_REGS];
 #endif
 };
diff --git a/arch/risc-v/src/common/riscv_fork.c b/arch/risc-v/src/common/riscv_fork.c
index b1d96598e0..0dd41aecdc 100644
--- a/arch/risc-v/src/common/riscv_fork.c
+++ b/arch/risc-v/src/common/riscv_fork.c
@@ -109,6 +109,9 @@ pid_t riscv_fork(const struct fork_s *context)
   uintptr_t newtop;
   uintptr_t stacktop;
   uintptr_t stackutil;
+#ifdef CONFIG_ARCH_FPU
+  uintptr_t *fregs;
+#endif
 
   sinfo("s0:%" PRIxREG " s1:%" PRIxREG " s2:%" PRIxREG " s3:%" PRIxREG ""
         " s4:%" PRIxREG "\n",
@@ -228,18 +231,19 @@ pid_t riscv_fork(const struct fork_s *context)
   child->cmn.xcp.regs[REG_GP]   = newsp;        /* Global pointer */
 #endif
 #ifdef CONFIG_ARCH_FPU
-  child->cmn.xcp.fregs[REG_FS0]  = context->fs0;  /* Saved register fs1 */
-  child->cmn.xcp.fregs[REG_FS1]  = context->fs1;  /* Saved register fs1 */
-  child->cmn.xcp.fregs[REG_FS2]  = context->fs2;  /* Saved register fs2 */
-  child->cmn.xcp.fregs[REG_FS3]  = context->fs3;  /* Saved register fs3 */
-  child->cmn.xcp.fregs[REG_FS4]  = context->fs4;  /* Saved register fs4 */
-  child->cmn.xcp.fregs[REG_FS5]  = context->fs5;  /* Saved register fs5 */
-  child->cmn.xcp.fregs[REG_FS6]  = context->fs6;  /* Saved register fs6 */
-  child->cmn.xcp.fregs[REG_FS7]  = context->fs7;  /* Saved register fs7 */
-  child->cmn.xcp.fregs[REG_FS8]  = context->fs8;  /* Saved register fs8 */
-  child->cmn.xcp.fregs[REG_FS9]  = context->fs9;  /* Saved register fs9 */
-  child->cmn.xcp.fregs[REG_FS10] = context->fs10; /* Saved register fs10 */
-  child->cmn.xcp.fregs[REG_FS11] = context->fs11; /* Saved register fs11 */
+  fregs                         = riscv_fpuregs(&child->cmn);
+  fregs[REG_FS0]                = context->fs0;  /* Saved register fs0 */
+  fregs[REG_FS1]                = context->fs1;  /* Saved register fs1 */
+  fregs[REG_FS2]                = context->fs2;  /* Saved register fs2 */
+  fregs[REG_FS3]                = context->fs3;  /* Saved register fs3 */
+  fregs[REG_FS4]                = context->fs4;  /* Saved register fs4 */
+  fregs[REG_FS5]                = context->fs5;  /* Saved register fs5 */
+  fregs[REG_FS6]                = context->fs6;  /* Saved register fs6 */
+  fregs[REG_FS7]                = context->fs7;  /* Saved register fs7 */
+  fregs[REG_FS8]                = context->fs8;  /* Saved register fs8 */
+  fregs[REG_FS9]                = context->fs9;  /* Saved register fs9 */
+  fregs[REG_FS10]               = context->fs10; /* Saved register fs10 */
+  fregs[REG_FS11]               = context->fs11; /* Saved register fs11 */
 #endif
 
 #ifdef CONFIG_LIB_SYSCALL
diff --git a/arch/risc-v/src/common/riscv_fpu.S b/arch/risc-v/src/common/riscv_fpu.S
index cc88e769dd..11f24b90ca 100644
--- a/arch/risc-v/src/common/riscv_fpu.S
+++ b/arch/risc-v/src/common/riscv_fpu.S
@@ -102,7 +102,11 @@ riscv_savefpu:
   li         t1, MSTATUS_FS
   and        t2, t0, t1
   li         t1, MSTATUS_FS_DIRTY
+#ifdef CONFIG_ARCH_LAZYFPU
   bne        t2, t1, 1f
+#else
+  blt        t2, t1, 1f
+#endif
   li         t1, ~MSTATUS_FS
   and        t0, t0, t1
   li         t1, MSTATUS_FS_CLEAN
diff --git a/arch/risc-v/src/common/riscv_internal.h b/arch/risc-v/src/common/riscv_internal.h
index 836d7354f5..ac16c0bd87 100644
--- a/arch/risc-v/src/common/riscv_internal.h
+++ b/arch/risc-v/src/common/riscv_internal.h
@@ -206,10 +206,26 @@ void riscv_exception_attach(void);
 void riscv_fpuconfig(void);
 void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs);
 void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs);
+
+/* Get FPU register save area */
+
+static inline uintptr_t *riscv_fpuregs(struct tcb_s *tcb)
+{
+#ifdef CONFIG_ARCH_LAZYFPU
+  /* With lazy FPU the registers are simply in tcb */
+
+  return tcb->xcp.fregs;
+#else
+  /* Otherwise they are after the integer registers */
+
+  return (uintptr_t *)((uintptr_t)tcb->xcp.regs + INT_XCPT_SIZE);
+#endif
+}
 #else
 #  define riscv_fpuconfig()
 #  define riscv_savefpu(regs, fregs)
 #  define riscv_restorefpu(regs, fregs)
+#  define riscv_fpuregs(tcb)
 #endif
 
 /* Save / restore context of task */
@@ -221,7 +237,7 @@ static inline void riscv_savecontext(struct tcb_s *tcb)
 #ifdef CONFIG_ARCH_FPU
   /* Save current process FPU state to TCB */
 
-  riscv_savefpu(tcb->xcp.regs, tcb->xcp.fregs);
+  riscv_savefpu(tcb->xcp.regs, riscv_fpuregs(tcb));
 #endif
 }
 
@@ -232,7 +248,7 @@ static inline void riscv_restorecontext(struct tcb_s *tcb)
 #ifdef CONFIG_ARCH_FPU
   /* Restore FPU state for next process */
 
-  riscv_restorefpu(tcb->xcp.regs, tcb->xcp.fregs);
+  riscv_restorefpu(tcb->xcp.regs, riscv_fpuregs(tcb));
 #endif
 }
 

Reply via email to