On Tue, Dec 16, 2025 at 10:46:28AM +0100, Christophe Leroy (CS GROUP) wrote:
> 
> 
> Le 14/12/2025 à 14:02, Mukesh Kumar Chaurasiya a écrit :
> > From: Mukesh Kumar Chaurasiya <[email protected]>
> > 
> > Add PowerPC-specific implementations of the generic syscall exit hooks
> > used by the generic entry/exit framework:
> > 
> >   - arch_exit_to_user_mode_work_prepare()
> >   - arch_exit_to_user_mode_work()
> > 
> > These helpers handle user state restoration when returning from the
> > kernel to userspace, including FPU/VMX/VSX state, transactional memory,
> > KUAP restore, and per-CPU accounting.
> > 
> > Additionally, move check_return_regs_valid() from interrupt.c to
> > interrupt.h so it can be shared by the new entry/exit logic, and add
> > arch_do_signal_or_restart() for use with the generic entry flow.
> > 
> > No functional change is intended with this patch.
> > 
> > Signed-off-by: Mukesh Kumar Chaurasiya <[email protected]>
> > ---
> >   arch/powerpc/include/asm/entry-common.h | 49 +++++++++++++++
> >   arch/powerpc/include/asm/interrupt.h    | 82 +++++++++++++++++++++++++
> >   arch/powerpc/kernel/interrupt.c         | 81 ------------------------
> >   arch/powerpc/kernel/signal.c            | 14 +++++
> >   4 files changed, 145 insertions(+), 81 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/entry-common.h 
> > b/arch/powerpc/include/asm/entry-common.h
> > index 093ece06ef79..e8ebd42a4e6d 100644
> > --- a/arch/powerpc/include/asm/entry-common.h
> > +++ b/arch/powerpc/include/asm/entry-common.h
> > @@ -8,6 +8,7 @@
> >   #include <asm/cputime.h>
> >   #include <asm/interrupt.h>
> >   #include <asm/stacktrace.h>
> > +#include <asm/switch_to.h>
> >   #include <asm/tm.h>
> >   static __always_inline void arch_enter_from_user_mode(struct pt_regs 
> > *regs)
> > @@ -104,5 +105,53 @@ static __always_inline void 
> > arch_enter_from_user_mode(struct pt_regs *regs)
> >   #define arch_enter_from_user_mode arch_enter_from_user_mode
> > +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
> > +                                             unsigned long ti_work)
> > +{
> > +   unsigned long mathflags;
> > +
> > +   if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
> > +           if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> > +               unlikely((ti_work & _TIF_RESTORE_TM))) {
> > +                   restore_tm_state(regs);
> > +           } else {
> > +                   mathflags = MSR_FP;
> > +
> > +                   if (cpu_has_feature(CPU_FTR_VSX))
> > +                           mathflags |= MSR_VEC | MSR_VSX;
> > +                   else if (cpu_has_feature(CPU_FTR_ALTIVEC))
> > +                           mathflags |= MSR_VEC;
> > +
> > +                   /*
> > +                    * If userspace MSR has all available FP bits set,
> > +                    * then they are live and no need to restore. If not,
> > +                    * it means the regs were given up and restore_math
> > +                    * may decide to restore them (to avoid taking an FP
> > +                    * fault).
> > +                    */
> > +                   if ((regs->msr & mathflags) != mathflags)
> > +                           restore_math(regs);
> > +           }
> > +   }
> > +
> > +   check_return_regs_valid(regs);
> > +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > +   local_paca->tm_scratch = regs->msr;
> > +#endif
> > +   /* Restore user access locks last */
> > +   kuap_user_restore(regs);
> > +}
> > +
> > +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
> > +
> > +static __always_inline void arch_exit_to_user_mode(void)
> > +{
> > +   booke_load_dbcr0();
> > +
> > +   account_cpu_user_exit();
> > +}
> > +
> > +#define arch_exit_to_user_mode arch_exit_to_user_mode
> > +
> >   #endif /* CONFIG_GENERIC_IRQ_ENTRY */
> >   #endif /* _ASM_PPC_ENTRY_COMMON_H */
> > diff --git a/arch/powerpc/include/asm/interrupt.h 
> > b/arch/powerpc/include/asm/interrupt.h
> > index ca8a2cda9400..77ff8e33f8cd 100644
> > --- a/arch/powerpc/include/asm/interrupt.h
> > +++ b/arch/powerpc/include/asm/interrupt.h
> > @@ -68,6 +68,8 @@
> >   #include <linux/context_tracking.h>
> >   #include <linux/hardirq.h>
> > +#include <linux/sched/debug.h> /* for show_regs */
> > +
> >   #include <asm/cputime.h>
> >   #include <asm/firmware.h>
> >   #include <asm/ftrace.h>
> > @@ -172,6 +174,86 @@ static inline void booke_restore_dbcr0(void)
> >   #endif
> >   }
> > +static inline void check_return_regs_valid(struct pt_regs *regs)
> 
> This was previously a notrace function. Should it be marked __always_inline
> instead of just inline ?
> 
Yes it should. Will fix this too.
> > +{
> > +#ifdef CONFIG_PPC_BOOK3S_64
> > +   unsigned long trap, srr0, srr1;
> > +   static bool warned;
> > +   u8 *validp;
> > +   char *h;
> > +
> > +   if (trap_is_scv(regs))
> > +           return;
> > +
> > +   trap = TRAP(regs);
> > +   // EE in HV mode sets HSRRs like 0xea0
> > +   if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
> > +           trap = 0xea0;
> > +
> > +   switch (trap) {
> > +   case 0x980:
> > +   case INTERRUPT_H_DATA_STORAGE:
> > +   case 0xe20:
> > +   case 0xe40:
> > +   case INTERRUPT_HMI:
> > +   case 0xe80:
> > +   case 0xea0:
> > +   case INTERRUPT_H_FAC_UNAVAIL:
> > +   case 0x1200:
> > +   case 0x1500:
> > +   case 0x1600:
> > +   case 0x1800:
> > +           validp = &local_paca->hsrr_valid;
> > +           if (!READ_ONCE(*validp))
> > +                   return;
> > +
> > +           srr0 = mfspr(SPRN_HSRR0);
> > +           srr1 = mfspr(SPRN_HSRR1);
> > +           h = "H";
> > +
> > +           break;
> > +   default:
> > +           validp = &local_paca->srr_valid;
> > +           if (!READ_ONCE(*validp))
> > +                   return;
> > +
> > +           srr0 = mfspr(SPRN_SRR0);
> > +           srr1 = mfspr(SPRN_SRR1);
> > +           h = "";
> > +           break;
> > +   }
> > +
> > +   if (srr0 == regs->nip && srr1 == regs->msr)
> > +           return;
> > +
> > +   /*
> > +    * A NMI / soft-NMI interrupt may have come in after we found
> > +    * srr_valid and before the SRRs are loaded. The interrupt then
> > +    * comes in and clobbers SRRs and clears srr_valid. Then we load
> > +    * the SRRs here and test them above and find they don't match.
> > +    *
> > +    * Test validity again after that, to catch such false positives.
> > +    *
> > +    * This test in general will have some window for false negatives
> > +    * and may not catch and fix all such cases if an NMI comes in
> > +    * later and clobbers SRRs without clearing srr_valid, but hopefully
> > +    * such things will get caught most of the time, statistically
> > +    * enough to be able to get a warning out.
> > +    */
> > +   if (!READ_ONCE(*validp))
> > +           return;
> > +
> > +   if (!data_race(warned)) {
> > +           data_race(warned = true);
> > +           pr_warn("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
> > +           pr_warn("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
> > +           show_regs(regs);
> > +   }
> > +
> > +   WRITE_ONCE(*validp, 0); /* fixup */
> > +#endif
> > +}
> > +
> >   static inline void interrupt_enter_prepare(struct pt_regs *regs)
> >   {
> >   #ifdef CONFIG_PPC64
> > diff --git a/arch/powerpc/kernel/interrupt.c 
> > b/arch/powerpc/kernel/interrupt.c
> > index 2a09ac5dabd6..f53d432f6087 100644
> > --- a/arch/powerpc/kernel/interrupt.c
> > +++ b/arch/powerpc/kernel/interrupt.c
> > @@ -4,7 +4,6 @@
> >   #include <linux/err.h>
> >   #include <linux/compat.h>
> >   #include <linux/rseq.h>
> > -#include <linux/sched/debug.h> /* for show_regs */
> >   #include <asm/kup.h>
> >   #include <asm/cputime.h>
> > @@ -78,86 +77,6 @@ static notrace __always_inline bool 
> > prep_irq_for_enabled_exit(bool restartable)
> >     return true;
> >   }
> > -static notrace void check_return_regs_valid(struct pt_regs *regs)
> > -{
> > -#ifdef CONFIG_PPC_BOOK3S_64
> > -   unsigned long trap, srr0, srr1;
> > -   static bool warned;
> > -   u8 *validp;
> > -   char *h;
> > -
> > -   if (trap_is_scv(regs))
> > -           return;
> > -
> > -   trap = TRAP(regs);
> > -   // EE in HV mode sets HSRRs like 0xea0
> > -   if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
> > -           trap = 0xea0;
> > -
> > -   switch (trap) {
> > -   case 0x980:
> > -   case INTERRUPT_H_DATA_STORAGE:
> > -   case 0xe20:
> > -   case 0xe40:
> > -   case INTERRUPT_HMI:
> > -   case 0xe80:
> > -   case 0xea0:
> > -   case INTERRUPT_H_FAC_UNAVAIL:
> > -   case 0x1200:
> > -   case 0x1500:
> > -   case 0x1600:
> > -   case 0x1800:
> > -           validp = &local_paca->hsrr_valid;
> > -           if (!READ_ONCE(*validp))
> > -                   return;
> > -
> > -           srr0 = mfspr(SPRN_HSRR0);
> > -           srr1 = mfspr(SPRN_HSRR1);
> > -           h = "H";
> > -
> > -           break;
> > -   default:
> > -           validp = &local_paca->srr_valid;
> > -           if (!READ_ONCE(*validp))
> > -                   return;
> > -
> > -           srr0 = mfspr(SPRN_SRR0);
> > -           srr1 = mfspr(SPRN_SRR1);
> > -           h = "";
> > -           break;
> > -   }
> > -
> > -   if (srr0 == regs->nip && srr1 == regs->msr)
> > -           return;
> > -
> > -   /*
> > -    * A NMI / soft-NMI interrupt may have come in after we found
> > -    * srr_valid and before the SRRs are loaded. The interrupt then
> > -    * comes in and clobbers SRRs and clears srr_valid. Then we load
> > -    * the SRRs here and test them above and find they don't match.
> > -    *
> > -    * Test validity again after that, to catch such false positives.
> > -    *
> > -    * This test in general will have some window for false negatives
> > -    * and may not catch and fix all such cases if an NMI comes in
> > -    * later and clobbers SRRs without clearing srr_valid, but hopefully
> > -    * such things will get caught most of the time, statistically
> > -    * enough to be able to get a warning out.
> > -    */
> > -   if (!READ_ONCE(*validp))
> > -           return;
> > -
> > -   if (!data_race(warned)) {
> > -           data_race(warned = true);
> > -           printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
> > -           printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
> > -           show_regs(regs);
> > -   }
> > -
> > -   WRITE_ONCE(*validp, 0); /* fixup */
> > -#endif
> > -}
> > -
> >   static notrace unsigned long
> >   interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
> >   {
> > diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> > index aa17e62f3754..719930cf4ae1 100644
> > --- a/arch/powerpc/kernel/signal.c
> > +++ b/arch/powerpc/kernel/signal.c
> > @@ -22,6 +22,11 @@
> >   #include "signal.h"
> > +/* This will be removed */
> > +#ifdef CONFIG_GENERIC_ENTRY
> 
> Is this #ifdef really needed ?
Will fix this.
> 
> > +#include <linux/entry-common.h>
> > +#endif /* CONFIG_GENERIC_ENTRY */
> > +
> >   #ifdef CONFIG_VSX
> >   unsigned long copy_fpr_to_user(void __user *to,
> >                            struct task_struct *task)
> > @@ -368,3 +373,12 @@ void signal_fault(struct task_struct *tsk, struct 
> > pt_regs *regs,
> >             printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, 
> > tsk->comm,
> >                                task_pid_nr(tsk), where, ptr, regs->nip, 
> > regs->link);
> >   }
> > +
> > +#ifdef CONFIG_GENERIC_ENTRY
> 
> Why is this #ifdef needed ?
> 
> > +void arch_do_signal_or_restart(struct pt_regs *regs)
> > +{
> > +   BUG_ON(regs != current->thread.regs);
> 
> Is this BUG_ON() needed ? Can't we use something smoother ?
> 
I am not sure what to do here. Proceeding without this check seemed
dangerous, so I went with a BUG_ON(). Could you suggest an alternative
if something better comes to mind?

Regards,
Mukesh
> > +   local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
> > +   do_signal(current);
> > +}
> > +#endif /* CONFIG_GENERIC_ENTRY */
> 

Reply via email to