On Tue, Dec 16, 2025 at 12:01:32PM +0100, Christophe Leroy (CS GROUP) wrote:
> 
> 
> Le 14/12/2025 à 14:02, Mukesh Kumar Chaurasiya a écrit :
> > From: Mukesh Kumar Chaurasiya <[email protected]>
> > 
> > Convert the PowerPC syscall entry and exit paths to use the generic
> > entry/exit framework by selecting GENERIC_ENTRY and integrating with
> > the common syscall handling routines.
> > 
> > This change transitions PowerPC away from its custom syscall entry and
> > exit code to use the generic helpers such as:
> >   - syscall_enter_from_user_mode()
> >   - syscall_exit_to_user_mode()
> > 
> > As part of this migration:
> >   - The architecture now selects GENERIC_ENTRY in Kconfig.
> >   - Old tracing, seccomp, and audit handling in ptrace.c is removed in
> >     favor of generic entry infrastructure.
> >   - interrupt.c and syscall.c are simplified to delegate context
> >     management and user exit handling to the generic entry path.
> >   - The new pt_regs field `exit_flags` introduced earlier is now used
> >     to carry per-syscall exit state flags (e.g. _TIF_RESTOREALL).
> > 
> > This aligns PowerPC with the common entry code used by other
> > architectures and reduces duplicated logic around syscall tracing,
> > context tracking, and signal handling.
> > 
> > The performance benchmarks from perf bench basic syscall are below:
> > 
> > perf bench syscall usec/op
> > 
> > | Test            | With Patch | Without Patch | % Change |
> > | --------------- | ---------- | ------------- | -------- |
> > | getppid usec/op | 0.207795   | 0.210373      | -1.22%   |
> > | getpgid usec/op | 0.206282   | 0.211676      | -2.55%   |
> > | fork usec/op    | 833.986    | 814.809       | +2.35%   |
> > | execve usec/op  | 360.939    | 365.168       | -1.16%   |
> > 
> > perf bench syscall ops/sec
> > 
> > | Test            | With Patch | Without Patch | % Change |
> > | --------------- | ---------- | ------------- | -------- |
> > | getppid ops/sec | 48,12,433  | 47,53,459     | +1.24%   |
> > | getpgid ops/sec | 48,47,744  | 47,24,192     | +2.61%   |
> > | fork ops/sec    | 1,199      | 1,227         | -2.28%   |
> > | execve ops/sec  | 2,770      | 2,738         | +1.16%   |
> > 
> > IPI latency benchmark
> > 
> > | Metric                  | With Patch       | Without Patch    | % Change |
> > | ----------------------- | ---------------- | ---------------- | -------- |
> > | Dry-run (ns)            | 206,675.81       | 206,719.36       | -0.02%   |
> > | Self-IPI avg (ns)       | 1,939,991.00     | 1,976,116.15     | -1.83%   |
> > | Self-IPI max (ns)       | 3,533,718.93     | 3,582,650.33     | -1.37%   |
> > | Normal IPI avg (ns)     | 111,110,034.23   | 110,513,373.51   | +0.54%   |
> > | Normal IPI max (ns)     | 150,393,442.64   | 149,669,477.89   | +0.48%   |
> > | Broadcast IPI max (ns)  | 3,978,231,022.96 | 4,359,916,859.46 | -8.73%   |
> > | Broadcast lock max (ns) | 4,025,425,714.49 | 4,384,956,730.83 | -8.20%   |
> > 
> > Thats very close to performance earlier with arch specific handling.
> > 
> > Signed-off-by: Mukesh Kumar Chaurasiya <[email protected]>
> > ---
> >   arch/powerpc/Kconfig                    |   1 +
> >   arch/powerpc/include/asm/entry-common.h |   5 +-
> >   arch/powerpc/kernel/interrupt.c         | 139 +++++++----------------
> >   arch/powerpc/kernel/ptrace/ptrace.c     | 141 ------------------------
> >   arch/powerpc/kernel/signal.c            |  10 +-
> >   arch/powerpc/kernel/syscall.c           | 119 +-------------------
> >   6 files changed, 49 insertions(+), 366 deletions(-)
> > 
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index b0c602c3bbe1..a4330775b254 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -203,6 +203,7 @@ config PPC
> >     select GENERIC_CPU_AUTOPROBE
> >     select GENERIC_CPU_VULNERABILITIES      if PPC_BARRIER_NOSPEC
> >     select GENERIC_EARLY_IOREMAP
> > +   select GENERIC_ENTRY
> >     select GENERIC_GETTIMEOFDAY
> >     select GENERIC_IDLE_POLL_SETUP
> >     select GENERIC_IOREMAP
> > diff --git a/arch/powerpc/include/asm/entry-common.h 
> > b/arch/powerpc/include/asm/entry-common.h
> > index e2ae7416dee1..77129174f882 100644
> > --- a/arch/powerpc/include/asm/entry-common.h
> > +++ b/arch/powerpc/include/asm/entry-common.h
> > @@ -3,7 +3,7 @@
> >   #ifndef _ASM_PPC_ENTRY_COMMON_H
> >   #define _ASM_PPC_ENTRY_COMMON_H
> > -#ifdef CONFIG_GENERIC_IRQ_ENTRY
> > +#ifdef CONFIG_GENERIC_ENTRY
> 
> Powerpc now selected this inconditionaly. Why is this #ifdef needed ?
> 
Will remove this.
> 
> >   #include <asm/cputime.h>
> >   #include <asm/interrupt.h>
> > @@ -217,9 +217,6 @@ static inline void arch_interrupt_enter_prepare(struct 
> > pt_regs *regs)
> >     if (user_mode(regs)) {
> >             kuap_lock();
> > -           CT_WARN_ON(ct_state() != CT_STATE_USER);
> > -           user_exit_irqoff();
> > -
> >             account_cpu_user_entry();
> >             account_stolen_time();
> >     } else {
> > diff --git a/arch/powerpc/kernel/interrupt.c 
> > b/arch/powerpc/kernel/interrupt.c
> > index 7f67f0b9d627..7d5cd4b5a610 100644
> > --- a/arch/powerpc/kernel/interrupt.c
> > +++ b/arch/powerpc/kernel/interrupt.c
> > @@ -1,6 +1,7 @@
> >   // SPDX-License-Identifier: GPL-2.0-or-later
> >   #include <linux/context_tracking.h>
> > +#include <linux/entry-common.h>
> >   #include <linux/err.h>
> >   #include <linux/compat.h>
> >   #include <linux/rseq.h>
> > @@ -73,79 +74,6 @@ static notrace __always_inline bool 
> > prep_irq_for_enabled_exit(bool restartable)
> >     return true;
> >   }
> > -static notrace unsigned long
> > -interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
> > -{
> > -   unsigned long ti_flags;
> > -
> > -again:
> > -   ti_flags = read_thread_flags();
> > -   while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
> > -           local_irq_enable();
> > -           if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
> > -                   schedule();
> > -           } else {
> > -                   /*
> > -                    * SIGPENDING must restore signal handler function
> > -                    * argument GPRs, and some non-volatiles (e.g., r1).
> > -                    * Restore all for now. This could be made lighter.
> > -                    */
> > -                   if (ti_flags & _TIF_SIGPENDING)
> > -                           ret |= _TIF_RESTOREALL;
> > -                   do_notify_resume(regs, ti_flags);
> 
> do_notify_resume() has no caller anymore, should be removed from
> arch/powerpc/include/asm/signal.h and arch/powerpc/kernel/signal.c
> 
> 
> 
Oh yeah, will remove this.
> > -           }
> > -           local_irq_disable();
> > -           ti_flags = read_thread_flags();
> > -   }
> > -
> > -   if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
> > -           if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> > -                           unlikely((ti_flags & _TIF_RESTORE_TM))) {
> > -                   restore_tm_state(regs);
> > -           } else {
> > -                   unsigned long mathflags = MSR_FP;
> > -
> > -                   if (cpu_has_feature(CPU_FTR_VSX))
> > -                           mathflags |= MSR_VEC | MSR_VSX;
> > -                   else if (cpu_has_feature(CPU_FTR_ALTIVEC))
> > -                           mathflags |= MSR_VEC;
> > -
> > -                   /*
> > -                    * If userspace MSR has all available FP bits set,
> > -                    * then they are live and no need to restore. If not,
> > -                    * it means the regs were given up and restore_math
> > -                    * may decide to restore them (to avoid taking an FP
> > -                    * fault).
> > -                    */
> > -                   if ((regs->msr & mathflags) != mathflags)
> > -                           restore_math(regs);
> > -           }
> > -   }
> > -
> > -   check_return_regs_valid(regs);
> > -
> > -   user_enter_irqoff();
> > -   if (!prep_irq_for_enabled_exit(true)) {
> > -           user_exit_irqoff();
> > -           local_irq_enable();
> > -           local_irq_disable();
> > -           goto again;
> > -   }
> > -
> > -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> > -   local_paca->tm_scratch = regs->msr;
> > -#endif
> > -
> > -   booke_load_dbcr0();
> > -
> > -   account_cpu_user_exit();
> > -
> > -   /* Restore user access locks last */
> > -   kuap_user_restore(regs);
> > -
> > -   return ret;
> > -}
> > -
> >   /*
> >    * This should be called after a syscall returns, with r3 the return value
> >    * from the syscall. If this function returns non-zero, the system call
> > @@ -160,17 +88,12 @@ notrace unsigned long syscall_exit_prepare(unsigned 
> > long r3,
> >                                        long scv)
> >   {
> >     unsigned long ti_flags;
> > -   unsigned long ret = 0;
> >     bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
> > -   CT_WARN_ON(ct_state() == CT_STATE_USER);
> > -
> >     kuap_assert_locked();
> >     regs->result = r3;
> > -
> > -   /* Check whether the syscall is issued inside a restartable sequence */
> > -   rseq_syscall(regs);
> > +   regs->exit_flags = 0;
> >     ti_flags = read_thread_flags();
> > @@ -183,7 +106,7 @@ notrace unsigned long syscall_exit_prepare(unsigned 
> > long r3,
> >     if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
> >             if (ti_flags & _TIF_RESTOREALL)
> > -                   ret = _TIF_RESTOREALL;
> > +                   regs->exit_flags = _TIF_RESTOREALL;
> >             else
> >                     regs->gpr[3] = r3;
> >             clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
> > @@ -192,18 +115,28 @@ notrace unsigned long syscall_exit_prepare(unsigned 
> > long r3,
> >     }
> >     if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
> > -           do_syscall_trace_leave(regs);
> > -           ret |= _TIF_RESTOREALL;
> > +           regs->exit_flags |= _TIF_RESTOREALL;
> >     }
> > -   local_irq_disable();
> > -   ret = interrupt_exit_user_prepare_main(ret, regs);
> > +   syscall_exit_to_user_mode(regs);
> > +
> > +again:
> > +   user_enter_irqoff();
> > +   if (!prep_irq_for_enabled_exit(true)) {
> > +           user_exit_irqoff();
> > +           local_irq_enable();
> > +           local_irq_disable();
> > +           goto again;
> > +   }
> > +
> > +   /* Restore user access locks last */
> > +   kuap_user_restore(regs);
> >   #ifdef CONFIG_PPC64
> > -   regs->exit_result = ret;
> > +   regs->exit_result = regs->exit_flags;
> >   #endif
> > -   return ret;
> > +   return regs->exit_flags;
> >   }
> >   #ifdef CONFIG_PPC64
> > @@ -223,13 +156,16 @@ notrace unsigned long syscall_exit_restart(unsigned 
> > long r3, struct pt_regs *reg
> >     set_kuap(AMR_KUAP_BLOCKED);
> >   #endif
> > -   trace_hardirqs_off();
> > -   user_exit_irqoff();
> > -   account_cpu_user_entry();
> > -
> > -   BUG_ON(!user_mode(regs));
> > +again:
> > +   user_enter_irqoff();
> > +   if (!prep_irq_for_enabled_exit(true)) {
> > +           user_exit_irqoff();
> > +           local_irq_enable();
> > +           local_irq_disable();
> > +           goto again;
> > +   }
> > -   regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, 
> > regs);
> > +   regs->exit_result |= regs->exit_flags;
> >     return regs->exit_result;
> >   }
> > @@ -241,7 +177,6 @@ notrace unsigned long 
> > interrupt_exit_user_prepare(struct pt_regs *regs)
> >     BUG_ON(regs_is_unrecoverable(regs));
> >     BUG_ON(regs_irqs_disabled(regs));
> > -   CT_WARN_ON(ct_state() == CT_STATE_USER);
> >     /*
> >      * We don't need to restore AMR on the way back to userspace for KUAP.
> > @@ -250,8 +185,21 @@ notrace unsigned long 
> > interrupt_exit_user_prepare(struct pt_regs *regs)
> >     kuap_assert_locked();
> >     local_irq_disable();
> > +   regs->exit_flags = 0;
> > +again:
> > +   check_return_regs_valid(regs);
> > +   user_enter_irqoff();
> > +   if (!prep_irq_for_enabled_exit(true)) {
> > +           user_exit_irqoff();
> > +           local_irq_enable();
> > +           local_irq_disable();
> > +           goto again;
> > +   }
> > +
> > +   /* Restore user access locks last */
> > +   kuap_user_restore(regs);
> > -   ret = interrupt_exit_user_prepare_main(0, regs);
> > +   ret = regs->exit_flags;
> >   #ifdef CONFIG_PPC64
> >     regs->exit_result = ret;
> > @@ -293,8 +241,6 @@ notrace unsigned long 
> > interrupt_exit_kernel_prepare(struct pt_regs *regs)
> >             /* Returning to a kernel context with local irqs enabled. */
> >             WARN_ON_ONCE(!(regs->msr & MSR_EE));
> >   again:
> > -           if (need_irq_preemption())
> > -                   irqentry_exit_cond_resched();
> >             check_return_regs_valid(regs);
> > @@ -364,7 +310,6 @@ notrace unsigned long 
> > interrupt_exit_user_restart(struct pt_regs *regs)
> >   #endif
> >     trace_hardirqs_off();
> > -   user_exit_irqoff();
> >     account_cpu_user_entry();
> >     BUG_ON(!user_mode(regs));
> > diff --git a/arch/powerpc/kernel/ptrace/ptrace.c 
> > b/arch/powerpc/kernel/ptrace/ptrace.c
> > index 2134b6d155ff..316d4f5ead8e 100644
> > --- a/arch/powerpc/kernel/ptrace/ptrace.c
> > +++ b/arch/powerpc/kernel/ptrace/ptrace.c
> > @@ -21,9 +21,6 @@
> >   #include <asm/switch_to.h>
> >   #include <asm/debug.h>
> > -#define CREATE_TRACE_POINTS
> > -#include <trace/events/syscalls.h>
> > -
> >   #include "ptrace-decl.h"
> >   /*
> > @@ -195,144 +192,6 @@ long arch_ptrace(struct task_struct *child, long 
> > request,
> >     return ret;
> >   }
> > -#ifdef CONFIG_SECCOMP
> > -static int do_seccomp(struct pt_regs *regs)
> > -{
> > -   if (!test_thread_flag(TIF_SECCOMP))
> > -           return 0;
> > -
> > -   /*
> > -    * The ABI we present to seccomp tracers is that r3 contains
> > -    * the syscall return value and orig_gpr3 contains the first
> > -    * syscall parameter. This is different to the ptrace ABI where
> > -    * both r3 and orig_gpr3 contain the first syscall parameter.
> > -    */
> > -   regs->gpr[3] = -ENOSYS;
> > -
> > -   /*
> > -    * We use the __ version here because we have already checked
> > -    * TIF_SECCOMP. If this fails, there is nothing left to do, we
> > -    * have already loaded -ENOSYS into r3, or seccomp has put
> > -    * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
> > -    */
> > -   if (__secure_computing())
> > -           return -1;
> > -
> > -   /*
> > -    * The syscall was allowed by seccomp, restore the register
> > -    * state to what audit expects.
> > -    * Note that we use orig_gpr3, which means a seccomp tracer can
> > -    * modify the first syscall parameter (in orig_gpr3) and also
> > -    * allow the syscall to proceed.
> > -    */
> > -   regs->gpr[3] = regs->orig_gpr3;
> > -
> > -   return 0;
> > -}
> > -#else
> > -static inline int do_seccomp(struct pt_regs *regs) { return 0; }
> > -#endif /* CONFIG_SECCOMP */
> > -
> > -/**
> > - * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
> > - * @regs: the pt_regs of the task to trace (current)
> > - *
> > - * Performs various types of tracing on syscall entry. This includes 
> > seccomp,
> > - * ptrace, syscall tracepoints and audit.
> > - *
> > - * The pt_regs are potentially visible to userspace via ptrace, so their
> > - * contents is ABI.
> > - *
> > - * One or more of the tracers may modify the contents of pt_regs, in 
> > particular
> > - * to modify arguments or even the syscall number itself.
> > - *
> > - * It's also possible that a tracer can choose to reject the system call. 
> > In
> > - * that case this function will return an illegal syscall number, and will 
> > put
> > - * an appropriate return value in regs->r3.
> > - *
> > - * Return: the (possibly changed) syscall number.
> > - */
> > -long do_syscall_trace_enter(struct pt_regs *regs)
> 
> Remove prototype from arch/powerpc/include/asm/ptrace.h
> 
Sure will do.
> > -{
> > -   u32 flags;
> > -
> > -   flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
> > -
> > -   if (flags) {
> > -           int rc = ptrace_report_syscall_entry(regs);
> > -
> > -           if (unlikely(flags & _TIF_SYSCALL_EMU)) {
> > -                   /*
> > -                    * A nonzero return code from
> > -                    * ptrace_report_syscall_entry() tells us to prevent
> > -                    * the syscall execution, but we are not going to
> > -                    * execute it anyway.
> > -                    *
> > -                    * Returning -1 will skip the syscall execution. We want
> > -                    * to avoid clobbering any registers, so we don't goto
> > -                    * the skip label below.
> > -                    */
> > -                   return -1;
> > -           }
> > -
> > -           if (rc) {
> > -                   /*
> > -                    * The tracer decided to abort the syscall. Note that
> > -                    * the tracer may also just change regs->gpr[0] to an
> > -                    * invalid syscall number, that is handled below on the
> > -                    * exit path.
> > -                    */
> > -                   goto skip;
> > -           }
> > -   }
> > -
> > -   /* Run seccomp after ptrace; allow it to set gpr[3]. */
> > -   if (do_seccomp(regs))
> > -           return -1;
> > -
> > -   /* Avoid trace and audit when syscall is invalid. */
> > -   if (regs->gpr[0] >= NR_syscalls)
> > -           goto skip;
> > -
> > -   if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> > -           trace_sys_enter(regs, regs->gpr[0]);
> > -
> > -   if (!is_32bit_task())
> > -           audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
> > -                               regs->gpr[5], regs->gpr[6]);
> > -   else
> > -           audit_syscall_entry(regs->gpr[0],
> > -                               regs->gpr[3] & 0xffffffff,
> > -                               regs->gpr[4] & 0xffffffff,
> > -                               regs->gpr[5] & 0xffffffff,
> > -                               regs->gpr[6] & 0xffffffff);
> > -
> > -   /* Return the possibly modified but valid syscall number */
> > -   return regs->gpr[0];
> > -
> > -skip:
> > -   /*
> > -    * If we are aborting explicitly, or if the syscall number is
> > -    * now invalid, set the return value to -ENOSYS.
> > -    */
> > -   regs->gpr[3] = -ENOSYS;
> > -   return -1;
> > -}
> > -
> > -void do_syscall_trace_leave(struct pt_regs *regs)
> > -{
> > -   int step;
> > -
> > -   audit_syscall_exit(regs);
> > -
> > -   if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
> > -           trace_sys_exit(regs, regs->result);
> > -
> > -   step = test_thread_flag(TIF_SINGLESTEP);
> > -   if (step || test_thread_flag(TIF_SYSCALL_TRACE))
> > -           ptrace_report_syscall_exit(regs, step);
> > -}
> > -
> >   void __init pt_regs_check(void);
> >   /*
> > diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> > index 719930cf4ae1..9f1847b4742e 100644
> > --- a/arch/powerpc/kernel/signal.c
> > +++ b/arch/powerpc/kernel/signal.c
> > @@ -6,6 +6,7 @@
> >    *    Extracted from signal_32.c and signal_64.c
> >    */
> > +#include <linux/entry-common.h>
> >   #include <linux/resume_user_mode.h>
> >   #include <linux/signal.h>
> >   #include <linux/uprobes.h>
> > @@ -22,11 +23,6 @@
> >   #include "signal.h"
> > -/* This will be removed */
> > -#ifdef CONFIG_GENERIC_ENTRY
> > -#include <linux/entry-common.h>
> > -#endif /* CONFIG_GENERIC_ENTRY */
> > -
> 
> Until now CONFIG_GENERIC_ENTRY was not defined.
> 
> Now that it is defined, we remove the entire block ?
> 
> Then why has it been added at all ?
> 
I wanted all this to be a dead code till we enable the config. I kept
the name as CONFIG_GENERIC_ENTRY so that it would be explanatory why
it's dead.

> >   #ifdef CONFIG_VSX
> >   unsigned long copy_fpr_to_user(void __user *to,
> >                            struct task_struct *task)
> > @@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct 
> > pt_regs *regs,
> >                                task_pid_nr(tsk), where, ptr, regs->nip, 
> > regs->link);
> >   }
> > -#ifdef CONFIG_GENERIC_ENTRY
> >   void arch_do_signal_or_restart(struct pt_regs *regs)
> >   {
> >     BUG_ON(regs != current->thread.regs);
> > -   local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
> 
> Why was that there ? I thought it was preparatory, then you remove it before
> even using it ?
> 
This should have been removed earlier. Will fix this in next revision.

I appreciate a detailed review. Thanks :).

Regards,
Mukesh
> > +   regs->exit_flags |= _TIF_RESTOREALL;
> >     do_signal(current);
> >   }
> > -#endif /* CONFIG_GENERIC_ENTRY */

Reply via email to