Enable the syscall entry and exit paths via the generic entry framework: select GENERIC_ENTRY, enter through syscall_enter_from_user_mode() and leave through syscall_exit_to_user_mode() / irqentry_exit_to_user_mode(), which makes the powerpc-specific do_syscall_trace_enter(), do_syscall_trace_leave() and interrupt_exit_user_prepare_main() helpers redundant and removes them. Signed-off-by: Mukesh Kumar Chaurasiya <mchau...@linux.ibm.com> --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/entry-common.h | 2 +- arch/powerpc/kernel/interrupt.c | 135 +++++++---------------- arch/powerpc/kernel/ptrace/ptrace.c | 141 ------------------------ arch/powerpc/kernel/signal.c | 10 +- arch/powerpc/kernel/syscall.c | 119 +------------------- 6 files changed, 49 insertions(+), 359 deletions(-)
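Note for reviewers: a minimal sketch of the entry/exit bracket this conversion relies on, assuming only the <linux/entry-common.h> API the patch calls (syscall_enter_from_user_mode(), syscall_exit_to_user_mode()) and loosely mirroring the pattern other GENERIC_ENTRY architectures use. The function name and the elided dispatch are illustrative, not code from this patch.

#include <linux/entry-common.h>	/* syscall_enter_from_user_mode(), syscall_exit_to_user_mode() */
#include <linux/errno.h>	/* ENOSYS */
#include <asm/ptrace.h>		/* struct pt_regs, gpr[] */
#include <asm/unistd.h>		/* NR_syscalls */

static void example_syscall_bracket(struct pt_regs *regs, unsigned long r0)
{
	/*
	 * Entry work now done by the generic framework: context tracking,
	 * ptrace, seccomp, audit and the sys_enter tracepoint.  It may
	 * rewrite the syscall number, or return a negative value when a
	 * tracer or seccomp rejects the syscall.
	 */
	long nr = syscall_enter_from_user_mode(regs, r0);

	if (nr >= 0 && nr < NR_syscalls) {
		/* dispatch the handler and place its result in gpr[3] */
	} else if (nr != -1) {
		regs->gpr[3] = -ENOSYS;	/* invalid syscall number */
	}
	/* nr == -1: a tracer/seccomp rejected it and set gpr[3] itself */

	/*
	 * Exit work now done by the generic framework: audit exit, the
	 * sys_exit tracepoint, ptrace single-step reporting, signal
	 * delivery and rescheduling; it returns with interrupts disabled,
	 * ready for the low-level exception return.
	 */
	syscall_exit_to_user_mode(regs);
}

The patch keeps the powerpc-specific pieces that cannot move into the generic code (scv handling, KUAP restore, the prep_irq_for_enabled_exit() retry loop) around this bracket, as seen in the syscall.c and interrupt.c hunks below.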
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e0c51d7b5638d..e67294a72e4d4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -199,6 +199,7 @@ config PPC select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_EARLY_IOREMAP + select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h index d3f4a12aeafca..8fb74e6aa9560 100644 --- a/arch/powerpc/include/asm/entry-common.h +++ b/arch/powerpc/include/asm/entry-common.h @@ -3,7 +3,7 @@ #ifndef _ASM_PPC_ENTRY_COMMON_H #define _ASM_PPC_ENTRY_COMMON_H -#ifdef CONFIG_GENERIC_IRQ_ENTRY +#ifdef CONFIG_GENERIC_ENTRY #include <asm/cputime.h> #include <asm/interrupt.h> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 7bb8a31b24ea7..642e22527f9dd 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/context_tracking.h> +#include <linux/entry-common.h> #include <linux/err.h> #include <linux/compat.h> #include <linux/rseq.h> @@ -77,79 +78,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) return true; } -static notrace unsigned long -interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) -{ - unsigned long ti_flags; - -again: - ti_flags = read_thread_flags(); - while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { - local_irq_enable(); - if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) { - schedule(); - } else { - /* - * SIGPENDING must restore signal handler function - * argument GPRs, and some non-volatiles (e.g., r1). - * Restore all for now. This could be made lighter. - */ - if (ti_flags & _TIF_SIGPENDING) - ret |= _TIF_RESTOREALL; - do_notify_resume(regs, ti_flags); - } - local_irq_disable(); - ti_flags = read_thread_flags(); - } - - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely((ti_flags & _TIF_RESTORE_TM))) { - restore_tm_state(regs); - } else { - unsigned long mathflags = MSR_FP; - - if (cpu_has_feature(CPU_FTR_VSX)) - mathflags |= MSR_VEC | MSR_VSX; - else if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mathflags |= MSR_VEC; - - /* - * If userspace MSR has all available FP bits set, - * then they are live and no need to restore. If not, - * it means the regs were given up and restore_math - * may decide to restore them (to avoid taking an FP - * fault). - */ - if ((regs->msr & mathflags) != mathflags) - restore_math(regs); - } - } - - check_return_regs_valid(regs); - - user_enter_irqoff(); - if (!prep_irq_for_enabled_exit(true)) { - user_exit_irqoff(); - local_irq_enable(); - local_irq_disable(); - goto again; - } - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - local_paca->tm_scratch = regs->msr; -#endif - - booke_load_dbcr0(); - - account_cpu_user_exit(); - - /* Restore user access locks last */ - kuap_user_restore(regs); - - return ret; -} - /* * This should be called after a syscall returns, with r3 the return value * from the syscall. 
If this function returns non-zero, the system call @@ -164,17 +92,12 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, long scv) { unsigned long ti_flags; - unsigned long ret = 0; bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - CT_WARN_ON(ct_state() == CT_STATE_USER); - kuap_assert_locked(); regs->result = r3; - - /* Check whether the syscall is issued inside a restartable sequence */ - rseq_syscall(regs); + regs->exit_flags = 0; ti_flags = read_thread_flags(); @@ -187,7 +110,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { if (ti_flags & _TIF_RESTOREALL) - ret = _TIF_RESTOREALL; + regs->exit_flags = _TIF_RESTOREALL; else regs->gpr[3] = r3; clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags); @@ -196,18 +119,28 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { - do_syscall_trace_leave(regs); - ret |= _TIF_RESTOREALL; + regs->exit_flags |= _TIF_RESTOREALL; } - local_irq_disable(); - ret = interrupt_exit_user_prepare_main(ret, regs); +again: + syscall_exit_to_user_mode(regs); + + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } + + /* Restore user access locks last */ + kuap_user_restore(regs); #ifdef CONFIG_PPC64 - regs->exit_result = ret; + regs->exit_result = regs->exit_flags; #endif - return ret; + return regs->exit_flags; } #ifdef CONFIG_PPC64 @@ -226,14 +159,18 @@ notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *reg #ifdef CONFIG_PPC_BOOK3S_64 set_kuap(AMR_KUAP_BLOCKED); #endif +again: + syscall_exit_to_user_mode(regs); - trace_hardirqs_off(); - user_exit_irqoff(); - account_cpu_user_entry(); - - BUG_ON(!user_mode(regs)); + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } - regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs); + regs->exit_result |= regs->exit_flags; return regs->exit_result; } @@ -254,8 +191,20 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) kuap_assert_locked(); local_irq_disable(); + regs->exit_flags = 0; +again: + irqentry_exit_to_user_mode(regs); + check_return_regs_valid(regs); + + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { + user_exit_irqoff(); + local_irq_enable(); + local_irq_disable(); + goto again; + } - ret = interrupt_exit_user_prepare_main(0, regs); + ret = regs->exit_flags; #ifdef CONFIG_PPC64 regs->exit_result = ret; diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 2134b6d155ff6..316d4f5ead8ed 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -21,9 +21,6 @@ #include <asm/switch_to.h> #include <asm/debug.h> -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - #include "ptrace-decl.h" /* @@ -195,144 +192,6 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -#ifdef CONFIG_SECCOMP -static int do_seccomp(struct pt_regs *regs) -{ - if (!test_thread_flag(TIF_SECCOMP)) - return 0; - - /* - * The ABI we present to seccomp tracers is that r3 contains - * the syscall return value and orig_gpr3 contains the first - * syscall parameter. This is different to the ptrace ABI where - * both r3 and orig_gpr3 contain the first syscall parameter. 
- */ - regs->gpr[3] = -ENOSYS; - - /* - * We use the __ version here because we have already checked - * TIF_SECCOMP. If this fails, there is nothing left to do, we - * have already loaded -ENOSYS into r3, or seccomp has put - * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). - */ - if (__secure_computing()) - return -1; - - /* - * The syscall was allowed by seccomp, restore the register - * state to what audit expects. - * Note that we use orig_gpr3, which means a seccomp tracer can - * modify the first syscall parameter (in orig_gpr3) and also - * allow the syscall to proceed. - */ - regs->gpr[3] = regs->orig_gpr3; - - return 0; -} -#else -static inline int do_seccomp(struct pt_regs *regs) { return 0; } -#endif /* CONFIG_SECCOMP */ - -/** - * do_syscall_trace_enter() - Do syscall tracing on kernel entry. - * @regs: the pt_regs of the task to trace (current) - * - * Performs various types of tracing on syscall entry. This includes seccomp, - * ptrace, syscall tracepoints and audit. - * - * The pt_regs are potentially visible to userspace via ptrace, so their - * contents is ABI. - * - * One or more of the tracers may modify the contents of pt_regs, in particular - * to modify arguments or even the syscall number itself. - * - * It's also possible that a tracer can choose to reject the system call. In - * that case this function will return an illegal syscall number, and will put - * an appropriate return value in regs->r3. - * - * Return: the (possibly changed) syscall number. - */ -long do_syscall_trace_enter(struct pt_regs *regs) -{ - u32 flags; - - flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); - - if (flags) { - int rc = ptrace_report_syscall_entry(regs); - - if (unlikely(flags & _TIF_SYSCALL_EMU)) { - /* - * A nonzero return code from - * ptrace_report_syscall_entry() tells us to prevent - * the syscall execution, but we are not going to - * execute it anyway. - * - * Returning -1 will skip the syscall execution. We want - * to avoid clobbering any registers, so we don't goto - * the skip label below. - */ - return -1; - } - - if (rc) { - /* - * The tracer decided to abort the syscall. Note that - * the tracer may also just change regs->gpr[0] to an - * invalid syscall number, that is handled below on the - * exit path. - */ - goto skip; - } - } - - /* Run seccomp after ptrace; allow it to set gpr[3]. */ - if (do_seccomp(regs)) - return -1; - - /* Avoid trace and audit when syscall is invalid. */ - if (regs->gpr[0] >= NR_syscalls) - goto skip; - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gpr[0]); - - if (!is_32bit_task()) - audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4], - regs->gpr[5], regs->gpr[6]); - else - audit_syscall_entry(regs->gpr[0], - regs->gpr[3] & 0xffffffff, - regs->gpr[4] & 0xffffffff, - regs->gpr[5] & 0xffffffff, - regs->gpr[6] & 0xffffffff); - - /* Return the possibly modified but valid syscall number */ - return regs->gpr[0]; - -skip: - /* - * If we are aborting explicitly, or if the syscall number is - * now invalid, set the return value to -ENOSYS. 
- */ - regs->gpr[3] = -ENOSYS; - return -1; -} - -void do_syscall_trace_leave(struct pt_regs *regs) -{ - int step; - - audit_syscall_exit(regs); - - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_exit(regs, regs->result); - - step = test_thread_flag(TIF_SINGLESTEP); - if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - ptrace_report_syscall_exit(regs, step); -} - void __init pt_regs_check(void); /* diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 719930cf4ae1f..9f1847b4742e6 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -6,6 +6,7 @@ * Extracted from signal_32.c and signal_64.c */ +#include <linux/entry-common.h> #include <linux/resume_user_mode.h> #include <linux/signal.h> #include <linux/uprobes.h> @@ -22,11 +23,6 @@ #include "signal.h" -/* This will be removed */ -#ifdef CONFIG_GENERIC_ENTRY -#include <linux/entry-common.h> -#endif /* CONFIG_GENERIC_ENTRY */ - #ifdef CONFIG_VSX unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) @@ -374,11 +370,9 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs, task_pid_nr(tsk), where, ptr, regs->nip, regs->link); } -#ifdef CONFIG_GENERIC_ENTRY void arch_do_signal_or_restart(struct pt_regs *regs) { BUG_ON(regs != current->thread.regs); - local_paca->generic_fw_flags |= GFW_RESTORE_ALL; + regs->exit_flags |= _TIF_RESTOREALL; do_signal(current); } -#endif /* CONFIG_GENERIC_ENTRY */ diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 9f03a6263fb41..df1c9a8d62bc6 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -3,6 +3,7 @@ #include <linux/compat.h> #include <linux/context_tracking.h> #include <linux/randomize_kstack.h> +#include <linux/entry-common.h> #include <asm/interrupt.h> #include <asm/kup.h> @@ -18,124 +19,10 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) long ret; syscall_fn f; - kuap_lock(); - add_random_kstack_offset(); + r0 = syscall_enter_from_user_mode(regs, r0); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); - - trace_hardirqs_off(); /* finish reconciling */ - - CT_WARN_ON(ct_state() == CT_STATE_KERNEL); - user_exit_irqoff(); - - BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!user_mode(regs)); - BUG_ON(regs_irqs_disabled(regs)); - -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_PKEY)) { - unsigned long amr, iamr; - bool flush_needed = false; - /* - * When entering from userspace we mostly have the AMR/IAMR - * different from kernel default values. Hence don't compare. - */ - amr = mfspr(SPRN_AMR); - iamr = mfspr(SPRN_IAMR); - regs->amr = amr; - regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_KUAP)) { - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - flush_needed = true; - } - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { - mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - flush_needed = true; - } - if (flush_needed) - isync(); - } else -#endif - kuap_assert_locked(); - - booke_restore_dbcr0(); - - account_cpu_user_entry(); - - account_stolen_time(); - - /* - * This is not required for the syscall exit path, but makes the - * stack frame look nicer. If this was initialised in the first stack - * frame, or if the unwinder was taught the first stack frame always - * returns to user with IRQS_ENABLED, this store could be avoided! 
- */ - irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); - - /* - * If system call is called with TM active, set _TIF_RESTOREALL to - * prevent RFSCV being used to return to userspace, because POWER9 - * TM implementation has problems with this instruction returning to - * transactional state. Final register values are not relevant because - * the transaction will be aborted upon return anyway. Or in the case - * of unsupported_scv SIGILL fault, the return state does not much - * matter because it's an edge case. - */ - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - set_bits(_TIF_RESTOREALL, &current_thread_info()->flags); - - /* - * If the system call was made with a transaction active, doom it and - * return without performing the system call. Unless it was an - * unsupported scv vector, in which case it's treated like an illegal - * instruction. - */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && - !trap_is_unsupported_scv(regs)) { - /* Enable TM in the kernel, and disable EE (for scv) */ - hard_irq_disable(); - mtmsr(mfmsr() | MSR_TM); - - /* tabort, this dooms the transaction, nothing else */ - asm volatile(".long 0x7c00071d | ((%0) << 16)" - :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); - - /* - * Userspace will never see the return value. Execution will - * resume after the tbegin. of the aborted transaction with the - * checkpointed register state. A context switch could occur - * or signal delivered to the process before resuming the - * doomed transaction context, but that should all be handled - * as expected. - */ - return -ENOSYS; - } -#endif // CONFIG_PPC_TRANSACTIONAL_MEM - - local_irq_enable(); - - if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - /* - * We use the return value of do_syscall_trace_enter() as the - * syscall number. If the syscall was rejected for any reason - * do_syscall_trace_enter() returns an invalid syscall number - * and the test against NR_syscalls will fail and the return - * value to be used is in regs->gpr[3]. - */ - r0 = do_syscall_trace_enter(regs); - if (unlikely(r0 >= NR_syscalls)) - return regs->gpr[3]; - - } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(r0 >= NR_syscalls)) { if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -- 2.51.0