From: Thomas Gleixner <t...@linutronix.de>

On syscall entry certain work needs to be done conditionally like tracing,
seccomp etc. This code is duplicated in all architectures.

Provide a generic version.

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
V2: Fix function documentation (Mike)
    Add comment about return value (Andy)
---
 arch/Kconfig                 |    3 
 include/linux/entry-common.h |  132 +++++++++++++++++++++++++++++++++++++++++++
 kernel/Makefile              |    1 
 kernel/entry/Makefile        |    3 
 kernel/entry/common.c        |   33 ++++++++++
 5 files changed, 172 insertions(+)

--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -27,6 +27,9 @@ config HAVE_IMA_KEXEC
 config HOTPLUG_SMT
        bool
 
+config GENERIC_ENTRY
+       bool
+
 config OPROFILE
        tristate "OProfile system profiling"
        depends on PROFILING
--- /dev/null
+++ b/include/linux/entry-common.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_ENTRYCOMMON_H
+#define __LINUX_ENTRYCOMMON_H
+
+#include <linux/tracehook.h>
+#include <linux/syscalls.h>
+#include <linux/seccomp.h>
+#include <linux/sched.h>
+#include <linux/audit.h>
+
+#include <asm/entry-common.h>
+
+/*
+ * Define dummy _TIF work flags if not defined by the architecture or for
+ * disabled functionality.
+ */
+#ifndef _TIF_SYSCALL_TRACE
+# define _TIF_SYSCALL_TRACE            (0)
+#endif
+
+#ifndef _TIF_SYSCALL_EMU
+# define _TIF_SYSCALL_EMU              (0)
+#endif
+
+#ifndef _TIF_SYSCALL_TRACEPOINT
+# define _TIF_SYSCALL_TRACEPOINT       (0)
+#endif
+
+#ifndef _TIF_SECCOMP
+# define _TIF_SECCOMP                  (0)
+#endif
+
+#ifndef _TIF_AUDIT
+# define _TIF_AUDIT                    (0)
+#endif
+
+/*
+ * TIF flags handled in syscall_enter_from_usermode()
+ */
+#ifndef ARCH_SYSCALL_ENTER_WORK
+# define ARCH_SYSCALL_ENTER_WORK       (0)
+#endif
+
+#define SYSCALL_ENTER_WORK                                             \
+       (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | TIF_SECCOMP |        \
+        _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU |                   \
+        ARCH_SYSCALL_ENTER_WORK)
+
+/**
+ * arch_syscall_enter_tracehook - Wrapper around 
tracehook_report_syscall_entry()
+ * @regs:      Pointer to currents pt_regs
+ *
+ * Returns: 0 on success or an error code to skip the syscall.
+ *
+ * Defaults to tracehook_report_syscall_entry(). Can be replaced by
+ * architecture specific code.
+ *
+ * Invoked from syscall_enter_from_usermode()
+ */
+static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs 
*regs);
+
+#ifndef arch_syscall_enter_tracehook
+static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs 
*regs)
+{
+       return tracehook_report_syscall_entry(regs);
+}
+#endif
+
+/**
+ * arch_syscall_enter_seccomp - Architecture specific seccomp invocation
+ * @regs:      Pointer to currents pt_regs
+ *
+ * Returns: The original or a modified syscall number
+ *
+ * Invoked from syscall_enter_from_usermode(). Can be replaced by
+ * architecture specific code.
+ */
+static inline long arch_syscall_enter_seccomp(struct pt_regs *regs);
+
+#ifndef arch_syscall_enter_seccomp
+static inline long arch_syscall_enter_seccomp(struct pt_regs *regs)
+{
+       return secure_computing(NULL);
+}
+#endif
+
+/**
+ * arch_syscall_enter_audit - Architecture specific audit invocation
+ * @regs:      Pointer to currents pt_regs
+ *
+ * Invoked from syscall_enter_from_usermode(). Must be replaced by
+ * architecture specific code if the architecture supports audit.
+ */
+static inline void arch_syscall_enter_audit(struct pt_regs *regs);
+
+#ifndef arch_syscall_enter_audit
+static inline void arch_syscall_enter_audit(struct pt_regs *regs) { }
+#endif
+
+/* Common syscall enter function */
+long core_syscall_enter_from_usermode(struct pt_regs *regs, long syscall);
+
+/**
+ * syscall_enter_from_usermode - Check and handle work before invoking
+ *                              a syscall
+ * @regs:      Pointer to currents pt_regs
+ * @syscall:   The syscall number
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * enabled.
+ *
+ * Returns: The original or a modified syscall number
+ *
+ * If the returned syscall number is -1 then the syscall should be
+ * skipped. In this case the caller may invoke syscall_set_error() or
+ * syscall_set_return_value() first.  If neither of those is called and -1
+ * is returned, then the syscall will fail with ENOSYS.
+ */
+static inline long syscall_enter_from_usermode(struct pt_regs *regs,
+                                              long syscall)
+{
+       unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
+
+       if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+               BUG_ON(regs != task_pt_regs(current));
+
+       if (ti_work & SYSCALL_ENTER_WORK)
+               syscall = core_syscall_enter_from_usermode(regs, syscall);
+       return syscall;
+}
+
+#endif
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,7 @@ obj-y += irq/
 obj-y += rcu/
 obj-y += livepatch/
 obj-y += dma/
+obj-y += entry/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 obj-$(CONFIG_FREEZER) += freezer.o
--- /dev/null
+++ b/kernel/entry/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_GENERIC_ENTRY) += common.o
--- /dev/null
+++ b/kernel/entry/common.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/context_tracking.h>
+#include <linux/entry-common.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+long core_syscall_enter_from_usermode(struct pt_regs *regs, long syscall)
+{
+       unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
+       unsigned long ret = 0;
+
+       if (ti_work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
+               ret = arch_syscall_enter_tracehook(regs);
+               if (ret || (ti_work & _TIF_SYSCALL_EMU))
+                       return -1L;
+       }
+
+       /* Do seccomp after ptrace, to catch any tracer changes. */
+       if (ti_work & _TIF_SECCOMP) {
+               ret = arch_syscall_enter_seccomp(regs);
+               if (ret == -1L)
+                       return ret;
+       }
+
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_enter(regs, syscall);
+
+       arch_syscall_enter_audit(regs);
+
+       return ret ? : syscall;
+}


Reply via email to