From: Benjamin Berg <benja...@sipsolutions.net>

This adds the kernel side of the seccomp-based process handling.

Co-authored-by: Johannes Berg <johan...@sipsolutions.net>
Signed-off-by: Benjamin Berg <benja...@sipsolutions.net>
---
 arch/um/os-Linux/skas/mem.c     |  35 +-
 arch/um/os-Linux/skas/process.c | 561 ++++++++++++++++++++++++--------
 arch/um/os-Linux/start_up.c     |   3 -
 3 files changed, 439 insertions(+), 160 deletions(-)

diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 28e50349ab91..619035151bc6 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <linux/kconfig.h>
 #include <stddef.h>
 #include <unistd.h>
 #include <errno.h>
@@ -22,6 +23,7 @@
 extern char __syscall_stub_start[];
 
 extern void wait_stub_done(int pid);
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running);
 
 static inline unsigned long *check_init_stack(struct mm_id *mm_idp,
                                              unsigned long *stack)
@@ -58,24 +60,29 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
        int n, i;
        int err, pid = mm_idp->u.pid;
 
-       n = ptrace_setregs(pid, syscall_regs);
-       if (n < 0) {
-               printk(UM_KERN_ERR "Registers - \n");
-               for (i = 0; i < MAX_REG_NR; i++)
-                       printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
-               panic("%s : PTRACE_SETREGS failed, errno = %d\n",
-                     __func__, -n);
-       }
-
        /* Inform process how much we have filled in. */
        proc_data->syscall_data_len = mm_idp->syscall_data_len;
 
-       err = ptrace(PTRACE_CONT, pid, 0, 0);
-       if (err)
-               panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
-                     errno);
+       if (using_seccomp) {
+               proc_data->restart_wait = 1;
+               wait_stub_done_seccomp(pid, proc_data, 0);
+       } else {
+               n = ptrace_setregs(pid, syscall_regs);
+               if (n < 0) {
+                       printk(UM_KERN_ERR "Registers -\n");
+                       for (i = 0; i < MAX_REG_NR; i++)
+                               printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, 
syscall_regs[i]);
+                       panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+                             __func__, -n);
+               }
+
+               err = ptrace(PTRACE_CONT, pid, 0, 0);
+               if (err)
+                       panic("Failed to continue stub, pid = %d, errno = %d\n",
+                             pid, errno);
 
-       wait_stub_done(pid);
+               wait_stub_done(pid);
+       }
 
        /*
         * proc_data->err will be non-zero if there was an (unexpected) error.
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 44a7d49538ce..55868eb35727 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benja...@sipsolutions.net>
  * Copyright (C) 2015 Thomas Meyer (tho...@m3y3r.de)
  * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <linux/kconfig.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <unistd.h>
@@ -22,7 +24,13 @@
 #include <registers.h>
 #include <skas.h>
 #include <sysdep/stub.h>
+#include <sysdep/mcontext.h>
 #include <linux/threads.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/futex.h>
 
 int is_skas_winch(int pid, int fd, void *data)
 {
@@ -137,6 +145,58 @@ void wait_stub_done(int pid)
        fatal_sigsegv();
 }
 
+#ifdef CONFIG_UML_SECCOMP
+/*
+ * Wait for the stub to report SIGTRAP in @data, waking it first unless
+ * @running and again after each SIGALRM; anything else ends in fatal_sigsegv().
+ */
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running)
+{
+       int ret;
+
+       do {
+               if (!running) {
+                       data->signal = 0;
+                       data->futex = FUTEX_IN_CHILD;
+                       CATCH_EINTR(syscall(__NR_futex, &data->futex,
+                                           FUTEX_WAKE, 1, NULL, NULL, 0));
+               }
+
+               do {
+                       ret = syscall(__NR_futex, &data->futex, FUTEX_WAIT,
+                                     FUTEX_IN_CHILD, NULL, NULL, 0);
+               } while ((ret == -1 && errno == EINTR) || data->futex == FUTEX_IN_CHILD);
+               running = 0; /* later iterations must wake the stub again */
+       } while (data->signal == SIGALRM);
+
+       if (ret < 0 && errno != EAGAIN) {
+               printk(UM_KERN_ERR "%s : waiting for child futex failed, errno = %d\n",
+                      __func__, errno);
+               goto out_kill;
+       }
+
+       if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
+               printk(UM_KERN_ERR "%s : invalid mcontext offset\n", __func__);
+               goto out_kill;
+       }
+
+       if (data->signal != SIGTRAP) {
+               printk(UM_KERN_ERR "%s : expected SIGTRAP but got %d\n",
+                      __func__, data->signal);
+               goto out_kill;
+       }
+
+       return;
+
+out_kill:
+       printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, errno = %d\n",
+              __func__, pid, errno);
+       fatal_sigsegv();
+}
+#else
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running);
+#endif
+
 extern unsigned long current_stub_stack(void);
 
 static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long 
*aux_fp_regs)
@@ -198,16 +258,16 @@ extern char __syscall_stub_start[];
  */
 static int userspace_tramp(void *stack)
 {
-       struct sigaction sa;
        struct stub_data *data;
        void *addr;
        int fd;
        unsigned long long offset;
-       unsigned long segv_handler = STUB_CODE +
-                                    (unsigned long) stub_segv_handler -
-                                    (unsigned long) __syscall_stub_start;
 
-       ptrace(PTRACE_TRACEME, 0, 0, 0);
+       if (!using_seccomp)
+               ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+       /* Needed for seccomp, but this is sane anyway. */
+       prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 
        signal(SIGTERM, SIG_DFL);
        signal(SIGWINCH, SIG_IGN);
@@ -233,17 +293,130 @@ static int userspace_tramp(void *stack)
        data = (void *) addr;
 
        set_sigstack((void *) &data->sigstack, sizeof(data->sigstack));
-       sigemptyset(&sa.sa_mask);
-       sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
-       sa.sa_sigaction = (void *) segv_handler;
-       sa.sa_restorer = NULL;
-       if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-               os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
-                       __func__, errno);
-               exit(1);
+
+       if (using_seccomp) {
+               struct rlimit lim;
+               struct sock_filter filter[] = {
+#if __BITS_PER_LONG > 32
+                       /* [0] Load upper 32bit of instruction pointer from 
seccomp_data */
+                       BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+                               (offsetof(struct seccomp_data, 
instruction_pointer) + 4)),
+
+                       /* [1] Jump forward 4 instructions if the upper address 
is not identical */
+                       BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (STUB_CODE) >> 32, 
0, 4),
+#endif
+                       /* [2] Load lower 32bit of instruction pointer from 
seccomp_data */
+                       BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+                               (offsetof(struct seccomp_data, 
instruction_pointer))),
+
+                       /* [3] Mask out lower bits */
+                       BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
+
+                       /* [4] Jump to [6] if the lower bits are not on the 
expected page */
+                       BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (STUB_CODE) & 
0xfffff000, 0, 1),
+
+                       /* [5] Permitted call, allow */
+                       BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+
+                       /* [6] Restricted call, replace with SIGSYS */
+                       BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+               };
+               struct sock_fprog prog = {
+                       .len = ARRAY_SIZE(filter),
+                       .filter = filter,
+               };
+
+               /*
+                * With seccomp we return normally from the signal handler, so
+                * avoid setting things up through libc which may do its own
+                * thing for restoring.
+                */
+               struct sigaction_real {
+                       void *sa_handler_;
+                       unsigned long sa_flags;
+                       void *sa_restorer;
+                       sigset_t sa_mask;
+               } sa;
+
+               unsigned long v = STUB_CODE +
+                                 (unsigned long) stub_signal_interrupt -
+                                 (unsigned long) __syscall_stub_start;
+               unsigned long r = STUB_CODE +
+                                 (unsigned long) stub_signal_restorer -
+                                 (unsigned long) __syscall_stub_start;
+
+               /* Never coredump */
+               lim.rlim_cur = 0;
+               lim.rlim_max = 0;
+               if (setrlimit(RLIMIT_CORE, &lim) < 0) {
+                       os_info("Could not set coredump size limit, errno = 
%d\n",
+                               errno);
+                       exit(1);
+               }
+
+               sigemptyset(&sa.sa_mask);
+               sigaddset(&sa.sa_mask, SIGALRM);
+               sigaddset(&sa.sa_mask, SIGCHLD);
+               sa.sa_flags = SA_ONSTACK | SA_SIGINFO | 0x04000000; /* 
SA_RESTORER */
+               sa.sa_handler_ = (void *)v;
+               sa.sa_restorer = (void *)r;
+               if (syscall(__NR_rt_sigaction, SIGSEGV, &sa, NULL, 8) < 0) {
+                       os_info("%s - setting SIGSEGV handler failed - errno = 
%d\n",
+                               __func__, errno);
+                       exit(1);
+               }
+
+               if (syscall(__NR_rt_sigaction, SIGSYS, &sa, NULL, 8) < 0) {
+                       os_info("%s - setting SIGSYS handler failed - errno = 
%d\n",
+                               __func__, errno);
+                       exit(1);
+               }
+
+               if (syscall(__NR_rt_sigaction, SIGALRM, &sa, NULL, 8) < 0) {
+                       os_info("%s - setting SIGALRM handler failed - errno = 
%d\n",
+                               __func__, errno);
+                       exit(1);
+               }
+
+               if (syscall(__NR_rt_sigaction, SIGTRAP, &sa, NULL, 8) < 0) {
+                       os_info("%s - setting SIGTRAP handler failed - errno = 
%d\n",
+                               __func__, errno);
+                       exit(1);
+               }
+
+               if (syscall(__NR_rt_sigaction, SIGFPE, &sa, NULL, 8) < 0) {
+                       os_info("%s - setting SIGFPE handler failed - errno = 
%d\n",
+                               __func__, errno);
+                       exit(1);
+               }
+
+               if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+                           SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0) {
+                       os_info("%s - could not install seccomp filter - errno 
= %d\n",
+                               __func__, errno);
+                       exit(42);
+               }
+
+               trap_myself();
+       } else {
+               struct sigaction sa;
+               unsigned long segv_handler = STUB_CODE +
+                                            (unsigned long) stub_segv_handler -
+                                            (unsigned long) 
__syscall_stub_start;
+
+               sigemptyset(&sa.sa_mask);
+               sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+               sa.sa_sigaction = (void *) segv_handler;
+               sa.sa_restorer = NULL;
+               if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+                       os_info("%s - setting SIGSEGV handler failed - errno = 
%d\n",
+                               __func__, errno);
+                       exit(1);
+               }
+
+               kill(os_getpid(), SIGSTOP);
        }
 
-       kill(os_getpid(), SIGSTOP);
        return 0;
 }
 
@@ -269,6 +442,7 @@ int start_userspace(struct mm_id *id)
        void *stack;
        unsigned long sp;
        int status, n, flags, err;
+       struct stub_data *proc_data = (void *) id->stack;
 
        /* setup a temporary stack page */
        stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -286,6 +460,9 @@ int start_userspace(struct mm_id *id)
 
        flags = CLONE_FILES | SIGCHLD;
 
+       if (using_seccomp)
+               proc_data->futex = FUTEX_IN_CHILD;
+
        /* clone into new userspace process */
        id->u.pid = clone(userspace_tramp, (void *) sp, flags, (void *) 
id->stack);
        if (id->u.pid < 0) {
@@ -295,29 +472,33 @@ int start_userspace(struct mm_id *id)
                return err;
        }
 
-       do {
-               CATCH_EINTR(n = waitpid(id->u.pid, &status, WUNTRACED | 
__WALL));
-               if (n < 0) {
+       if (using_seccomp) {
+               wait_stub_done_seccomp(id->u.pid, proc_data, 1);
+       } else {
+               do {
+                       CATCH_EINTR(n = waitpid(id->u.pid, &status, WUNTRACED | 
__WALL));
+                       if (n < 0) {
+                               err = -errno;
+                               printk(UM_KERN_ERR "%s : wait failed, errno = 
%d\n",
+                                      __func__, errno);
+                               goto out_kill;
+                       }
+               } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
+
+               if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+                       err = -EINVAL;
+                       printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = 
%d\n",
+                              __func__, status);
+                       goto out_kill;
+               }
+
+               if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
+                          (void *) PTRACE_O_TRACESYSGOOD) < 0) {
                        err = -errno;
-                       printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+                       printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, 
errno = %d\n",
                               __func__, errno);
                        goto out_kill;
                }
-       } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
-
-       if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
-               err = -EINVAL;
-               printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
-                      __func__, status);
-               goto out_kill;
-       }
-
-       if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
-                  (void *) PTRACE_O_TRACESYSGOOD) < 0) {
-               err = -errno;
-               printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = 
%d\n",
-                      __func__, errno);
-               goto out_kill;
        }
 
        if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
@@ -337,7 +518,9 @@ int start_userspace(struct mm_id *id)
 void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 {
        int err, status, op, pid = userspace_pid[0];
-       siginfo_t si;
+       siginfo_t si_ptrace;
+       siginfo_t *si;
+       int sig;
 
        /* Handle any immediate reschedules or signals */
        interrupt_end();
@@ -346,94 +529,166 @@ void userspace(struct uml_pt_regs *regs, unsigned long 
*aux_fp_regs)
                if (kill_userspace_mm[0])
                        fatal_sigsegv();
 
-               /*
-                * This can legitimately fail if the process loads a
-                * bogus value into a segment register.  It will
-                * segfault and PTRACE_GETREGS will read that value
-                * out of the process.  However, PTRACE_SETREGS will
-                * fail.  In this case, there is nothing to do but
-                * just kill the process.
-                */
-               if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
-                       printk(UM_KERN_ERR "%s - ptrace set regs failed, errno 
= %d\n",
-                              __func__, errno);
-                       fatal_sigsegv();
-               }
+               if (using_seccomp) {
+                       struct stub_data *proc_data = (void 
*)current_stub_stack();
+                       int ret;
 
-               if (put_fp_registers(pid, regs->fp)) {
-                       printk(UM_KERN_ERR "%s - ptrace set fp regs failed, 
errno = %d\n",
-                              __func__, errno);
-                       fatal_sigsegv();
-               }
+                       ret = set_stub_state(regs, proc_data, singlestepping());
+                       if (ret) {
+                               printk(UM_KERN_ERR "%s - failed to set regs: 
%d",
+                                      __func__, ret);
+                               fatal_sigsegv();
+                       }
 
-               if (singlestepping())
-                       op = PTRACE_SYSEMU_SINGLESTEP;
-               else
-                       op = PTRACE_SYSEMU;
+                       /* Must have been reset by the syscall caller */
+                       if (proc_data->restart_wait != 0)
+                               panic("Programming error: Flag to only run 
syscalls in child was not cleared!");
+
+                       proc_data->signal = 0;
+                       proc_data->futex = FUTEX_IN_CHILD;
+                       CATCH_EINTR(syscall(__NR_futex, &proc_data->futex,
+                                           FUTEX_WAKE, 1, NULL, NULL, 0));
+                       do {
+                               ret = syscall(__NR_futex, &proc_data->futex,
+                                             FUTEX_WAIT, FUTEX_IN_CHILD, NULL, 
NULL, 0);
+                       } while ((ret == -1 && errno == EINTR) ||
+                                proc_data->futex == FUTEX_IN_CHILD);
+
+                       sig = proc_data->signal;
+
+                       ret = get_stub_state(regs, proc_data);
+                       if (ret) {
+                               printk(UM_KERN_ERR "%s - failed to get regs: 
%d",
+                                      __func__, ret);
+                               fatal_sigsegv();
+                       }
 
-               if (ptrace(op, pid, 0, 0)) {
-                       printk(UM_KERN_ERR "%s - ptrace continue failed, op = 
%d, errno = %d\n",
-                              __func__, op, errno);
-                       fatal_sigsegv();
-               }
+                       if (proc_data->si_offset > sizeof(proc_data->sigstack) 
- sizeof(*si))
+                               panic("%s - Invalid siginfo offset from child",
+                                     __func__);
+                       si = (void *)&proc_data->sigstack[proc_data->si_offset];
+
+                       if (sig == SIGSEGV && si->si_code == SI_KERNEL) {
+                               /* This happens if the host is unable to
+                                * restore the state from the mcontext.
+                                */
+                               panic("%s - SEGV with si_code == SI_KERNEL, 
faulted while returning to userspace (addr: 0x%lx)",
+                                     __func__, (unsigned long) si->si_addr);
+                       }
 
-               CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
-               if (err < 0) {
-                       printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
-                              __func__, errno);
-                       fatal_sigsegv();
-               }
+                       regs->is_user = 1;
 
-               regs->is_user = 1;
-               if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
-                       printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = 
%d\n",
-                              __func__, errno);
-                       fatal_sigsegv();
-               }
+                       /* Fill in ORIG_RAX and extract fault information */
+                       PT_SYSCALL_NR(regs->gp) = si->si_syscall;
+                       if (sig == SIGSEGV) {
+                               mcontext_t *mcontext = (void 
*)&proc_data->sigstack[proc_data->mctx_offset];
 
-               if (get_fp_registers(pid, regs->fp)) {
-                       printk(UM_KERN_ERR "%s -  get_fp_registers failed, 
errno = %d\n",
-                              __func__, errno);
-                       fatal_sigsegv();
-               }
+                               GET_FAULTINFO_FROM_MC(regs->faultinfo, 
mcontext);
+                       }
+               } else {
+                       /*
+                        * This can legitimately fail if the process loads a
+                        * bogus value into a segment register.  It will
+                        * segfault and PTRACE_GETREGS will read that value
+                        * out of the process.  However, PTRACE_SETREGS will
+                        * fail.  In this case, there is nothing to do but
+                        * just kill the process.
+                        */
+                       if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+                               printk(UM_KERN_ERR "%s - ptrace set regs 
failed, errno = %d\n",
+                                      __func__, errno);
+                               fatal_sigsegv();
+                       }
 
-               UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+                       if (put_fp_registers(pid, regs->fp)) {
+                               printk(UM_KERN_ERR "%s - ptrace set fp regs 
failed, errno = %d\n",
+                                      __func__, errno);
+                               fatal_sigsegv();
+                       }
 
-               if (WIFSTOPPED(status)) {
-                       int sig = WSTOPSIG(status);
+                       if (singlestepping())
+                               op = PTRACE_SYSEMU_SINGLESTEP;
+                       else
+                               op = PTRACE_SYSEMU;
 
-                       /* These signal handlers need the si argument.
-                        * The SIGIO and SIGALARM handlers which constitute the
-                        * majority of invocations, do not use it.
-                        */
-                       switch (sig) {
-                       case SIGSEGV:
-                       case SIGTRAP:
-                       case SIGILL:
-                       case SIGBUS:
-                       case SIGFPE:
-                       case SIGWINCH:
-                               ptrace(PTRACE_GETSIGINFO, pid, 0, (struct 
siginfo *)&si);
-                               break;
+                       if (ptrace(op, pid, 0, 0)) {
+                               printk(UM_KERN_ERR "%s - ptrace continue 
failed, op = %d, errno = %d\n",
+                                      __func__, op, errno);
+                               fatal_sigsegv();
                        }
 
+                       CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | 
__WALL));
+                       if (err < 0) {
+                               printk(UM_KERN_ERR "%s - wait failed, errno = 
%d\n",
+                                      __func__, errno);
+                               fatal_sigsegv();
+                       }
+
+                       regs->is_user = 1;
+                       if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+                               printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, 
errno = %d\n",
+                                      __func__, errno);
+                               fatal_sigsegv();
+                       }
+
+                       if (get_fp_registers(pid, regs->fp)) {
+                               printk(UM_KERN_ERR "%s -  get_fp_registers 
failed, errno = %d\n",
+                                      __func__, errno);
+                               fatal_sigsegv();
+                       }
+
+                       if (WIFSTOPPED(status)) {
+                               sig = WSTOPSIG(status);
+
+                               /* These signal handlers need the si argument
+                                * and SIGSEGV needs the faultinfo.
+                                * The SIGIO and SIGALARM handlers which 
constitute the
+                                * majority of invocations, do not use it.
+                                */
+                               switch (sig) {
+                               case SIGSEGV:
+                                       get_skas_faultinfo(pid,
+                                                          &regs->faultinfo,
+                                                          aux_fp_regs);
+                                       fallthrough;
+                               case SIGTRAP:
+                               case SIGILL:
+                               case SIGBUS:
+                               case SIGFPE:
+                               case SIGWINCH:
+                                       ptrace(PTRACE_GETSIGINFO, pid, 0,
+                                              (struct siginfo *)&si_ptrace);
+                                       si = &si_ptrace;
+                                       break;
+                               default:
+                                       si = NULL;
+                                       break;
+                               }
+                       } else {
+                               sig = 0;
+                       }
+               }
+
+               UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+
+               if (sig) {
                        switch (sig) {
                        case SIGSEGV:
-                               get_skas_faultinfo(pid,
-                                                  &regs->faultinfo, 
aux_fp_regs);
-
-                               if (PTRACE_FULL_FAULTINFO)
-                                       (*sig_info[SIGSEGV])(SIGSEGV, (struct 
siginfo *)&si,
-                                                            regs);
+                               if (using_seccomp || PTRACE_FULL_FAULTINFO)
+                                       (*sig_info[SIGSEGV])(SIGSEGV, (struct 
siginfo *)si,
+                                                    regs);
                                else
                                        segv(regs->faultinfo, 0, 1, NULL);
 
+                               break;
+                       case SIGSYS:
+                               handle_syscall(regs);
                                break;
                        case SIGTRAP + 0x80:
                                handle_trap(pid, regs);
                                break;
                        case SIGTRAP:
-                               relay_signal(SIGTRAP, (struct siginfo *)&si, 
regs);
+                               relay_signal(SIGTRAP, (struct siginfo *)si, 
regs);
                                break;
                        case SIGALRM:
                                break;
@@ -443,7 +698,7 @@ void userspace(struct uml_pt_regs *regs, unsigned long 
*aux_fp_regs)
                        case SIGFPE:
                        case SIGWINCH:
                                block_signals_trace();
-                               (*sig_info[sig])(sig, (struct siginfo *)&si, 
regs);
+                               (*sig_info[sig])(sig, (struct siginfo *)si, 
regs);
                                unblock_signals_trace();
                                break;
                        default:
@@ -467,9 +722,14 @@ static int __init init_thread_regs(void)
 {
        get_safe_registers(thread_regs.gp, thread_regs.fp);
        /* Set parent's instruction pointer to start of clone-stub */
-       thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
-                                       (unsigned long)stub_clone_handler -
-                                       (unsigned long)__syscall_stub_start;
+       if (using_seccomp)
+               thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
+                               (unsigned long)stub_clone_handler_seccomp -
+                               (unsigned long)__syscall_stub_start;
+       else
+               thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
+                               (unsigned long)stub_clone_handler -
+                               (unsigned long)__syscall_stub_start;
 
        /* syscall data as a temporary stack area (top half). */
        thread_regs.gp[REGS_SP_INDEX] = STUB_DATA +
@@ -493,45 +753,55 @@ int copy_context_skas0(struct mm_id *id, struct mm_id 
*from)
         * prepare offset and fd of child's stack as argument for parent's
         * and child's mmap2 calls
         */
-       *data = ((struct stub_data) {
-               .offset = MMAP_OFFSET(new_offset),
-               .fd     = new_fd,
-               .err    = -ESRCH,
-               .child_err = 0,
-       });
-
-       *child_data = ((struct stub_data) {
-               .child_err = -ESRCH,
-       });
-
-       err = ptrace_setregs(from->u.pid, thread_regs.gp);
-       if (err < 0) {
-               err = -errno;
-               printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno 
= %d\n",
-                     __func__, from->u.pid, -err);
-               return err;
-       }
+       data->offset     = MMAP_OFFSET(new_offset);
+       data->fd         = new_fd;
+       data->err        = -ESRCH;
+       data->child_err  = 0;
 
-       err = put_fp_registers(from->u.pid, thread_regs.fp);
-       if (err < 0) {
-               printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err 
= %d\n",
-                      __func__, from->u.pid, err);
-               return err;
-       }
+       child_data->child_err = -ESRCH;
 
-       /*
-        * Wait, until parent has finished its work: read child's pid from
-        * parent's stack, and check, if bad result.
-        */
-       err = ptrace(PTRACE_CONT, from->u.pid, 0, 0);
-       if (err) {
-               err = -errno;
-               printk(UM_KERN_ERR "Failed to continue new process, pid = %d, 
errno = %d\n",
-                      from->u.pid, errno);
-               return err;
-       }
+       if (using_seccomp) {
+               err = set_stub_state(&thread_regs, data, 0);
+               if (err)
+                       return err;
+
+               /* The architecture dependent state will be identical */
+               memcpy(&child_data->arch_data, &data->arch_data, 
sizeof(data->arch_data));
+
+               child_data->futex = FUTEX_IN_CHILD;
+
+               data->restart_wait = 0;
+               wait_stub_done_seccomp(from->u.pid, data, 0);
+       } else {
+               err = ptrace_setregs(from->u.pid, thread_regs.gp);
+               if (err < 0) {
+                       err = -errno;
+                       printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = 
%d, errno = %d\n",
+                             __func__, from->u.pid, -err);
+                       return err;
+               }
 
-       wait_stub_done(from->u.pid);
+               err = put_fp_registers(from->u.pid, thread_regs.fp);
+               if (err < 0) {
+                       printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = 
%d, err = %d\n",
+                              __func__, from->u.pid, err);
+                       return err;
+               }
+
+               /*
+                * Wait, until parent has finished its work: read child's pid 
from
+                * parent's stack, and check, if bad result.
+                */
+               err = ptrace(PTRACE_CONT, from->u.pid, 0, 0);
+               if (err) {
+                       err = -errno;
+                       printk(UM_KERN_ERR "Failed to continue new process, pid 
= %d, errno = %d\n",
+                              from->u.pid, errno);
+                       return err;
+               }
+
+               wait_stub_done(from->u.pid);
+       }
 
        id->u.pid = data->err;
        if (id->u.pid < 0) {
@@ -544,7 +814,11 @@ int copy_context_skas0(struct mm_id *id, struct mm_id 
*from)
         * Wait, until child has finished too: read child's result from
         * child's stack and check it.
         */
-       wait_stub_done(id->u.pid);
+       if (using_seccomp)
+               wait_stub_done_seccomp(id->u.pid, child_data, 1);
+       else
+               wait_stub_done(id->u.pid);
+
        if (child_data->child_err != STUB_DATA) {
                printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
                       __func__, id->u.pid, data->child_err);
@@ -552,7 +826,8 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
                goto out_kill;
        }
 
-       if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
+       if (!using_seccomp &&
+           ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
                   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
                err = -errno;
                printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = 
%d\n",
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index f84eb13a0b98..4dd8b959c008 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -413,12 +413,9 @@ void __init os_early_checks(void)
        using_seccomp = 0;
 
        if (init_seccomp()) {
-               /* Not fully implemented */
-#if 0
                using_seccomp = 1;
 
                return;
-#endif
        }
 #endif
 
-- 
2.38.1


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

Reply via email to