On Wed, Jun 15, 2022 at 10:40:35PM -0500, Scott Cheloha wrote:
> On Wed, Jun 15, 2022 at 06:17:07PM -0600, Theo de Raadt wrote:
> > Mark Kettenis <mark.kette...@xs4all.nl> wrote:
> > 
> > > Well, I believe that Scott was trying to fix a race condition that can
> > > only happen if code is using kbind(2) incorrectly, i.e. when the
> > > threads deliberately pass different cookies to kbind(2) or execute
> > > kbind(2) from different "text" addresses.
> > > 
> > > I still think the solution is simply to accept that race condition.
> > 
> > Right.
> > 
> > People are not calling kbind.  They are calling syscall(SYS_kbind
> > 
> > The man page says "don't do that".  No user serviceable parts inside.
> > Do not provide to children.
> > 
> > That said, Scott is about to share a diff he and I did a few cycles
> > around, to at least make the call-in transaction be a lock.
> 
> [...]
> 
> This patch reorganizes the logic in sys_kbind() preceding the
> copyin(9) so that we only need to take the kernel lock in a single
> spot if something goes wrong and we need to raise SIGILL.
> 
> It also puts the per-process mutex, ps_mtx, around that logic.  This
> guarantees that the first thread to reach sys_kbind() sets
> ps_kbind_addr and ps_kbind_cookie, even in oddball situations where
> the program isn't using ld.so(1) correctly.
> 
> [...]

10 days, no replies public or private.  I trust that means the patch
is fine.

Here is a tweaked patch with the binding loop wrapped with the kernel
lock.  We can more carefully determine whether uvm_unmap_remove(),
uvm_map_extract(), and uvm_unmap_detach() are MP-safe in a subsequent
patch.  They *look* safe but I can't be sure and nobody volunteered
any thoughts or review for the prior patch so we'll do the
conservative thing and push the kernel lock down, just as dlg@ was
going to do.

If something comes of guenther@'s PT_OPENBSD_KBIND idea we can
trivially remove ps_mtx from this code.

OK?

Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.224
diff -u -p -r1.224 syscalls.master
--- kern/syscalls.master        16 May 2022 07:36:04 -0000      1.224
+++ kern/syscalls.master        24 Jun 2022 14:43:26 -0000
@@ -194,7 +194,7 @@
                            const struct timespec *times, int flag); }
 85     STD             { int sys_futimens(int fd, \
                            const struct timespec *times); }
-86     STD             { int sys_kbind(const struct __kbind *param, \
+86     STD NOLOCK      { int sys_kbind(const struct __kbind *param, \
                            size_t psize, int64_t proc_cookie); }
 87     STD NOLOCK      { int sys_clock_gettime(clockid_t clock_id, \
                            struct timespec *tp); }
Index: kern/init_sysent.c
===================================================================
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.237
diff -u -p -r1.237 init_sysent.c
--- kern/init_sysent.c  16 May 2022 07:38:10 -0000      1.237
+++ kern/init_sysent.c  24 Jun 2022 14:43:26 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: init_sysent.c,v 1.237 2022/05/16 07:38:10 mvs Exp $   */
+/*     $OpenBSD$       */
 
 /*
  * System call switch table.
@@ -204,7 +204,7 @@ const struct sysent sysent[] = {
            sys_utimensat },                    /* 84 = utimensat */
        { 2, s(struct sys_futimens_args), 0,
            sys_futimens },                     /* 85 = futimens */
-       { 3, s(struct sys_kbind_args), 0,
+       { 3, s(struct sys_kbind_args), SY_NOLOCK | 0,
            sys_kbind },                        /* 86 = kbind */
        { 2, s(struct sys_clock_gettime_args), SY_NOLOCK | 0,
            sys_clock_gettime },                /* 87 = clock_gettime */
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.330
diff -u -p -r1.330 proc.h
--- sys/proc.h  13 May 2022 15:32:00 -0000      1.330
+++ sys/proc.h  24 Jun 2022 14:43:27 -0000
@@ -234,8 +234,8 @@ struct process {
        uint64_t ps_pledge;
        uint64_t ps_execpledge;
 
-       int64_t ps_kbind_cookie;
-       u_long  ps_kbind_addr;
+       int64_t ps_kbind_cookie;        /* [m] */
+       u_long  ps_kbind_addr;          /* [m] */
 
 /* End area that is copied on creation. */
 #define ps_endcopy     ps_refcnt
Index: sys/syscall.h
===================================================================
RCS file: /cvs/src/sys/sys/syscall.h,v
retrieving revision 1.234
diff -u -p -r1.234 syscall.h
--- sys/syscall.h       16 May 2022 07:38:10 -0000      1.234
+++ sys/syscall.h       24 Jun 2022 14:43:27 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: syscall.h,v 1.234 2022/05/16 07:38:10 mvs Exp $       */
+/*     $OpenBSD$       */
 
 /*
  * System call numbers.
Index: sys/syscallargs.h
===================================================================
RCS file: /cvs/src/sys/sys/syscallargs.h,v
retrieving revision 1.237
diff -u -p -r1.237 syscallargs.h
--- sys/syscallargs.h   16 May 2022 07:38:10 -0000      1.237
+++ sys/syscallargs.h   24 Jun 2022 14:43:27 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: syscallargs.h,v 1.237 2022/05/16 07:38:10 mvs Exp $   */
+/*     $OpenBSD$       */
 
 /*
  * System call argument lists.
Index: uvm/uvm_mmap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.169
diff -u -p -r1.169 uvm_mmap.c
--- uvm/uvm_mmap.c      19 Jan 2022 10:43:48 -0000      1.169
+++ uvm/uvm_mmap.c      24 Jun 2022 14:43:27 -0000
@@ -1127,7 +1127,7 @@ sys_kbind(struct proc *p, void *v, regis
        size_t psize, s;
        u_long pc;
        int count, i, extra;
-       int error;
+       int error, sigill = 0;
 
        /*
         * extract syscall args from uap
@@ -1135,23 +1135,42 @@ sys_kbind(struct proc *p, void *v, regis
        paramp = SCARG(uap, param);
        psize = SCARG(uap, psize);
 
-       /* a NULL paramp disables the syscall for the process */
-       if (paramp == NULL) {
-               if (pr->ps_kbind_addr != 0)
-                       sigexit(p, SIGILL);
-               pr->ps_kbind_addr = BOGO_PC;
-               return 0;
-       }
-
-       /* security checks */
+       /*
+        * If paramp is NULL and we're uninitialized, disable the syscall
+        * for the process.  Raise SIGILL if paramp is NULL and we're
+        * already initialized.
+        *
+        * If paramp is non-NULL and we're uninitialized, do initialization.
+        * Otherwise, do security checks and raise SIGILL on failure.
+        */
        pc = PROC_PC(p);
-       if (pr->ps_kbind_addr == 0) {
+       mtx_enter(&pr->ps_mtx);
+       if (paramp == NULL) {
+               if (pr->ps_kbind_addr == 0)
+                       pr->ps_kbind_addr = BOGO_PC;
+               else
+                       sigill = 1;
+       } else if (pr->ps_kbind_addr == 0) {
                pr->ps_kbind_addr = pc;
                pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
-       } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC)
-               sigexit(p, SIGILL);
-       else if (pr->ps_kbind_cookie != SCARG(uap, proc_cookie))
+       } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC ||
+           pr->ps_kbind_cookie != SCARG(uap, proc_cookie)) {
+               sigill = 1;
+       }
+       mtx_leave(&pr->ps_mtx);
+
+       /* Raise SIGILL if something is off. */
+       if (sigill) {
+               KERNEL_LOCK();
                sigexit(p, SIGILL);
+               /* NOTREACHED */
+               KERNEL_UNLOCK();
+       }
+
+       /* We're done if we were disabling the syscall. */
+       if (paramp == NULL)
+               return 0;
+
        if (psize < sizeof(struct __kbind) || psize > sizeof(param))
                return EINVAL;
        if ((error = copyin(paramp, &param, psize)))
@@ -1190,6 +1209,7 @@ sys_kbind(struct proc *p, void *v, regis
        last_baseva = VM_MAXUSER_ADDRESS;
        kva = 0;
        TAILQ_INIT(&dead_entries);
+       KERNEL_LOCK();
        for (i = 0; i < count; i++) {
                baseva = (vaddr_t)paramp[i].kb_addr;
                s = paramp[i].kb_size;
@@ -1239,6 +1259,7 @@ redo:
                vm_map_unlock(kernel_map);
        }
        uvm_unmap_detach(&dead_entries, AMAP_REFALL);
+       KERNEL_UNLOCK();
 
        return error;
 }

Reply via email to