On Tue, May 31, 2022 at 04:48:16PM +0200, Martin Pieuchot wrote:
> On 18/05/22(Wed) 15:53, Alexander Bluhm wrote:
> > On Tue, May 17, 2022 at 10:44:54AM +1000, David Gwynne wrote:
> > > + cookie = SCARG(uap, proc_cookie);
> > > + if (pr->ps_kbind_addr == pc) {
> > > +         membar_datadep_consumer();
> > > +         if (pr->ps_kbind_cookie != cookie)
> > > +                 goto sigill;
> > > + } else {
> > 
> > You must use membar_consumer() here.  membar_datadep_consumer() is
> > a barrier between reading pointer and pointed data.  Only alpha
> > requires membar_datadep_consumer() for that, everywhere else it is
> > a NOP.
> > 
> > > +         mtx_enter(&pr->ps_mtx);
> > > +         kpc = pr->ps_kbind_addr;
> > 
> > Do we need kpc variable?  I would prefer to read explicit
> > pr->ps_kbind_addr in the two places where we use it.
> > 
> > I think the logic of barriers and mutexes is correct.
> > 
> > with the suggestions above OK bluhm@
> 
> I believe you should go ahead with the current diff.  ok with me.  Moving
> the field under the scope of another lock can be easily done afterward.

Agreed. I'll commit this version with tweaks to make bluhm@ happy
tomorrow.

Index: kern/init_sysent.c
===================================================================
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.236
diff -u -p -r1.236 init_sysent.c
--- kern/init_sysent.c  1 May 2022 23:00:04 -0000       1.236
+++ kern/init_sysent.c  12 Jun 2022 04:48:06 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: init_sysent.c,v 1.236 2022/05/01 23:00:04 tedu Exp $  */
+/*     $OpenBSD$       */
 
 /*
  * System call switch table.
@@ -204,7 +204,7 @@ const struct sysent sysent[] = {
            sys_utimensat },                    /* 84 = utimensat */
        { 2, s(struct sys_futimens_args), 0,
            sys_futimens },                     /* 85 = futimens */
-       { 3, s(struct sys_kbind_args), 0,
+       { 3, s(struct sys_kbind_args), SY_NOLOCK | 0,
            sys_kbind },                        /* 86 = kbind */
        { 2, s(struct sys_clock_gettime_args), SY_NOLOCK | 0,
            sys_clock_gettime },                /* 87 = clock_gettime */
Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.223
diff -u -p -r1.223 syscalls.master
--- kern/syscalls.master        24 Feb 2022 07:41:51 -0000      1.223
+++ kern/syscalls.master        12 Jun 2022 04:48:06 -0000
@@ -194,7 +194,7 @@
                            const struct timespec *times, int flag); }
 85     STD             { int sys_futimens(int fd, \
                            const struct timespec *times); }
-86     STD             { int sys_kbind(const struct __kbind *param, \
+86     STD NOLOCK      { int sys_kbind(const struct __kbind *param, \
                            size_t psize, int64_t proc_cookie); }
 87     STD NOLOCK      { int sys_clock_gettime(clockid_t clock_id, \
                            struct timespec *tp); }
Index: uvm/uvm_mmap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.169
diff -u -p -r1.169 uvm_mmap.c
--- uvm/uvm_mmap.c      19 Jan 2022 10:43:48 -0000      1.169
+++ uvm/uvm_mmap.c      12 Jun 2022 04:48:06 -0000
@@ -70,6 +70,7 @@
 #include <sys/pledge.h>
 #include <sys/unistd.h>                /* for KBIND* */
 #include <sys/user.h>
+#include <sys/atomic.h>
 
 #include <machine/exec.h>      /* for __LDPGSZ */
 
@@ -1127,31 +1128,61 @@ sys_kbind(struct proc *p, void *v, regis
        size_t psize, s;
        u_long pc;
        int count, i, extra;
+       uint64_t cookie;
        int error;
 
        /*
         * extract syscall args from uap
         */
        paramp = SCARG(uap, param);
-       psize = SCARG(uap, psize);
 
        /* a NULL paramp disables the syscall for the process */
        if (paramp == NULL) {
+               mtx_enter(&pr->ps_mtx);
                if (pr->ps_kbind_addr != 0)
-                       sigexit(p, SIGILL);
+                       goto leave_sigill;
                pr->ps_kbind_addr = BOGO_PC;
+               mtx_leave(&pr->ps_mtx);
                return 0;
        }
 
        /* security checks */
+
+       /*
+        * ps_kbind_addr can only be set to 0 or BOGO_PC by the
+        * kernel, not by a call from userland.
+        */
        pc = PROC_PC(p);
-       if (pr->ps_kbind_addr == 0) {
-               pr->ps_kbind_addr = pc;
-               pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
-       } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC)
-               sigexit(p, SIGILL);
-       else if (pr->ps_kbind_cookie != SCARG(uap, proc_cookie))
-               sigexit(p, SIGILL);
+       if (pc == 0 || pc == BOGO_PC)
+               goto sigill;
+
+       cookie = SCARG(uap, proc_cookie);
+       if (pr->ps_kbind_addr == pc) {
+               membar_consumer();
+               if (pr->ps_kbind_cookie != cookie)
+                       goto sigill;
+       } else {
+               mtx_enter(&pr->ps_mtx);
+               /*
+                * If we're the first thread in (pr->ps_kbind_addr
+                * is 0), then remember where the call came from for
+                * the future.
+                *
+                * Maybe another thread raced us and won. pc cannot
+                * be BOGO_PC, so this also handles the check for
+                * the disabled case.
+                */
+               if (pr->ps_kbind_cookie == 0) {
+                       pr->ps_kbind_cookie = cookie;
+                       membar_producer();
+                       pr->ps_kbind_addr = pc;
+               } else if (pr->ps_kbind_cookie != pc ||
+                   pr->ps_kbind_cookie != cookie)
+                       goto leave_sigill;
+               mtx_leave(&pr->ps_mtx);
+       }
+
+       psize = SCARG(uap, psize);
        if (psize < sizeof(struct __kbind) || psize > sizeof(param))
                return EINVAL;
        if ((error = copyin(paramp, &param, psize)))
@@ -1187,6 +1218,7 @@ sys_kbind(struct proc *p, void *v, regis
        data = (const char *)&paramp[count];
 
        /* all looks good, so do the bindings */
+       KERNEL_LOCK();
        last_baseva = VM_MAXUSER_ADDRESS;
        kva = 0;
        TAILQ_INIT(&dead_entries);
@@ -1239,6 +1271,14 @@ redo:
                vm_map_unlock(kernel_map);
        }
        uvm_unmap_detach(&dead_entries, AMAP_REFALL);
+       KERNEL_UNLOCK();
 
        return error;
+
+leave_sigill:
+       mtx_leave(&pr->ps_mtx);
+sigill:
+       KERNEL_LOCK();
+       sigexit(p, SIGILL);
+       /* NOTREACHED KERNEL_UNLOCK(); */
 }
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.329
diff -u -p -r1.329 proc.h
--- sys/proc.h  10 Mar 2022 15:21:08 -0000      1.329
+++ sys/proc.h  12 Jun 2022 04:48:06 -0000
@@ -234,8 +234,8 @@ struct process {
        uint64_t ps_pledge;
        uint64_t ps_execpledge;
 
-       int64_t ps_kbind_cookie;
-       u_long  ps_kbind_addr;
+       int64_t ps_kbind_cookie;        /* [m] */
+       u_long  ps_kbind_addr;          /* [m] */
 
 /* End area that is copied on creation. */
 #define ps_endcopy     ps_refcnt

Reply via email to