On 22/05/18(Tue) 15:39, Martin Pieuchot wrote:
> By assuming that `f_data' is immutable, which AFAIK is true for sockets,
> we can remove the KERNEL_LOCK() from the following syscalls iff files are
> refcounted in an MP-safe way.
> 
> This diff includes the EBUSY check in dup2(2) which is currently required
> to avoid races with accept(2) and will later make our life easier wrt
> open(2).
> 
> It also includes the fdinsert() diff I sent earlier.
> 
> On top of that I'm introducing a global mutex, `fhdlk', that protects
> `f_count' and the implicit reference in `filehead'.
> 
> A socket stays alive as long as its associated file has a positive
> refcount.  When this refcount drops to zero, fdrop() will be called and soclose()
> will free/clean `f_data'.   That's the only place where `f_data' is
> changed during the life of a socket.  That's why it is safe to dereference
> `f_data' when getsock() returned a valid & refcounted `fp'.
> 
> Many ktrace(2) internals now need to grab the KERNEL_LOCK(), just like 
> ptsignal().
> 
> Note that for unix, routing and pfkey sockets, solock() still grabs the
> KERNEL_LOCK().  So even if syscalls are marked as SY_NOLOCK that doesn't
> mean they won't grab it.  In fact some network functions like
> ifa_ifwithaddr() below now need to grab the KERNEL_LOCK().  That's good:
> it means we're pushing the lock down.
> 
> Tests?  Comments?

Updated diff that should prevent the reported hangs, as analyzed by tb@ and
visa@.

Index: kern/exec_script.c
===================================================================
RCS file: /cvs/src/sys/kern/exec_script.c,v
retrieving revision 1.44
diff -u -p -r1.44 exec_script.c
--- kern/exec_script.c  2 May 2018 02:24:56 -0000       1.44
+++ kern/exec_script.c  25 May 2018 08:24:33 -0000
@@ -170,17 +170,20 @@ check_shell:
 #endif
 
                fdplock(p->p_fd);
-               error = falloc(p, 0, &fp, &epp->ep_fd);
-               fdpunlock(p->p_fd);
-               if (error)
+               error = falloc(p, &fp, &epp->ep_fd);
+               if (error) {
+                       fdpunlock(p->p_fd);
                        goto fail;
+               }
 
                epp->ep_flags |= EXEC_HASFD;
                fp->f_type = DTYPE_VNODE;
                fp->f_ops = &vnops;
                fp->f_data = (caddr_t) scriptvp;
                fp->f_flag = FREAD;
-               FILE_SET_MATURE(fp, p);
+               fdinsert(p->p_fd, epp->ep_fd, 0, fp);
+               fdpunlock(p->p_fd);
+               FRELE(fp, p);
        }
 
        /* set up the parameters for the recursive check_exec() call */
Index: kern/init_sysent.c
===================================================================
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.191
diff -u -p -r1.191 init_sysent.c
--- kern/init_sysent.c  12 Dec 2017 01:13:14 -0000      1.191
+++ kern/init_sysent.c  25 May 2018 08:24:33 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: init_sysent.c,v 1.191 2017/12/12 01:13:14 deraadt Exp $       */
+/*     $OpenBSD$       */
 
 /*
  * System call switch table.
@@ -76,17 +76,17 @@ struct sysent sysent[] = {
        { 0, 0, 0,
            sys_nosys },                        /* 26 = unimplemented ptrace */
 #endif
-       { 3, s(struct sys_recvmsg_args), 0,
+       { 3, s(struct sys_recvmsg_args), SY_NOLOCK | 0,
            sys_recvmsg },                      /* 27 = recvmsg */
-       { 3, s(struct sys_sendmsg_args), 0,
+       { 3, s(struct sys_sendmsg_args), SY_NOLOCK | 0,
            sys_sendmsg },                      /* 28 = sendmsg */
-       { 6, s(struct sys_recvfrom_args), 0,
+       { 6, s(struct sys_recvfrom_args), SY_NOLOCK | 0,
            sys_recvfrom },                     /* 29 = recvfrom */
-       { 3, s(struct sys_accept_args), 0,
+       { 3, s(struct sys_accept_args), SY_NOLOCK | 0,
            sys_accept },                       /* 30 = accept */
-       { 3, s(struct sys_getpeername_args), 0,
+       { 3, s(struct sys_getpeername_args), SY_NOLOCK | 0,
            sys_getpeername },                  /* 31 = getpeername */
-       { 3, s(struct sys_getsockname_args), 0,
+       { 3, s(struct sys_getsockname_args), SY_NOLOCK | 0,
            sys_getsockname },                  /* 32 = getsockname */
        { 2, s(struct sys_access_args), 0,
            sys_access },                       /* 33 = access */
@@ -218,7 +218,7 @@ struct sysent sysent[] = {
            sys_nanosleep },                    /* 91 = nanosleep */
        { 3, s(struct sys_fcntl_args), 0,
            sys_fcntl },                        /* 92 = fcntl */
-       { 4, s(struct sys_accept4_args), 0,
+       { 4, s(struct sys_accept4_args), SY_NOLOCK | 0,
            sys_accept4 },                      /* 93 = accept4 */
        { 5, s(struct sys___thrsleep_args), 0,
            sys___thrsleep },                   /* 94 = __thrsleep */
@@ -226,9 +226,9 @@ struct sysent sysent[] = {
            sys_fsync },                        /* 95 = fsync */
        { 3, s(struct sys_setpriority_args), 0,
            sys_setpriority },                  /* 96 = setpriority */
-       { 3, s(struct sys_socket_args), 0,
+       { 3, s(struct sys_socket_args), SY_NOLOCK | 0,
            sys_socket },                       /* 97 = socket */
-       { 3, s(struct sys_connect_args), 0,
+       { 3, s(struct sys_connect_args), SY_NOLOCK | 0,
            sys_connect },                      /* 98 = connect */
        { 3, s(struct sys_getdents_args), 0,
            sys_getdents },                     /* 99 = getdents */
@@ -240,11 +240,11 @@ struct sysent sysent[] = {
            sys_dup3 },                         /* 102 = dup3 */
        { 1, s(struct sys_sigreturn_args), 0,
            sys_sigreturn },                    /* 103 = sigreturn */
-       { 3, s(struct sys_bind_args), 0,
+       { 3, s(struct sys_bind_args), SY_NOLOCK | 0,
            sys_bind },                         /* 104 = bind */
-       { 5, s(struct sys_setsockopt_args), 0,
+       { 5, s(struct sys_setsockopt_args), SY_NOLOCK | 0,
            sys_setsockopt },                   /* 105 = setsockopt */
-       { 2, s(struct sys_listen_args), 0,
+       { 2, s(struct sys_listen_args), SY_NOLOCK | 0,
            sys_listen },                       /* 106 = listen */
        { 4, s(struct sys_chflagsat_args), 0,
            sys_chflagsat },                    /* 107 = chflagsat */
@@ -268,7 +268,7 @@ struct sysent sysent[] = {
            sys_nosys },                        /* 116 = obsolete t32_gettimeofday */
        { 0, 0, 0,
            sys_nosys },                        /* 117 = obsolete t32_getrusage */
-       { 5, s(struct sys_getsockopt_args), 0,
+       { 5, s(struct sys_getsockopt_args), SY_NOLOCK | 0,
            sys_getsockopt },                   /* 118 = getsockopt */
        { 3, s(struct sys_thrkill_args), 0,
            sys_thrkill },                      /* 119 = thrkill */
@@ -298,11 +298,11 @@ struct sysent sysent[] = {
            sys_flock },                        /* 131 = flock */
        { 2, s(struct sys_mkfifo_args), 0,
            sys_mkfifo },                       /* 132 = mkfifo */
-       { 6, s(struct sys_sendto_args), 0,
+       { 6, s(struct sys_sendto_args), SY_NOLOCK | 0,
            sys_sendto },                       /* 133 = sendto */
-       { 2, s(struct sys_shutdown_args), 0,
+       { 2, s(struct sys_shutdown_args), SY_NOLOCK | 0,
            sys_shutdown },                     /* 134 = shutdown */
-       { 4, s(struct sys_socketpair_args), 0,
+       { 4, s(struct sys_socketpair_args), SY_NOLOCK | 0,
            sys_socketpair },                   /* 135 = socketpair */
        { 2, s(struct sys_mkdir_args), 0,
            sys_mkdir },                        /* 136 = mkdir */
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.158
diff -u -p -r1.158 kern_descrip.c
--- kern/kern_descrip.c 8 May 2018 09:03:58 -0000       1.158
+++ kern/kern_descrip.c 25 May 2018 08:24:58 -0000
@@ -67,6 +67,7 @@
 /*
  * Descriptor management.
  */
+struct mutex fhdlk = MUTEX_INITIALIZER(IPL_VM);
 struct filelist filehead;      /* head of list of open files */
 int numfiles;                  /* actual number of open files */
 
@@ -144,6 +145,23 @@ find_last_set(struct filedesc *fd, int l
        return i;
 }
 
+static __inline int
+fd_inuse(struct filedesc *fdp, int fd)
+{
+       u_int off = fd >> NDENTRYSHIFT;
+
+       if (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK)))
+               return 1;
+
+       if (fdp->fd_lomap[off] != ~0)
+               return 0;
+
+       if (fdp->fd_himap[off >> NDENTRYSHIFT] & (1 << (off & NDENTRYMASK)))
+               return 1;
+
+       return 0;
+}
+
 static __inline void
 fd_used(struct filedesc *fdp, int fd)
 {
@@ -184,16 +202,18 @@ fd_iterfile(struct file *fp, struct proc
 {
        struct file *nfp;
 
+       mtx_enter(&fhdlk);
        if (fp == NULL)
                nfp = LIST_FIRST(&filehead);
        else
                nfp = LIST_NEXT(fp, f_list);
 
-       /* don't FREF when f_count == 0 to avoid race in fdrop() */
-       while (nfp != NULL && (nfp->f_count == 0 || !FILE_IS_USABLE(nfp)))
+       /* don't refcount when f_count == 0 to avoid race in fdrop() */
+       while (nfp != NULL && nfp->f_count == 0)
                nfp = LIST_NEXT(nfp, f_list);
        if (nfp != NULL)
-               FREF(nfp);
+               nfp->f_count++;
+       mtx_leave(&fhdlk);
 
        if (fp != NULL)
                FRELE(fp, p);
@@ -206,13 +226,17 @@ fd_getfile(struct filedesc *fdp, int fd)
 {
        struct file *fp;
 
-       if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
-               return (NULL);
+       vfs_stall_barrier();
 
-       if (!FILE_IS_USABLE(fp))
+       if ((u_int)fd >= fdp->fd_nfiles)
                return (NULL);
 
-       FREF(fp);
+       mtx_enter(&fhdlk);
+       fp = fdp->fd_ofiles[fd];
+       if (fp != NULL)
+               fp->f_count++;
+       mtx_leave(&fhdlk);
+
        return (fp);
 }
 
@@ -634,18 +658,22 @@ finishdup(struct proc *p, struct file *f
                return (EDEADLK);
        }
 
-       /*
-        * Don't fd_getfile here. We want to closef LARVAL files and
-        * closef can deal with that.
-        */
+       mtx_enter(&fhdlk);
        oldfp = fdp->fd_ofiles[new];
        if (oldfp != NULL)
-               FREF(oldfp);
+               oldfp->f_count++;
+       mtx_leave(&fhdlk);
+
+       if (dup2 && oldfp == NULL) {
+               if (fd_inuse(fdp, new)) {
+                       FRELE(fp, p);
+                       return (EBUSY);
+               }
+               fd_used(fdp, new);
+       }
 
        fdp->fd_ofiles[new] = fp;
        fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
-       if (dup2 && oldfp == NULL)
-               fd_used(fdp, new);
        *retval = new;
 
        if (oldfp != NULL) {
@@ -658,6 +686,25 @@ finishdup(struct proc *p, struct file *f
 }
 
 void
+fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
+{
+       struct file *fq;
+
+       fdpassertlocked(fdp);
+
+       mtx_enter(&fhdlk);
+       if ((fq = fdp->fd_ofiles[0]) != NULL) {
+               LIST_INSERT_AFTER(fq, fp, f_list);
+       } else {
+               LIST_INSERT_HEAD(&filehead, fp, f_list);
+       }
+       fdp->fd_ofiles[fd] = fp;
+       fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
+       fp->f_iflags |= FIF_INSERTED;
+       mtx_leave(&fhdlk);
+}
+
+void
 fdremove(struct filedesc *fdp, int fd)
 {
        fdpassertlocked(fdp);
@@ -670,21 +717,14 @@ int
 fdrelease(struct proc *p, int fd)
 {
        struct filedesc *fdp = p->p_fd;
-       struct file **fpp, *fp;
+       struct file *fp;
 
        fdpassertlocked(fdp);
 
-       /*
-        * Don't fd_getfile here. We want to closef LARVAL files and closef
-        * can deal with that.
-        */
-       fpp = &fdp->fd_ofiles[fd];
-       fp = *fpp;
+       fp = fd_getfile(fdp, fd);
        if (fp == NULL)
                return (EBADF);
-       FREF(fp);
-       *fpp = NULL;
-       fd_unused(fdp, fd);
+       fdremove(fdp, fd);
        if (fd < fdp->fd_knlistsize)
                knote_fdclose(p, fd);
        return (closef(fp, p));
@@ -927,9 +967,9 @@ fdexpand(struct proc *p)
  * a file descriptor for the process that refers to it.
  */
 int
-falloc(struct proc *p, int flags, struct file **resultfp, int *resultfd)
+falloc(struct proc *p, struct file **resultfp, int *resultfd)
 {
-       struct file *fp, *fq;
+       struct file *fp;
        int error, i;
 
        KASSERT(resultfp != NULL);
@@ -957,21 +997,17 @@ restart:
         */
        numfiles++;
        fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
-       mtx_init(&fp->f_mtx, IPL_NONE);
-       fp->f_iflags = FIF_LARVAL;
-       if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
-               LIST_INSERT_AFTER(fq, fp, f_list);
-       } else {
-               LIST_INSERT_HEAD(&filehead, fp, f_list);
-       }
-       p->p_fd->fd_ofiles[i] = fp;
-       p->p_fd->fd_ofileflags[i] |= (flags & UF_EXCLOSE);
+       mtx_init(&fp->f_mtx, IPL_VM);
        fp->f_count = 1;
        fp->f_cred = p->p_ucred;
        crhold(fp->f_cred);
        *resultfp = fp;
        *resultfd = i;
-       FREF(fp);
+
+       mtx_enter(&fhdlk);
+       fp->f_count++;
+       mtx_leave(&fhdlk);
+
        return (0);
 }
 
@@ -1063,6 +1099,7 @@ fdcopy(struct process *pr)
        newfdp->fd_flags = fdp->fd_flags;
        newfdp->fd_cmask = fdp->fd_cmask;
 
+       mtx_enter(&fhdlk);
        for (i = 0; i <= fdp->fd_lastfile; i++) {
                struct file *fp = fdp->fd_ofiles[i];
 
@@ -1079,12 +1116,13 @@ fdcopy(struct process *pr)
                            fp->f_type == DTYPE_KQUEUE)
                                continue;
 
-                       FREF(fp);
+                       fp->f_count++;
                        newfdp->fd_ofiles[i] = fp;
                        newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
                        fd_used(newfdp, i);
                }
        }
+       mtx_leave(&fhdlk);
        fdpunlock(fdp);
 
        return (newfdp);
@@ -1106,8 +1144,9 @@ fdfree(struct proc *p)
        for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
                fp = *fpp;
                if (fp != NULL) {
-                       FREF(fp);
                        *fpp = NULL;
+                        /* closef() expects a refcount of 2 */
+                       FREF(fp);
                        (void) closef(fp, p);
                }
        }
@@ -1145,11 +1184,11 @@ closef(struct file *fp, struct proc *p)
        if (fp == NULL)
                return (0);
 
-#ifdef DIAGNOSTIC
-       if (fp->f_count < 2)
-               panic("closef: count (%ld) < 2", fp->f_count);
-#endif
+       KASSERTMSG(fp->f_count >= 2, "count (%ld) < 2", fp->f_count);
+
+       mtx_enter(&fhdlk);
        fp->f_count--;
+       mtx_leave(&fhdlk);
 
        /*
         * POSIX record locking dictates that any close releases ALL
@@ -1181,18 +1220,19 @@ fdrop(struct file *fp, struct proc *p)
 {
        int error;
 
-#ifdef DIAGNOSTIC
-       if (fp->f_count != 0)
-               panic("fdrop: count (%ld) != 0", fp->f_count);
-#endif
+       MUTEX_ASSERT_LOCKED(&fhdlk);
+
+       KASSERTMSG(fp->f_count == 0, "count (%ld) != 0", fp->f_count);
+
+       if (fp->f_iflags & FIF_INSERTED)
+               LIST_REMOVE(fp, f_list);
+       mtx_leave(&fhdlk);
 
        if (fp->f_ops)
                error = (*fp->f_ops->fo_close)(fp, p);
        else
                error = 0;
 
-       /* Free fp */
-       LIST_REMOVE(fp, f_list);
        crfree(fp->f_cred);
        numfiles--;
        pool_put(&file_pool, fp);
@@ -1307,7 +1347,7 @@ dupfdopen(struct proc *p, int indx, int 
         * of file descriptors, or the fd to be dup'd has already been
         * closed, reject. Note, there is no need to check for new == old
         * because fd_getfile will return NULL if the file at indx is
-        * newly created by falloc (FIF_LARVAL).
+        * newly created by falloc.
         */
        if ((wfp = fd_getfile(fdp, dupfd)) == NULL)
                return (EBADF);
Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.89
diff -u -p -r1.89 kern_event.c
--- kern/kern_event.c   22 May 2018 19:15:22 -0000      1.89
+++ kern/kern_event.c   25 May 2018 08:24:33 -0000
@@ -441,10 +441,9 @@ sys_kqueue(struct proc *p, void *v, regi
        int fd, error;
 
        fdplock(fdp);
-       error = falloc(p, 0, &fp, &fd);
-       fdpunlock(fdp);
+       error = falloc(p, &fp, &fd);
        if (error)
-               return (error);
+               goto out;
        fp->f_flag = FREAD | FWRITE;
        fp->f_type = DTYPE_KQUEUE;
        fp->f_ops = &kqueueops;
@@ -456,8 +455,11 @@ sys_kqueue(struct proc *p, void *v, regi
        if (fdp->fd_knlistsize < 0)
                fdp->fd_knlistsize = 0;         /* this process has a kq */
        kq->kq_fdp = fdp;
-       FILE_SET_MATURE(fp, p);
-       return (0);
+       fdinsert(fdp, fd, 0, fp);
+       FRELE(fp, p);
+out:
+       fdpunlock(fdp);
+       return (error);
 }
 
 int
Index: kern/kern_exec.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exec.c,v
retrieving revision 1.195
diff -u -p -r1.195 kern_exec.c
--- kern/kern_exec.c    28 Apr 2018 03:13:04 -0000      1.195
+++ kern/kern_exec.c    25 May 2018 08:24:33 -0000
@@ -584,7 +584,7 @@ sys_execve(struct proc *p, void *v, regi
                                struct vnode *vp;
                                int indx;
 
-                               if ((error = falloc(p, 0, &fp, &indx)) != 0)
+                               if ((error = falloc(p, &fp, &indx)) != 0)
                                        break;
 #ifdef DIAGNOSTIC
                                if (indx != i)
@@ -607,10 +607,9 @@ sys_execve(struct proc *p, void *v, regi
                                fp->f_type = DTYPE_VNODE;
                                fp->f_ops = &vnops;
                                fp->f_data = (caddr_t)vp;
-                               FILE_SET_MATURE(fp, p);
-                       } else {
-                               FRELE(fp, p);
+                               fdinsert(p->p_fd, indx, 0, fp);
                        }
+                       FRELE(fp, p);
                }
                fdpunlock(p->p_fd);
                if (error)
Index: kern/kern_ktrace.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_ktrace.c,v
retrieving revision 1.96
diff -u -p -r1.96 kern_ktrace.c
--- kern/kern_ktrace.c  28 Apr 2018 03:13:04 -0000      1.96
+++ kern/kern_ktrace.c  25 May 2018 08:24:33 -0000
@@ -225,7 +225,7 @@ ktrgenio(struct proc *p, int fd, enum ui
        struct ktr_header kth;
        struct ktr_genio ktp;
        caddr_t cp;
-       int count;
+       int count, error;
        int buflen;
 
        atomic_setbits_int(&p->p_flag, P_INKTR);
@@ -254,7 +254,10 @@ ktrgenio(struct proc *p, int fd, enum ui
                if (copyin(iov->iov_base, cp, count))
                        break;
 
-               if (ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count) != 0)
+               KERNEL_LOCK();
+               error = ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count);
+               KERNEL_UNLOCK();
+               if (error != 0)
                        break;
 
                iov->iov_len -= count;
@@ -294,13 +297,14 @@ ktrstruct(struct proc *p, const char *na
 {
        struct ktr_header kth;
 
-       KERNEL_ASSERT_LOCKED();
        atomic_setbits_int(&p->p_flag, P_INKTR);
        ktrinitheader(&kth, p, KTR_STRUCT);
-       
+
        if (data == NULL)
                datalen = 0;
+       KERNEL_LOCK();
        ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen);
+       KERNEL_UNLOCK();
        atomic_clearbits_int(&p->p_flag, P_INKTR);
 }
 
@@ -386,7 +390,9 @@ ktrpledge(struct proc *p, int error, uin
        kp.code = code;
        kp.syscall = syscall;
 
+       KERNEL_LOCK();
        ktrwrite(p, &kth, &kp, sizeof(kp));
+       KERNEL_UNLOCK();
        atomic_clearbits_int(&p->p_flag, P_INKTR);
 }
 
@@ -622,6 +628,8 @@ ktrwriteraw(struct proc *curp, struct vn
        struct iovec aiov[3];
        struct process *pr;
        int error;
+
+       KERNEL_ASSERT_LOCKED();
 
        auio.uio_iov = &aiov[0];
        auio.uio_offset = 0;
Index: kern/kern_pledge.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_pledge.c,v
retrieving revision 1.230
diff -u -p -r1.230 kern_pledge.c
--- kern/kern_pledge.c  28 Apr 2018 12:49:21 -0000      1.230
+++ kern/kern_pledge.c  25 May 2018 08:24:33 -0000
@@ -523,6 +523,7 @@ pledge_fail(struct proc *p, int error, u
        if (p->p_p->ps_pledge & PLEDGE_ERROR)
                return (ENOSYS);
 
+       KERNEL_LOCK();
        log(LOG_ERR, "%s[%d]: pledge \"%s\", syscall %d\n",
            p->p_p->ps_comm, p->p_p->ps_pid, codes, p->p_pledge_syscall);
        p->p_p->ps_acflag |= APLEDGE;
@@ -535,6 +536,7 @@ pledge_fail(struct proc *p, int error, u
        psignal(p, SIGABRT);
 
        p->p_p->ps_pledge = 0;          /* Disable all PLEDGE_ flags */
+       KERNEL_UNLOCK();
        return (error);
 }
 
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.337
diff -u -p -r1.337 kern_sysctl.c
--- kern/kern_sysctl.c  16 May 2018 14:53:43 -0000      1.337
+++ kern/kern_sysctl.c  25 May 2018 08:24:33 -0000
@@ -1059,7 +1059,9 @@ fill_file(struct kinfo_file *kf, struct 
                kf->f_flag = fp->f_flag;
                kf->f_iflags = fp->f_iflags;
                kf->f_type = fp->f_type;
+               mtx_enter(&fhdlk);
                kf->f_count = fp->f_count;
+               mtx_leave(&fhdlk);
                if (show_pointers)
                        kf->f_ucred = PTRTOINT64(fp->f_cred);
                kf->f_uid = fp->f_cred->cr_uid;
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.78
diff -u -p -r1.78 sys_pipe.c
--- kern/sys_pipe.c     10 Apr 2018 09:17:45 -0000      1.78
+++ kern/sys_pipe.c     25 May 2018 08:24:33 -0000
@@ -154,7 +154,7 @@ dopipe(struct proc *p, int *ufds, int fl
 
        fdplock(fdp);
 
-       error = falloc(p, cloexec, &rf, &fds[0]);
+       error = falloc(p, &rf, &fds[0]);
        if (error != 0)
                goto free2;
        rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
@@ -162,7 +162,7 @@ dopipe(struct proc *p, int *ufds, int fl
        rf->f_data = rpipe;
        rf->f_ops = &pipeops;
 
-       error = falloc(p, cloexec, &wf, &fds[1]);
+       error = falloc(p, &wf, &fds[1]);
        if (error != 0)
                goto free3;
        wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
@@ -173,8 +173,8 @@ dopipe(struct proc *p, int *ufds, int fl
        rpipe->pipe_peer = wpipe;
        wpipe->pipe_peer = rpipe;
 
-       FILE_SET_MATURE(rf, p);
-       FILE_SET_MATURE(wf, p);
+       fdinsert(fdp, fds[0], cloexec, rf);
+       fdinsert(fdp, fds[1], cloexec, wf);
 
        error = copyout(fds, ufds, sizeof(fds));
        if (error != 0) {
@@ -186,6 +186,9 @@ dopipe(struct proc *p, int *ufds, int fl
                ktrfds(p, fds, 2);
 #endif
        fdpunlock(fdp);
+
+       FRELE(rf, p);
+       FRELE(wf, p);
        return (error);
 
 free3:
Index: kern/syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.c,v
retrieving revision 1.190
diff -u -p -r1.190 syscalls.c
--- kern/syscalls.c     12 Dec 2017 01:13:14 -0000      1.190
+++ kern/syscalls.c     25 May 2018 08:24:33 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: syscalls.c,v 1.190 2017/12/12 01:13:14 deraadt Exp $  */
+/*     $OpenBSD$       */
 
 /*
  * System call names.
Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.180
diff -u -p -r1.180 syscalls.master
--- kern/syscalls.master        12 Dec 2017 01:12:34 -0000      1.180
+++ kern/syscalls.master        25 May 2018 08:24:33 -0000
@@ -88,18 +88,18 @@
 #else
 26     UNIMPL          ptrace
 #endif
-27     STD             { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
+27     STD NOLOCK      { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
                            int flags); }
-28     STD             { ssize_t sys_sendmsg(int s, \
+28     STD NOLOCK      { ssize_t sys_sendmsg(int s, \
                            const struct msghdr *msg, int flags); }
-29     STD             { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
+29     STD NOLOCK      { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
                            int flags, struct sockaddr *from, \
                            socklen_t *fromlenaddr); }
-30     STD             { int sys_accept(int s, struct sockaddr *name, \
+30     STD NOLOCK      { int sys_accept(int s, struct sockaddr *name, \
                            socklen_t *anamelen); }
-31     STD             { int sys_getpeername(int fdes, struct sockaddr *asa, \
+31     STD NOLOCK      { int sys_getpeername(int fdes, struct sockaddr *asa, \
                            socklen_t *alen); }
-32     STD             { int sys_getsockname(int fdes, struct sockaddr *asa, \
+32     STD NOLOCK      { int sys_getsockname(int fdes, struct sockaddr *asa, \
                            socklen_t *alen); }
 33     STD             { int sys_access(const char *path, int amode); }
 34     STD             { int sys_chflags(const char *path, u_int flags); }
@@ -205,26 +205,26 @@
 91     STD             { int sys_nanosleep(const struct timespec *rqtp, \
                            struct timespec *rmtp); }
 92     STD             { int sys_fcntl(int fd, int cmd, ... void *arg); }
-93     STD             { int sys_accept4(int s, struct sockaddr *name, \
+93     STD NOLOCK      { int sys_accept4(int s, struct sockaddr *name, \
                            socklen_t *anamelen, int flags); }
 94     STD             { int sys___thrsleep(const volatile void *ident, \
                            clockid_t clock_id, const struct timespec *tp, \
                            void *lock, const int *abort); }
 95     STD             { int sys_fsync(int fd); }
 96     STD             { int sys_setpriority(int which, id_t who, int prio); }
-97     STD             { int sys_socket(int domain, int type, int protocol); }
-98     STD             { int sys_connect(int s, const struct sockaddr *name, \
+97     STD NOLOCK      { int sys_socket(int domain, int type, int protocol); }
+98     STD NOLOCK      { int sys_connect(int s, const struct sockaddr *name, \
                            socklen_t namelen); }
 99     STD             { int sys_getdents(int fd, void *buf, size_t buflen); }
 100    STD             { int sys_getpriority(int which, id_t who); }
 101    STD             { int sys_pipe2(int *fdp, int flags); }
 102    STD             { int sys_dup3(int from, int to, int flags); }
 103    STD             { int sys_sigreturn(struct sigcontext *sigcntxp); }
-104    STD             { int sys_bind(int s, const struct sockaddr *name, \
+104    STD NOLOCK      { int sys_bind(int s, const struct sockaddr *name, \
                            socklen_t namelen); }
-105    STD             { int sys_setsockopt(int s, int level, int name, \
+105    STD NOLOCK      { int sys_setsockopt(int s, int level, int name, \
                            const void *val, socklen_t valsize); }
-106    STD             { int sys_listen(int s, int backlog); }
+106    STD NOLOCK      { int sys_listen(int s, int backlog); }
 107    STD             { int sys_chflagsat(int fd, const char *path, \
                            u_int flags, int atflags); }
 108    STD             { int sys_pledge(const char *promises, \
@@ -243,7 +243,7 @@
 115    OBSOL           vtrace
 116    OBSOL           t32_gettimeofday
 117    OBSOL           t32_getrusage
-118    STD             { int sys_getsockopt(int s, int level, int name, \
+118    STD NOLOCK      { int sys_getsockopt(int s, int level, int name, \
                            void *val, socklen_t *avalsize); }
 119    STD             { int sys_thrkill(pid_t tid, int signum, void *tcb); }
 120    STD             { ssize_t sys_readv(int fd, \
@@ -261,11 +261,11 @@
 130    OBSOL           oftruncate
 131    STD             { int sys_flock(int fd, int how); }
 132    STD             { int sys_mkfifo(const char *path, mode_t mode); }
-133    STD             { ssize_t sys_sendto(int s, const void *buf, \
+133    STD NOLOCK      { ssize_t sys_sendto(int s, const void *buf, \
                            size_t len, int flags, const struct sockaddr *to, \
                            socklen_t tolen); }
-134    STD             { int sys_shutdown(int s, int how); }
-135    STD             { int sys_socketpair(int domain, int type, \
+134    STD NOLOCK      { int sys_shutdown(int s, int how); }
+135    STD NOLOCK      { int sys_socketpair(int domain, int type, \
                            int protocol, int *rsv); }
 136    STD             { int sys_mkdir(const char *path, mode_t mode); }
 137    STD             { int sys_rmdir(const char *path); }
Index: kern/tty_pty.c
===================================================================
RCS file: /cvs/src/sys/kern/tty_pty.c,v
retrieving revision 1.84
diff -u -p -r1.84 tty_pty.c
--- kern/tty_pty.c      28 Apr 2018 03:13:04 -0000      1.84
+++ kern/tty_pty.c      25 May 2018 08:24:33 -0000
@@ -1070,11 +1070,11 @@ ptmioctl(dev_t dev, u_long cmd, caddr_t 
        case PTMGET:
                fdplock(fdp);
                /* Grab two filedescriptors. */
-               if ((error = falloc(p, 0, &cfp, &cindx)) != 0) {
+               if ((error = falloc(p, &cfp, &cindx)) != 0) {
                        fdpunlock(fdp);
                        break;
                }
-               if ((error = falloc(p, 0, &sfp, &sindx)) != 0) {
+               if ((error = falloc(p, &sfp, &sindx)) != 0) {
                        fdremove(fdp, cindx);
                        closef(cfp, p);
                        fdpunlock(fdp);
@@ -1166,11 +1166,12 @@ retry:
                memcpy(ptm->cn, pti->pty_pn, sizeof(pti->pty_pn));
                memcpy(ptm->sn, pti->pty_sn, sizeof(pti->pty_sn));
 
-               /* mark the files mature now that we've passed all errors */
-               FILE_SET_MATURE(cfp, p);
-               FILE_SET_MATURE(sfp, p);
-
+               /* insert files now that we've passed all errors */
+               fdinsert(fdp, cindx, 0, cfp);
+               fdinsert(fdp, sindx, 0, sfp);
                fdpunlock(fdp);
+               FRELE(cfp, p);
+               FRELE(sfp, p);
                break;
        default:
                error = EINVAL;
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.171
diff -u -p -r1.171 uipc_syscalls.c
--- kern/uipc_syscalls.c        22 May 2018 09:51:01 -0000      1.171
+++ kern/uipc_syscalls.c        25 May 2018 08:24:33 -0000
@@ -101,13 +101,14 @@ sys_socket(struct proc *p, void *v, regi
        fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
 
        error = socreate(SCARG(uap, domain), &so, type, SCARG(uap, protocol));
-       if (error != 0)
-               goto out;
+       if (error)
+               return (error);
 
+       KERNEL_LOCK();
        fdplock(fdp);
-       error = falloc(p, cloexec, &fp, &fd);
-       fdpunlock(fdp);
+       error = falloc(p, &fp, &fd);
        if (error) {
+               fdpunlock(fdp);
                soclose(so);
        } else {
                fp->f_flag = fflag;
@@ -117,10 +118,12 @@ sys_socket(struct proc *p, void *v, regi
                        so->so_state |= SS_NBIO;
                so->so_state |= ss;
                fp->f_data = so;
-               FILE_SET_MATURE(fp, p);
+               fdinsert(fdp, fd, cloexec, fp);
+               fdpunlock(fdp);
+               FRELE(fp, p);
                *retval = fd;
        }
-out:
+       KERNEL_UNLOCK();
        return (error);
 }
 
@@ -272,7 +275,9 @@ doaccept(struct proc *p, int sock, struc
        socklen_t namelen;
        int error, s, tmpfd;
        struct socket *head, *so;
-       int nflag;
+       int cloexec, nflag;
+
+       cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
 
        if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
                return (error);
@@ -282,7 +287,7 @@ doaccept(struct proc *p, int sock, struc
        headfp = fp;
 
        fdplock(fdp);
-       error = falloc(p, (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0, &fp, &tmpfd);
+       error = falloc(p, &fp, &tmpfd);
        fdpunlock(fdp);
        if (error) {
                FRELE(headfp, p);
@@ -347,8 +352,11 @@ out:
                else
                        so->so_state &= ~SS_NBIO;
                sounlock(s);
+               fdplock(fdp);
                fp->f_data = so;
-               FILE_SET_MATURE(fp, p);
+               fdinsert(fdp, tmpfd, cloexec, fp);
+               fdpunlock(fdp);
+               FRELE(fp, p);
                *retval = tmpfd;
        } else {
                sounlock(s);
@@ -475,14 +483,15 @@ sys_socketpair(struct proc *p, void *v, 
                if (error != 0)
                        goto free2;
        }
+       KERNEL_LOCK();
        fdplock(fdp);
-       if ((error = falloc(p, cloexec, &fp1, &sv[0])) != 0)
+       if ((error = falloc(p, &fp1, &sv[0])) != 0)
                goto free3;
        fp1->f_flag = fflag;
        fp1->f_type = DTYPE_SOCKET;
        fp1->f_ops = &socketops;
        fp1->f_data = so1;
-       if ((error = falloc(p, cloexec, &fp2, &sv[1])) != 0)
+       if ((error = falloc(p, &fp2, &sv[1])) != 0)
                goto free4;
        fp2->f_flag = fflag;
        fp2->f_type = DTYPE_SOCKET;
@@ -500,9 +509,12 @@ sys_socketpair(struct proc *p, void *v, 
                        (*fp2->f_ops->fo_ioctl)(fp2, FIONBIO, (caddr_t)&type,
                            p);
                }
-               FILE_SET_MATURE(fp1, p);
-               FILE_SET_MATURE(fp2, p);
+               fdinsert(fdp, sv[0], cloexec, fp1);
+               fdinsert(fdp, sv[1], cloexec, fp2);
                fdpunlock(fdp);
+               FRELE(fp1, p);
+               FRELE(fp2, p);
+               KERNEL_UNLOCK();
                return (0);
        }
        fdremove(fdp, sv[1]);
@@ -514,6 +526,7 @@ free4:
        so1 = NULL;
 free3:
        fdpunlock(fdp);
+       KERNEL_UNLOCK();
 free2:
        if (so2 != NULL)
                (void)soclose(so2);
@@ -678,13 +691,16 @@ sendit(struct proc *p, int s, struct msg
        }
 #endif
        len = auio.uio_resid;
-       error = sosend(fp->f_data, to, &auio, NULL, control, flags);
+       error = sosend(so, to, &auio, NULL, control, flags);
        if (error) {
                if (auio.uio_resid != len && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
-               if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0)
+               if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
+                       KERNEL_LOCK();
                        ptsignal(p, SIGPIPE, STHREAD);
+                       KERNEL_UNLOCK();
+               }
        }
        if (error == 0) {
                *retsize = len - auio.uio_resid;
@@ -925,11 +941,13 @@ sys_shutdown(struct proc *p, void *v, re
                syscallarg(int) how;
        } */ *uap = v;
        struct file *fp;
+       struct socket *so;
        int error;
 
        if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
                return (error);
-       error = soshutdown(fp->f_data, SCARG(uap, how));
+       so = fp->f_data;
+       error = soshutdown(so, SCARG(uap, how));
        FRELE(fp, p);
        return (error);
 }
@@ -1163,7 +1181,8 @@ getsock(struct proc *p, int fdes, struct
 {
        struct file *fp;
 
-       if ((fp = fd_getfile(p->p_fd, fdes)) == NULL)
+       fp = fd_getfile(p->p_fd, fdes);
+       if (fp == NULL)
                return (EBADF);
        if (fp->f_type != DTYPE_SOCKET) {
                FRELE(fp, p);
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.126
diff -u -p -r1.126 uipc_usrreq.c
--- kern/uipc_usrreq.c  28 Apr 2018 03:13:04 -0000      1.126
+++ kern/uipc_usrreq.c  25 May 2018 08:24:33 -0000
@@ -899,6 +899,7 @@ unp_gc(void *arg __unused)
                        fp = defer->ud_fp[i].fp;
                        if (fp == NULL)
                                continue;
+                        /* closef() expects a refcount of 2 */
                        FREF(fp);
                        if ((unp = fptounp(fp)) != NULL)
                                unp->unp_msgcount--;
@@ -915,6 +916,8 @@ unp_gc(void *arg __unused)
        do {
                nunref = 0;
                LIST_FOREACH(unp, &unp_head, unp_link) {
+                       mtx_enter(&fhdlk);
+                       fp = unp->unp_file;
                        if (unp->unp_flags & UNP_GCDEFER) {
                                /*
                                 * This socket is referenced by another
@@ -925,8 +928,9 @@ unp_gc(void *arg __unused)
                                unp_defer--;
                        } else if (unp->unp_flags & UNP_GCMARK) {
                                /* marked as live in previous pass */
+                               mtx_leave(&fhdlk);
                                continue;
-                       } else if ((fp = unp->unp_file) == NULL) {
+                       } else if (fp == NULL) {
                                /* not being passed, so can't be in loop */
                        } else if (fp->f_count == 0) {
                                /*
@@ -943,9 +947,11 @@ unp_gc(void *arg __unused)
                                if (fp->f_count == unp->unp_msgcount) {
                                        nunref++;
                                        unp->unp_flags |= UNP_GCDEAD;
+                                       mtx_leave(&fhdlk);
                                        continue;
                                }
                        }
+                       mtx_leave(&fhdlk);
 
                        /*
                         * This is the first time we've seen this socket on
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.283
diff -u -p -r1.283 vfs_syscalls.c
--- kern/vfs_syscalls.c 8 May 2018 08:53:41 -0000       1.283
+++ kern/vfs_syscalls.c 25 May 2018 08:24:33 -0000
@@ -899,7 +899,7 @@ doopenat(struct proc *p, int fd, const c
        struct file *fp;
        struct vnode *vp;
        struct vattr vattr;
-       int flags, cmode;
+       int flags, cloexec, cmode;
        int type, indx, error, localtrunc = 0;
        struct flock lf;
        struct nameidata nd;
@@ -911,10 +911,10 @@ doopenat(struct proc *p, int fd, const c
                        return (error);
        }
 
-       fdplock(fdp);
+       cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
 
-       if ((error = falloc(p, (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
-           &indx)) != 0)
+       fdplock(fdp);
+       if ((error = falloc(p, &fp, &indx)) != 0)
                goto out;
        flags = FFLAGS(oflags);
        if (flags & FREAD)
@@ -999,7 +999,8 @@ doopenat(struct proc *p, int fd, const c
        }
        VOP_UNLOCK(vp);
        *retval = indx;
-       FILE_SET_MATURE(fp, p);
+       fdinsert(fdp, indx, cloexec, fp);
+       FRELE(fp, p);
 out:
        fdpunlock(fdp);
        return (error);
@@ -1060,7 +1061,7 @@ sys_fhopen(struct proc *p, void *v, regi
        struct vnode *vp = NULL;
        struct mount *mp;
        struct ucred *cred = p->p_ucred;
-       int flags;
+       int flags, cloexec;
        int type, indx, error=0;
        struct flock lf;
        struct vattr va;
@@ -1078,9 +1079,10 @@ sys_fhopen(struct proc *p, void *v, regi
        if ((flags & O_CREAT))
                return (EINVAL);
 
+       cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
+
        fdplock(fdp);
-       if ((error = falloc(p, (flags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
-           &indx)) != 0) {
+       if ((error = falloc(p, &fp, &indx)) != 0) {
                fp = NULL;
                goto bad;
        }
@@ -1160,9 +1162,9 @@ sys_fhopen(struct proc *p, void *v, regi
        }
        VOP_UNLOCK(vp);
        *retval = indx;
-       FILE_SET_MATURE(fp, p);
-
+       fdinsert(fdp, indx, cloexec, fp);
        fdpunlock(fdp);
+       FRELE(fp, p);
        return (0);
 
 bad:
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.552
diff -u -p -r1.552 if.c
--- net/if.c    17 May 2018 11:04:14 -0000      1.552
+++ net/if.c    25 May 2018 08:24:33 -0000
@@ -1378,7 +1378,7 @@ ifa_ifwithaddr(struct sockaddr *addr, u_
        struct ifaddr *ifa;
        u_int rdomain;
 
-       KERNEL_ASSERT_LOCKED();
+       KERNEL_LOCK();
        rdomain = rtable_l2(rtableid);
        TAILQ_FOREACH(ifp, &ifnet, if_list) {
                if (ifp->if_rdomain != rdomain)
@@ -1388,10 +1388,13 @@ ifa_ifwithaddr(struct sockaddr *addr, u_
                        if (ifa->ifa_addr->sa_family != addr->sa_family)
                                continue;
 
-                       if (equal(addr, ifa->ifa_addr))
+                       if (equal(addr, ifa->ifa_addr)) {
+                               KERNEL_UNLOCK();
                                return (ifa);
+                       }
                }
        }
+       KERNEL_UNLOCK();
        return (NULL);
 }
 
@@ -1404,8 +1407,8 @@ ifa_ifwithdstaddr(struct sockaddr *addr,
        struct ifnet *ifp;
        struct ifaddr *ifa;
 
-       KERNEL_ASSERT_LOCKED();
        rdomain = rtable_l2(rdomain);
+       KERNEL_LOCK();
        TAILQ_FOREACH(ifp, &ifnet, if_list) {
                if (ifp->if_rdomain != rdomain)
                        continue;
@@ -1414,11 +1417,14 @@ ifa_ifwithdstaddr(struct sockaddr *addr,
                                if (ifa->ifa_addr->sa_family !=
                                    addr->sa_family || ifa->ifa_dstaddr == NULL)
                                        continue;
-                               if (equal(addr, ifa->ifa_dstaddr))
+                               if (equal(addr, ifa->ifa_dstaddr)) {
+                                       KERNEL_UNLOCK();
                                        return (ifa);
+                               }
                        }
                }
        }
+       KERNEL_UNLOCK();
        return (NULL);
 }
 
Index: sys/file.h
===================================================================
RCS file: /cvs/src/sys/sys/file.h,v
retrieving revision 1.45
diff -u -p -r1.45 file.h
--- sys/file.h  9 May 2018 08:42:02 -0000       1.45
+++ sys/file.h  25 May 2018 08:24:33 -0000
@@ -65,6 +65,7 @@ struct        fileops {
  *
  *  Locks used to protect struct members in this file:
  *     I       immutable after creation
+ *     F       global `fhdlk' mutex
  *     f       per file `f_mtx'
  *     k       kernel lock
  */
@@ -77,7 +78,7 @@ struct file {
 #define        DTYPE_PIPE      3       /* pipe */
 #define        DTYPE_KQUEUE    4       /* event queue */
        short   f_type;         /* [I] descriptor type */
-       long    f_count;        /* [k] reference count */
+       long    f_count;        /* [F] reference count */
        struct  ucred *f_cred;  /* [I] credentials associated with descriptor */
        struct  fileops *f_ops; /* [I] file operation pointers */
        off_t   f_offset;       /* [k] */
@@ -91,26 +92,31 @@ struct file {
 };
 
 #define FIF_HASLOCK            0x01    /* descriptor holds advisory lock */
-#define FIF_LARVAL             0x02    /* not fully constructed, don't use */
-
-#define FILE_IS_USABLE(fp) \
-       (((fp)->f_iflags & FIF_LARVAL) == 0)
+#define FIF_INSERTED           0x80    /* present in `filehead' */
 
 #define FREF(fp) \
        do { \
                extern void vfs_stall_barrier(void); \
                vfs_stall_barrier(); \
+               mtx_enter(&fhdlk); \
                (fp)->f_count++; \
+               mtx_leave(&fhdlk); \
        } while (0)
-#define FRELE(fp,p)    (--(fp)->f_count == 0 ? fdrop(fp, p) : 0)
 
-#define FILE_SET_MATURE(fp,p) do {                             \
-       (fp)->f_iflags &= ~FIF_LARVAL;                          \
-       FRELE(fp, p);                                           \
-} while (0)
+#define FRELE(fp,p) \
+({ \
+       int rv = 0; \
+       mtx_enter(&fhdlk); \
+       if (--(fp)->f_count == 0) \
+               rv = fdrop(fp, p); \
+       else \
+               mtx_leave(&fhdlk); \
+       rv; \
+})
 
 int    fdrop(struct file *, struct proc *);
 
+extern struct mutex fhdlk;             /* protects `filehead' and f_count */
 LIST_HEAD(filelist, file);
 extern int maxfiles;                   /* kernel limit on number of open files */
 extern int numfiles;                   /* actual number of open files */
Index: sys/filedesc.h
===================================================================
RCS file: /cvs/src/sys/sys/filedesc.h,v
retrieving revision 1.35
diff -u -p -r1.35 filedesc.h
--- sys/filedesc.h      25 Apr 2018 10:29:17 -0000      1.35
+++ sys/filedesc.h      25 May 2018 08:24:33 -0000
@@ -125,12 +125,13 @@ void      filedesc_init(void);
 int    dupfdopen(struct proc *, int, int);
 int    fdalloc(struct proc *p, int want, int *result);
 void   fdexpand(struct proc *);
-int    falloc(struct proc *_p, int _flags, struct file **_rfp, int *_rfd);
+int    falloc(struct proc *_p, struct file **_rfp, int *_rfd);
 struct filedesc *fdinit(void);
 struct filedesc *fdshare(struct process *);
 struct filedesc *fdcopy(struct process *);
 void   fdfree(struct proc *p);
 int    fdrelease(struct proc *p, int);
+void   fdinsert(struct filedesc *, int, int, struct file *);
 void   fdremove(struct filedesc *, int);
 void   fdcloseexec(struct proc *);
 struct file *fd_iterfile(struct file *, struct proc *);
Index: sys/syscall.h
===================================================================
RCS file: /cvs/src/sys/sys/syscall.h,v
retrieving revision 1.190
diff -u -p -r1.190 syscall.h
--- sys/syscall.h       12 Dec 2017 01:13:14 -0000      1.190
+++ sys/syscall.h       25 May 2018 08:24:33 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: syscall.h,v 1.190 2017/12/12 01:13:14 deraadt Exp $   */
+/*     $OpenBSD$       */
 
 /*
  * System call numbers.
Index: sys/syscallargs.h
===================================================================
RCS file: /cvs/src/sys/sys/syscallargs.h,v
retrieving revision 1.193
diff -u -p -r1.193 syscallargs.h
--- sys/syscallargs.h   12 Dec 2017 01:13:14 -0000      1.193
+++ sys/syscallargs.h   25 May 2018 08:24:33 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: syscallargs.h,v 1.193 2017/12/12 01:13:14 deraadt Exp $       */
+/*     $OpenBSD$       */
 
 /*
  * System call argument lists.

Reply via email to