Martin Pieuchot wrote:
> By assuming that `f_data' is immutable, which AFAIK is true for sockets,
> we can remove the KERNEL_LOCK() from the following syscalls iff files are
> refcounted in an MP-safe way.
> 
> This diff includes the EBUSY check in dup2(2) which is currently required
> to avoid races with accept(2) and will later make our life easier wrt
> open(2).
> 
> It also includes the fdinsert() diff I sent earlier.
> 
> On top of that I'm introducing a global mutex, `fhdlk', that protects
> `f_count' and the implicit reference in `filehead'.
> 
> A socket stays alive as long as its associated file has a positive
> refcount.  When this refcount drops, fdrop() will be called and soclose()
> will free/clean `f_data'.   That's the only place where `f_data' is
> changed during the life of a socket.  That's why it is safe to dereference
> `f_data' when getsock() returned a valid & refcounted `fp'.
> 
> Many ktrace(2) internals now need to grab the KERNEL_LOCK(), just like 
> ptsignal().
> 
> Note that for unix, routing and pfkey sockets, solock() still grabs the
> KERNEL_LOCK().  So even if syscalls are marked as SY_NOLOCK that doesn't
> mean they won't grab it.  In fact some network functions like
> ifa_ifwithaddr() below now need to grab the KERNEL_LOCK().  That's good:
> it means we're pushing the lock down.
> 
> Tests?  Comments?

I have been running this on a server since this morning, doing various
stress tests, syscalls, etc., and it is running fine.

I also tried it on a desktop (a VM): after just running Chrome for a short
while I get a reproducible hang that leaves the filesystem dirty. The
machine simply hangs and I cannot drop into ddb. I'll try to get more
information.

> 
> Index: kern/exec_script.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/exec_script.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 exec_script.c
> --- kern/exec_script.c        2 May 2018 02:24:56 -0000       1.44
> +++ kern/exec_script.c        22 May 2018 13:21:54 -0000
> @@ -170,17 +170,20 @@ check_shell:
>  #endif
>  
>               fdplock(p->p_fd);
> -             error = falloc(p, 0, &fp, &epp->ep_fd);
> -             fdpunlock(p->p_fd);
> -             if (error)
> +             error = falloc(p, &fp, &epp->ep_fd);
> +             if (error) {
> +                     fdpunlock(p->p_fd);
>                       goto fail;
> +             }
>  
>               epp->ep_flags |= EXEC_HASFD;
>               fp->f_type = DTYPE_VNODE;
>               fp->f_ops = &vnops;
>               fp->f_data = (caddr_t) scriptvp;
>               fp->f_flag = FREAD;
> -             FILE_SET_MATURE(fp, p);
> +             fdinsert(p->p_fd, epp->ep_fd, 0, fp);
> +             fdpunlock(p->p_fd);
> +             FRELE(fp, p);
>       }
>  
>       /* set up the parameters for the recursive check_exec() call */
> Index: kern/init_sysent.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_sysent.c,v
> retrieving revision 1.191
> diff -u -p -r1.191 init_sysent.c
> --- kern/init_sysent.c        12 Dec 2017 01:13:14 -0000      1.191
> +++ kern/init_sysent.c        22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/*   $OpenBSD: init_sysent.c,v 1.191 2017/12/12 01:13:14 deraadt Exp $       */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call switch table.
> @@ -76,17 +76,17 @@ struct sysent sysent[] = {
>       { 0, 0, 0,
>           sys_nosys },                        /* 26 = unimplemented ptrace */
>  #endif
> -     { 3, s(struct sys_recvmsg_args), 0,
> +     { 3, s(struct sys_recvmsg_args), SY_NOLOCK | 0,
>           sys_recvmsg },                      /* 27 = recvmsg */
> -     { 3, s(struct sys_sendmsg_args), 0,
> +     { 3, s(struct sys_sendmsg_args), SY_NOLOCK | 0,
>           sys_sendmsg },                      /* 28 = sendmsg */
> -     { 6, s(struct sys_recvfrom_args), 0,
> +     { 6, s(struct sys_recvfrom_args), SY_NOLOCK | 0,
>           sys_recvfrom },                     /* 29 = recvfrom */
> -     { 3, s(struct sys_accept_args), 0,
> +     { 3, s(struct sys_accept_args), SY_NOLOCK | 0,
>           sys_accept },                       /* 30 = accept */
> -     { 3, s(struct sys_getpeername_args), 0,
> +     { 3, s(struct sys_getpeername_args), SY_NOLOCK | 0,
>           sys_getpeername },                  /* 31 = getpeername */
> -     { 3, s(struct sys_getsockname_args), 0,
> +     { 3, s(struct sys_getsockname_args), SY_NOLOCK | 0,
>           sys_getsockname },                  /* 32 = getsockname */
>       { 2, s(struct sys_access_args), 0,
>           sys_access },                       /* 33 = access */
> @@ -218,7 +218,7 @@ struct sysent sysent[] = {
>           sys_nanosleep },                    /* 91 = nanosleep */
>       { 3, s(struct sys_fcntl_args), 0,
>           sys_fcntl },                        /* 92 = fcntl */
> -     { 4, s(struct sys_accept4_args), 0,
> +     { 4, s(struct sys_accept4_args), SY_NOLOCK | 0,
>           sys_accept4 },                      /* 93 = accept4 */
>       { 5, s(struct sys___thrsleep_args), 0,
>           sys___thrsleep },                   /* 94 = __thrsleep */
> @@ -226,9 +226,9 @@ struct sysent sysent[] = {
>           sys_fsync },                        /* 95 = fsync */
>       { 3, s(struct sys_setpriority_args), 0,
>           sys_setpriority },                  /* 96 = setpriority */
> -     { 3, s(struct sys_socket_args), 0,
> +     { 3, s(struct sys_socket_args), SY_NOLOCK | 0,
>           sys_socket },                       /* 97 = socket */
> -     { 3, s(struct sys_connect_args), 0,
> +     { 3, s(struct sys_connect_args), SY_NOLOCK | 0,
>           sys_connect },                      /* 98 = connect */
>       { 3, s(struct sys_getdents_args), 0,
>           sys_getdents },                     /* 99 = getdents */
> @@ -240,11 +240,11 @@ struct sysent sysent[] = {
>           sys_dup3 },                         /* 102 = dup3 */
>       { 1, s(struct sys_sigreturn_args), 0,
>           sys_sigreturn },                    /* 103 = sigreturn */
> -     { 3, s(struct sys_bind_args), 0,
> +     { 3, s(struct sys_bind_args), SY_NOLOCK | 0,
>           sys_bind },                         /* 104 = bind */
> -     { 5, s(struct sys_setsockopt_args), 0,
> +     { 5, s(struct sys_setsockopt_args), SY_NOLOCK | 0,
>           sys_setsockopt },                   /* 105 = setsockopt */
> -     { 2, s(struct sys_listen_args), 0,
> +     { 2, s(struct sys_listen_args), SY_NOLOCK | 0,
>           sys_listen },                       /* 106 = listen */
>       { 4, s(struct sys_chflagsat_args), 0,
>           sys_chflagsat },                    /* 107 = chflagsat */
> @@ -268,7 +268,7 @@ struct sysent sysent[] = {
>           sys_nosys },                        /* 116 = obsolete t32_gettimeofday */
>       { 0, 0, 0,
>           sys_nosys },                        /* 117 = obsolete t32_getrusage */
> -     { 5, s(struct sys_getsockopt_args), 0,
> +     { 5, s(struct sys_getsockopt_args), SY_NOLOCK | 0,
>           sys_getsockopt },                   /* 118 = getsockopt */
>       { 3, s(struct sys_thrkill_args), 0,
>           sys_thrkill },                      /* 119 = thrkill */
> @@ -298,11 +298,11 @@ struct sysent sysent[] = {
>           sys_flock },                        /* 131 = flock */
>       { 2, s(struct sys_mkfifo_args), 0,
>           sys_mkfifo },                       /* 132 = mkfifo */
> -     { 6, s(struct sys_sendto_args), 0,
> +     { 6, s(struct sys_sendto_args), SY_NOLOCK | 0,
>           sys_sendto },                       /* 133 = sendto */
> -     { 2, s(struct sys_shutdown_args), 0,
> +     { 2, s(struct sys_shutdown_args), SY_NOLOCK | 0,
>           sys_shutdown },                     /* 134 = shutdown */
> -     { 4, s(struct sys_socketpair_args), 0,
> +     { 4, s(struct sys_socketpair_args), SY_NOLOCK | 0,
>           sys_socketpair },                   /* 135 = socketpair */
>       { 2, s(struct sys_mkdir_args), 0,
>           sys_mkdir },                        /* 136 = mkdir */
> Index: kern/kern_descrip.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_descrip.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 kern_descrip.c
> --- kern/kern_descrip.c       8 May 2018 09:03:58 -0000       1.158
> +++ kern/kern_descrip.c       22 May 2018 13:21:54 -0000
> @@ -67,6 +67,7 @@
>  /*
>   * Descriptor management.
>   */
> +struct mutex fhdlk = MUTEX_INITIALIZER(IPL_NONE);
>  struct filelist filehead;    /* head of list of open files */
>  int numfiles;                        /* actual number of open files */
>  
> @@ -144,6 +145,23 @@ find_last_set(struct filedesc *fd, int l
>       return i;
>  }
>  
> +static __inline int
> +fd_inuse(struct filedesc *fdp, int fd)
> +{
> +     u_int off = fd >> NDENTRYSHIFT;
> +
> +     if (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK)))
> +             return 1;
> +
> +     if (fdp->fd_lomap[off] != ~0)
> +             return 0;
> +
> +     if (fdp->fd_himap[off >> NDENTRYSHIFT] & (1 << (off & NDENTRYMASK)))
> +             return 1;
> +
> +     return 0;
> +}
> +
>  static __inline void
>  fd_used(struct filedesc *fdp, int fd)
>  {
> @@ -184,16 +202,18 @@ fd_iterfile(struct file *fp, struct proc
>  {
>       struct file *nfp;
>  
> +     mtx_enter(&fhdlk);
>       if (fp == NULL)
>               nfp = LIST_FIRST(&filehead);
>       else
>               nfp = LIST_NEXT(fp, f_list);
>  
> -     /* don't FREF when f_count == 0 to avoid race in fdrop() */
> -     while (nfp != NULL && (nfp->f_count == 0 || !FILE_IS_USABLE(nfp)))
> +     /* don't refcount when f_count == 0 to avoid race in fdrop() */
> +     while (nfp != NULL && nfp->f_count == 0)
>               nfp = LIST_NEXT(nfp, f_list);
>       if (nfp != NULL)
> -             FREF(nfp);
> +             nfp->f_count++;
> +     mtx_leave(&fhdlk);
>  
>       if (fp != NULL)
>               FRELE(fp, p);
> @@ -206,13 +226,17 @@ fd_getfile(struct filedesc *fdp, int fd)
>  {
>       struct file *fp;
>  
> -     if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
> -             return (NULL);
> +     vfs_stall_barrier();
>  
> -     if (!FILE_IS_USABLE(fp))
> +     if ((u_int)fd >= fdp->fd_nfiles)
>               return (NULL);
>  
> -     FREF(fp);
> +     mtx_enter(&fhdlk);
> +     fp = fdp->fd_ofiles[fd];
> +     if (fp != NULL)
> +             fp->f_count++;
> +     mtx_leave(&fhdlk);
> +
>       return (fp);
>  }
>  
> @@ -634,18 +658,22 @@ finishdup(struct proc *p, struct file *f
>               return (EDEADLK);
>       }
>  
> -     /*
> -      * Don't fd_getfile here. We want to closef LARVAL files and
> -      * closef can deal with that.
> -      */
> +     mtx_enter(&fhdlk);
>       oldfp = fdp->fd_ofiles[new];
>       if (oldfp != NULL)
> -             FREF(oldfp);
> +             oldfp->f_count++;
> +     mtx_leave(&fhdlk);
> +
> +     if (dup2 && oldfp == NULL) {
> +             if (fd_inuse(fdp, new)) {
> +                     FRELE(fp, p);
> +                     return (EBUSY);
> +             }
> +             fd_used(fdp, new);
> +     }
>  
>       fdp->fd_ofiles[new] = fp;
>       fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
> -     if (dup2 && oldfp == NULL)
> -             fd_used(fdp, new);
>       *retval = new;
>  
>       if (oldfp != NULL) {
> @@ -658,6 +686,25 @@ finishdup(struct proc *p, struct file *f
>  }
>  
>  void
> +fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
> +{
> +     struct file *fq;
> +
> +     fdpassertlocked(fdp);
> +
> +     mtx_enter(&fhdlk);
> +     if ((fq = fdp->fd_ofiles[0]) != NULL) {
> +             LIST_INSERT_AFTER(fq, fp, f_list);
> +     } else {
> +             LIST_INSERT_HEAD(&filehead, fp, f_list);
> +     }
> +     fdp->fd_ofiles[fd] = fp;
> +     fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
> +     fp->f_iflags |= FIF_INSERTED;
> +     mtx_leave(&fhdlk);
> +}
> +
> +void
>  fdremove(struct filedesc *fdp, int fd)
>  {
>       fdpassertlocked(fdp);
> @@ -670,21 +717,14 @@ int
>  fdrelease(struct proc *p, int fd)
>  {
>       struct filedesc *fdp = p->p_fd;
> -     struct file **fpp, *fp;
> +     struct file *fp;
>  
>       fdpassertlocked(fdp);
>  
> -     /*
> -      * Don't fd_getfile here. We want to closef LARVAL files and closef
> -      * can deal with that.
> -      */
> -     fpp = &fdp->fd_ofiles[fd];
> -     fp = *fpp;
> +     fp = fd_getfile(fdp, fd);
>       if (fp == NULL)
>               return (EBADF);
> -     FREF(fp);
> -     *fpp = NULL;
> -     fd_unused(fdp, fd);
> +     fdremove(fdp, fd);
>       if (fd < fdp->fd_knlistsize)
>               knote_fdclose(p, fd);
>       return (closef(fp, p));
> @@ -927,9 +967,9 @@ fdexpand(struct proc *p)
>   * a file descriptor for the process that refers to it.
>   */
>  int
> -falloc(struct proc *p, int flags, struct file **resultfp, int *resultfd)
> +falloc(struct proc *p, struct file **resultfp, int *resultfd)
>  {
> -     struct file *fp, *fq;
> +     struct file *fp;
>       int error, i;
>  
>       KASSERT(resultfp != NULL);
> @@ -958,20 +998,16 @@ restart:
>       numfiles++;
>       fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
>       mtx_init(&fp->f_mtx, IPL_NONE);
> -     fp->f_iflags = FIF_LARVAL;
> -     if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
> -             LIST_INSERT_AFTER(fq, fp, f_list);
> -     } else {
> -             LIST_INSERT_HEAD(&filehead, fp, f_list);
> -     }
> -     p->p_fd->fd_ofiles[i] = fp;
> -     p->p_fd->fd_ofileflags[i] |= (flags & UF_EXCLOSE);
>       fp->f_count = 1;
>       fp->f_cred = p->p_ucred;
>       crhold(fp->f_cred);
>       *resultfp = fp;
>       *resultfd = i;
> -     FREF(fp);
> +
> +     mtx_enter(&fhdlk);
> +     fp->f_count++;
> +     mtx_leave(&fhdlk);
> +
>       return (0);
>  }
>  
> @@ -1063,6 +1099,7 @@ fdcopy(struct process *pr)
>       newfdp->fd_flags = fdp->fd_flags;
>       newfdp->fd_cmask = fdp->fd_cmask;
>  
> +     mtx_enter(&fhdlk);
>       for (i = 0; i <= fdp->fd_lastfile; i++) {
>               struct file *fp = fdp->fd_ofiles[i];
>  
> @@ -1079,12 +1116,13 @@ fdcopy(struct process *pr)
>                           fp->f_type == DTYPE_KQUEUE)
>                               continue;
>  
> -                     FREF(fp);
> +                     fp->f_count++;
>                       newfdp->fd_ofiles[i] = fp;
>                       newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
>                       fd_used(newfdp, i);
>               }
>       }
> +     mtx_leave(&fhdlk);
>       fdpunlock(fdp);
>  
>       return (newfdp);
> @@ -1106,8 +1144,9 @@ fdfree(struct proc *p)
>       for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
>               fp = *fpp;
>               if (fp != NULL) {
> -                     FREF(fp);
>                       *fpp = NULL;
> +                      /* closef() expects a refcount of 2 */
> +                     FREF(fp);
>                       (void) closef(fp, p);
>               }
>       }
> @@ -1145,11 +1184,11 @@ closef(struct file *fp, struct proc *p)
>       if (fp == NULL)
>               return (0);
>  
> -#ifdef DIAGNOSTIC
> -     if (fp->f_count < 2)
> -             panic("closef: count (%ld) < 2", fp->f_count);
> -#endif
> +     KASSERTMSG(fp->f_count >= 2, "count (%ld) < 2", fp->f_count);
> +
> +     mtx_enter(&fhdlk);
>       fp->f_count--;
> +     mtx_leave(&fhdlk);
>  
>       /*
>        * POSIX record locking dictates that any close releases ALL
> @@ -1181,18 +1220,19 @@ fdrop(struct file *fp, struct proc *p)
>  {
>       int error;
>  
> -#ifdef DIAGNOSTIC
> -     if (fp->f_count != 0)
> -             panic("fdrop: count (%ld) != 0", fp->f_count);
> -#endif
> +     MUTEX_ASSERT_LOCKED(&fhdlk);
> +
> +     KASSERTMSG(fp->f_count == 0, "count (%ld) != 0", fp->f_count);
> +
> +     if (fp->f_iflags & FIF_INSERTED)
> +             LIST_REMOVE(fp, f_list);
> +     mtx_leave(&fhdlk);
>  
>       if (fp->f_ops)
>               error = (*fp->f_ops->fo_close)(fp, p);
>       else
>               error = 0;
>  
> -     /* Free fp */
> -     LIST_REMOVE(fp, f_list);
>       crfree(fp->f_cred);
>       numfiles--;
>       pool_put(&file_pool, fp);
> @@ -1307,7 +1347,7 @@ dupfdopen(struct proc *p, int indx, int 
>        * of file descriptors, or the fd to be dup'd has already been
>        * closed, reject. Note, there is no need to check for new == old
>        * because fd_getfile will return NULL if the file at indx is
> -      * newly created by falloc (FIF_LARVAL).
> +      * newly created by falloc.
>        */
>       if ((wfp = fd_getfile(fdp, dupfd)) == NULL)
>               return (EBADF);
> Index: kern/kern_event.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_event.c,v
> retrieving revision 1.88
> diff -u -p -r1.88 kern_event.c
> --- kern/kern_event.c 27 Apr 2018 10:13:37 -0000      1.88
> +++ kern/kern_event.c 22 May 2018 13:21:54 -0000
> @@ -441,10 +441,9 @@ sys_kqueue(struct proc *p, void *v, regi
>       int fd, error;
>  
>       fdplock(fdp);
> -     error = falloc(p, 0, &fp, &fd);
> -     fdpunlock(fdp);
> +     error = falloc(p, &fp, &fd);
>       if (error)
> -             return (error);
> +             goto out;
>       fp->f_flag = FREAD | FWRITE;
>       fp->f_type = DTYPE_KQUEUE;
>       fp->f_ops = &kqueueops;
> @@ -456,8 +455,11 @@ sys_kqueue(struct proc *p, void *v, regi
>       if (fdp->fd_knlistsize < 0)
>               fdp->fd_knlistsize = 0;         /* this process has a kq */
>       kq->kq_fdp = fdp;
> -     FILE_SET_MATURE(fp, p);
> -     return (0);
> +     fdinsert(fdp, fd, 0, fp);
> +     FRELE(fp, p);
> +out:
> +     fdpunlock(fdp);
> +     return (error);
>  }
>  
>  int
> Index: kern/kern_exec.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_exec.c,v
> retrieving revision 1.195
> diff -u -p -r1.195 kern_exec.c
> --- kern/kern_exec.c  28 Apr 2018 03:13:04 -0000      1.195
> +++ kern/kern_exec.c  22 May 2018 13:21:54 -0000
> @@ -584,7 +584,7 @@ sys_execve(struct proc *p, void *v, regi
>                               struct vnode *vp;
>                               int indx;
>  
> -                             if ((error = falloc(p, 0, &fp, &indx)) != 0)
> +                             if ((error = falloc(p, &fp, &indx)) != 0)
>                                       break;
>  #ifdef DIAGNOSTIC
>                               if (indx != i)
> @@ -607,10 +607,9 @@ sys_execve(struct proc *p, void *v, regi
>                               fp->f_type = DTYPE_VNODE;
>                               fp->f_ops = &vnops;
>                               fp->f_data = (caddr_t)vp;
> -                             FILE_SET_MATURE(fp, p);
> -                     } else {
> -                             FRELE(fp, p);
> +                             fdinsert(p->p_fd, indx, 0, fp);
>                       }
> +                     FRELE(fp, p);
>               }
>               fdpunlock(p->p_fd);
>               if (error)
> Index: kern/kern_ktrace.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_ktrace.c,v
> retrieving revision 1.96
> diff -u -p -r1.96 kern_ktrace.c
> --- kern/kern_ktrace.c        28 Apr 2018 03:13:04 -0000      1.96
> +++ kern/kern_ktrace.c        22 May 2018 13:21:54 -0000
> @@ -225,7 +225,7 @@ ktrgenio(struct proc *p, int fd, enum ui
>       struct ktr_header kth;
>       struct ktr_genio ktp;
>       caddr_t cp;
> -     int count;
> +     int count, error;
>       int buflen;
>  
>       atomic_setbits_int(&p->p_flag, P_INKTR);
> @@ -254,7 +254,10 @@ ktrgenio(struct proc *p, int fd, enum ui
>               if (copyin(iov->iov_base, cp, count))
>                       break;
>  
> -             if (ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count) != 0)
> +             KERNEL_LOCK();
> +             error = ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count);
> +             KERNEL_UNLOCK();
> +             if (error != 0)
>                       break;
>  
>               iov->iov_len -= count;
> @@ -294,13 +297,14 @@ ktrstruct(struct proc *p, const char *na
>  {
>       struct ktr_header kth;
>  
> -     KERNEL_ASSERT_LOCKED();
>       atomic_setbits_int(&p->p_flag, P_INKTR);
>       ktrinitheader(&kth, p, KTR_STRUCT);
> -     
> +
>       if (data == NULL)
>               datalen = 0;
> +     KERNEL_LOCK();
>       ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen);
> +     KERNEL_UNLOCK();
>       atomic_clearbits_int(&p->p_flag, P_INKTR);
>  }
>  
> @@ -386,7 +390,9 @@ ktrpledge(struct proc *p, int error, uin
>       kp.code = code;
>       kp.syscall = syscall;
>  
> +     KERNEL_LOCK();
>       ktrwrite(p, &kth, &kp, sizeof(kp));
> +     KERNEL_UNLOCK();
>       atomic_clearbits_int(&p->p_flag, P_INKTR);
>  }
>  
> @@ -622,6 +628,8 @@ ktrwriteraw(struct proc *curp, struct vn
>       struct iovec aiov[3];
>       struct process *pr;
>       int error;
> +
> +     KERNEL_ASSERT_LOCKED();
>  
>       auio.uio_iov = &aiov[0];
>       auio.uio_offset = 0;
> Index: kern/kern_pledge.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_pledge.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 kern_pledge.c
> --- kern/kern_pledge.c        28 Apr 2018 12:49:21 -0000      1.230
> +++ kern/kern_pledge.c        22 May 2018 13:21:54 -0000
> @@ -523,6 +523,7 @@ pledge_fail(struct proc *p, int error, u
>       if (p->p_p->ps_pledge & PLEDGE_ERROR)
>               return (ENOSYS);
>  
> +     KERNEL_LOCK();
>       log(LOG_ERR, "%s[%d]: pledge \"%s\", syscall %d\n",
>           p->p_p->ps_comm, p->p_p->ps_pid, codes, p->p_pledge_syscall);
>       p->p_p->ps_acflag |= APLEDGE;
> @@ -535,6 +536,7 @@ pledge_fail(struct proc *p, int error, u
>       psignal(p, SIGABRT);
>  
>       p->p_p->ps_pledge = 0;          /* Disable all PLEDGE_ flags */
> +     KERNEL_UNLOCK();
>       return (error);
>  }
>  
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.337
> diff -u -p -r1.337 kern_sysctl.c
> --- kern/kern_sysctl.c        16 May 2018 14:53:43 -0000      1.337
> +++ kern/kern_sysctl.c        22 May 2018 13:21:54 -0000
> @@ -1059,7 +1059,9 @@ fill_file(struct kinfo_file *kf, struct 
>               kf->f_flag = fp->f_flag;
>               kf->f_iflags = fp->f_iflags;
>               kf->f_type = fp->f_type;
> +             mtx_enter(&fhdlk);
>               kf->f_count = fp->f_count;
> +             mtx_leave(&fhdlk);
>               if (show_pointers)
>                       kf->f_ucred = PTRTOINT64(fp->f_cred);
>               kf->f_uid = fp->f_cred->cr_uid;
> Index: kern/sys_pipe.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_pipe.c,v
> retrieving revision 1.78
> diff -u -p -r1.78 sys_pipe.c
> --- kern/sys_pipe.c   10 Apr 2018 09:17:45 -0000      1.78
> +++ kern/sys_pipe.c   22 May 2018 13:21:54 -0000
> @@ -154,7 +154,7 @@ dopipe(struct proc *p, int *ufds, int fl
>  
>       fdplock(fdp);
>  
> -     error = falloc(p, cloexec, &rf, &fds[0]);
> +     error = falloc(p, &rf, &fds[0]);
>       if (error != 0)
>               goto free2;
>       rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
> @@ -162,7 +162,7 @@ dopipe(struct proc *p, int *ufds, int fl
>       rf->f_data = rpipe;
>       rf->f_ops = &pipeops;
>  
> -     error = falloc(p, cloexec, &wf, &fds[1]);
> +     error = falloc(p, &wf, &fds[1]);
>       if (error != 0)
>               goto free3;
>       wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
> @@ -173,8 +173,8 @@ dopipe(struct proc *p, int *ufds, int fl
>       rpipe->pipe_peer = wpipe;
>       wpipe->pipe_peer = rpipe;
>  
> -     FILE_SET_MATURE(rf, p);
> -     FILE_SET_MATURE(wf, p);
> +     fdinsert(fdp, fds[0], cloexec, rf);
> +     fdinsert(fdp, fds[1], cloexec, wf);
>  
>       error = copyout(fds, ufds, sizeof(fds));
>       if (error != 0) {
> @@ -186,6 +186,9 @@ dopipe(struct proc *p, int *ufds, int fl
>               ktrfds(p, fds, 2);
>  #endif
>       fdpunlock(fdp);
> +
> +     FRELE(rf, p);
> +     FRELE(wf, p);
>       return (error);
>  
>  free3:
> Index: kern/syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.c,v
> retrieving revision 1.190
> diff -u -p -r1.190 syscalls.c
> --- kern/syscalls.c   12 Dec 2017 01:13:14 -0000      1.190
> +++ kern/syscalls.c   22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/*   $OpenBSD: syscalls.c,v 1.190 2017/12/12 01:13:14 deraadt Exp $  */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call names.
> Index: kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.180
> diff -u -p -r1.180 syscalls.master
> --- kern/syscalls.master      12 Dec 2017 01:12:34 -0000      1.180
> +++ kern/syscalls.master      22 May 2018 13:21:54 -0000
> @@ -88,18 +88,18 @@
>  #else
>  26   UNIMPL          ptrace
>  #endif
> -27   STD             { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
> +27   STD NOLOCK      { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
>                           int flags); }
> -28   STD             { ssize_t sys_sendmsg(int s, \
> +28   STD NOLOCK      { ssize_t sys_sendmsg(int s, \
>                           const struct msghdr *msg, int flags); }
> -29   STD             { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
> +29   STD NOLOCK      { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
>                           int flags, struct sockaddr *from, \
>                           socklen_t *fromlenaddr); }
> -30   STD             { int sys_accept(int s, struct sockaddr *name, \
> +30   STD NOLOCK      { int sys_accept(int s, struct sockaddr *name, \
>                           socklen_t *anamelen); }
> -31   STD             { int sys_getpeername(int fdes, struct sockaddr *asa, \
> +31   STD NOLOCK      { int sys_getpeername(int fdes, struct sockaddr *asa, \
>                           socklen_t *alen); }
> -32   STD             { int sys_getsockname(int fdes, struct sockaddr *asa, \
> +32   STD NOLOCK      { int sys_getsockname(int fdes, struct sockaddr *asa, \
>                           socklen_t *alen); }
>  33   STD             { int sys_access(const char *path, int amode); }
>  34   STD             { int sys_chflags(const char *path, u_int flags); }
> @@ -205,26 +205,26 @@
>  91   STD             { int sys_nanosleep(const struct timespec *rqtp, \
>                           struct timespec *rmtp); }
>  92   STD             { int sys_fcntl(int fd, int cmd, ... void *arg); }
> -93   STD             { int sys_accept4(int s, struct sockaddr *name, \
> +93   STD NOLOCK      { int sys_accept4(int s, struct sockaddr *name, \
>                           socklen_t *anamelen, int flags); }
>  94   STD             { int sys___thrsleep(const volatile void *ident, \
>                           clockid_t clock_id, const struct timespec *tp, \
>                           void *lock, const int *abort); }
>  95   STD             { int sys_fsync(int fd); }
>  96   STD             { int sys_setpriority(int which, id_t who, int prio); }
> -97   STD             { int sys_socket(int domain, int type, int protocol); }
> -98   STD             { int sys_connect(int s, const struct sockaddr *name, \
> +97   STD NOLOCK      { int sys_socket(int domain, int type, int protocol); }
> +98   STD NOLOCK      { int sys_connect(int s, const struct sockaddr *name, \
>                           socklen_t namelen); }
>  99   STD             { int sys_getdents(int fd, void *buf, size_t buflen); }
>  100  STD             { int sys_getpriority(int which, id_t who); }
>  101  STD             { int sys_pipe2(int *fdp, int flags); }
>  102  STD             { int sys_dup3(int from, int to, int flags); }
>  103  STD             { int sys_sigreturn(struct sigcontext *sigcntxp); }
> -104  STD             { int sys_bind(int s, const struct sockaddr *name, \
> +104  STD NOLOCK      { int sys_bind(int s, const struct sockaddr *name, \
>                           socklen_t namelen); }
> -105  STD             { int sys_setsockopt(int s, int level, int name, \
> +105  STD NOLOCK      { int sys_setsockopt(int s, int level, int name, \
>                           const void *val, socklen_t valsize); }
> -106  STD             { int sys_listen(int s, int backlog); }
> +106  STD NOLOCK      { int sys_listen(int s, int backlog); }
>  107  STD             { int sys_chflagsat(int fd, const char *path, \
>                           u_int flags, int atflags); }
>  108  STD             { int sys_pledge(const char *promises, \
> @@ -243,7 +243,7 @@
>  115  OBSOL           vtrace
>  116  OBSOL           t32_gettimeofday
>  117  OBSOL           t32_getrusage
> -118  STD             { int sys_getsockopt(int s, int level, int name, \
> +118  STD NOLOCK      { int sys_getsockopt(int s, int level, int name, \
>                           void *val, socklen_t *avalsize); }
>  119  STD             { int sys_thrkill(pid_t tid, int signum, void *tcb); }
>  120  STD             { ssize_t sys_readv(int fd, \
> @@ -261,11 +261,11 @@
>  130  OBSOL           oftruncate
>  131  STD             { int sys_flock(int fd, int how); }
>  132  STD             { int sys_mkfifo(const char *path, mode_t mode); }
> -133  STD             { ssize_t sys_sendto(int s, const void *buf, \
> +133  STD NOLOCK      { ssize_t sys_sendto(int s, const void *buf, \
>                           size_t len, int flags, const struct sockaddr *to, \
>                           socklen_t tolen); }
> -134  STD             { int sys_shutdown(int s, int how); }
> -135  STD             { int sys_socketpair(int domain, int type, \
> +134  STD NOLOCK      { int sys_shutdown(int s, int how); }
> +135  STD NOLOCK      { int sys_socketpair(int domain, int type, \
>                           int protocol, int *rsv); }
>  136  STD             { int sys_mkdir(const char *path, mode_t mode); }
>  137  STD             { int sys_rmdir(const char *path); }
> Index: kern/tty_pty.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/tty_pty.c,v
> retrieving revision 1.84
> diff -u -p -r1.84 tty_pty.c
> --- kern/tty_pty.c    28 Apr 2018 03:13:04 -0000      1.84
> +++ kern/tty_pty.c    22 May 2018 13:21:54 -0000
> @@ -1070,11 +1070,11 @@ ptmioctl(dev_t dev, u_long cmd, caddr_t 
>       case PTMGET:
>               fdplock(fdp);
>               /* Grab two filedescriptors. */
> -             if ((error = falloc(p, 0, &cfp, &cindx)) != 0) {
> +             if ((error = falloc(p, &cfp, &cindx)) != 0) {
>                       fdpunlock(fdp);
>                       break;
>               }
> -             if ((error = falloc(p, 0, &sfp, &sindx)) != 0) {
> +             if ((error = falloc(p, &sfp, &sindx)) != 0) {
>                       fdremove(fdp, cindx);
>                       closef(cfp, p);
>                       fdpunlock(fdp);
> @@ -1166,11 +1166,12 @@ retry:
>               memcpy(ptm->cn, pti->pty_pn, sizeof(pti->pty_pn));
>               memcpy(ptm->sn, pti->pty_sn, sizeof(pti->pty_sn));
>  
> -             /* mark the files mature now that we've passed all errors */
> -             FILE_SET_MATURE(cfp, p);
> -             FILE_SET_MATURE(sfp, p);
> -
> +             /* insert files now that we've passed all errors */
> +             fdinsert(fdp, cindx, 0, cfp);
> +             fdinsert(fdp, sindx, 0, sfp);
>               fdpunlock(fdp);
> +             FRELE(cfp, p);
> +             FRELE(sfp, p);
>               break;
>       default:
>               error = EINVAL;
> Index: kern/uipc_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.171
> diff -u -p -r1.171 uipc_syscalls.c
> --- kern/uipc_syscalls.c      22 May 2018 09:51:01 -0000      1.171
> +++ kern/uipc_syscalls.c      22 May 2018 13:21:54 -0000
> @@ -101,13 +101,14 @@ sys_socket(struct proc *p, void *v, regi
>       fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
>  
>       error = socreate(SCARG(uap, domain), &so, type, SCARG(uap, protocol));
> -     if (error != 0)
> -             goto out;
> +     if (error)
> +             return (error);
>  
> +     KERNEL_LOCK();
>       fdplock(fdp);
> -     error = falloc(p, cloexec, &fp, &fd);
> -     fdpunlock(fdp);
> +     error = falloc(p, &fp, &fd);
>       if (error) {
> +             fdpunlock(fdp);
>               soclose(so);
>       } else {
>               fp->f_flag = fflag;
> @@ -117,10 +118,12 @@ sys_socket(struct proc *p, void *v, regi
>                       so->so_state |= SS_NBIO;
>               so->so_state |= ss;
>               fp->f_data = so;
> -             FILE_SET_MATURE(fp, p);
> +             fdinsert(fdp, fd, cloexec, fp);
> +             fdpunlock(fdp);
> +             FRELE(fp, p);
>               *retval = fd;
>       }
> -out:
> +     KERNEL_UNLOCK();
>       return (error);
>  }
>  
> @@ -272,7 +275,9 @@ doaccept(struct proc *p, int sock, struc
>       socklen_t namelen;
>       int error, s, tmpfd;
>       struct socket *head, *so;
> -     int nflag;
> +     int cloexec, nflag;
> +
> +     cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
>  
>       if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
>               return (error);
> @@ -282,7 +287,7 @@ doaccept(struct proc *p, int sock, struc
>       headfp = fp;
>  
>       fdplock(fdp);
> -     error = falloc(p, (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0, &fp, &tmpfd);
> +     error = falloc(p, &fp, &tmpfd);
>       fdpunlock(fdp);
>       if (error) {
>               FRELE(headfp, p);
> @@ -347,8 +352,11 @@ out:
>               else
>                       so->so_state &= ~SS_NBIO;
>               sounlock(s);
> +             fdplock(fdp);
>               fp->f_data = so;
> -             FILE_SET_MATURE(fp, p);
> +             fdinsert(fdp, tmpfd, cloexec, fp);
> +             fdpunlock(fdp);
> +             FRELE(fp, p);
>               *retval = tmpfd;
>       } else {
>               sounlock(s);
> @@ -475,14 +483,15 @@ sys_socketpair(struct proc *p, void *v, 
>               if (error != 0)
>                       goto free2;
>       }
> +     KERNEL_LOCK();
>       fdplock(fdp);
> -     if ((error = falloc(p, cloexec, &fp1, &sv[0])) != 0)
> +     if ((error = falloc(p, &fp1, &sv[0])) != 0)
>               goto free3;
>       fp1->f_flag = fflag;
>       fp1->f_type = DTYPE_SOCKET;
>       fp1->f_ops = &socketops;
>       fp1->f_data = so1;
> -     if ((error = falloc(p, cloexec, &fp2, &sv[1])) != 0)
> +     if ((error = falloc(p, &fp2, &sv[1])) != 0)
>               goto free4;
>       fp2->f_flag = fflag;
>       fp2->f_type = DTYPE_SOCKET;
> @@ -500,9 +509,12 @@ sys_socketpair(struct proc *p, void *v, 
>                       (*fp2->f_ops->fo_ioctl)(fp2, FIONBIO, (caddr_t)&type,
>                           p);
>               }
> -             FILE_SET_MATURE(fp1, p);
> -             FILE_SET_MATURE(fp2, p);
> +             fdinsert(fdp, sv[0], cloexec, fp1);
> +             fdinsert(fdp, sv[1], cloexec, fp2);
>               fdpunlock(fdp);
> +             FRELE(fp1, p);
> +             FRELE(fp2, p);
> +             KERNEL_UNLOCK();
>               return (0);
>       }
>       fdremove(fdp, sv[1]);
> @@ -514,6 +526,7 @@ free4:
>       so1 = NULL;
>  free3:
>       fdpunlock(fdp);
> +     KERNEL_UNLOCK();
>  free2:
>       if (so2 != NULL)
>               (void)soclose(so2);
> @@ -678,13 +691,16 @@ sendit(struct proc *p, int s, struct msg
>       }
>  #endif
>       len = auio.uio_resid;
> -     error = sosend(fp->f_data, to, &auio, NULL, control, flags);
> +     error = sosend(so, to, &auio, NULL, control, flags);
>       if (error) {
>               if (auio.uio_resid != len && (error == ERESTART ||
>                   error == EINTR || error == EWOULDBLOCK))
>                       error = 0;
> -             if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0)
> +             if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
> +                     KERNEL_LOCK();
>                       ptsignal(p, SIGPIPE, STHREAD);
> +                     KERNEL_UNLOCK();
> +             }
>       }
>       if (error == 0) {
>               *retsize = len - auio.uio_resid;
> @@ -925,11 +941,13 @@ sys_shutdown(struct proc *p, void *v, re
>               syscallarg(int) how;
>       } */ *uap = v;
>       struct file *fp;
> +     struct socket *so;
>       int error;
>  
>       if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
>               return (error);
> -     error = soshutdown(fp->f_data, SCARG(uap, how));
> +     so = fp->f_data;
> +     error = soshutdown(so, SCARG(uap, how));
>       FRELE(fp, p);
>       return (error);
>  }
> @@ -1163,7 +1181,8 @@ getsock(struct proc *p, int fdes, struct
>  {
>       struct file *fp;
>  
> -     if ((fp = fd_getfile(p->p_fd, fdes)) == NULL)
> +     fp = fd_getfile(p->p_fd, fdes);
> +     if (fp == NULL)
>               return (EBADF);
>       if (fp->f_type != DTYPE_SOCKET) {
>               FRELE(fp, p);
> Index: kern/uipc_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.126
> diff -u -p -r1.126 uipc_usrreq.c
> --- kern/uipc_usrreq.c        28 Apr 2018 03:13:04 -0000      1.126
> +++ kern/uipc_usrreq.c        22 May 2018 13:21:54 -0000
> @@ -899,6 +899,7 @@ unp_gc(void *arg __unused)
>                       fp = defer->ud_fp[i].fp;
>                       if (fp == NULL)
>                               continue;
> +                      /* closef() expects a refcount of 2 */
>                       FREF(fp);
>                       if ((unp = fptounp(fp)) != NULL)
>                               unp->unp_msgcount--;
> @@ -915,6 +916,8 @@ unp_gc(void *arg __unused)
>       do {
>               nunref = 0;
>               LIST_FOREACH(unp, &unp_head, unp_link) {
> +                     mtx_enter(&fhdlk);
> +                     fp = unp->unp_file;
>                       if (unp->unp_flags & UNP_GCDEFER) {
>                               /*
>                                * This socket is referenced by another
> @@ -925,8 +928,9 @@ unp_gc(void *arg __unused)
>                               unp_defer--;
>                       } else if (unp->unp_flags & UNP_GCMARK) {
>                               /* marked as live in previous pass */
> +                             mtx_leave(&fhdlk);
>                               continue;
> -                     } else if ((fp = unp->unp_file) == NULL) {
> +                     } else if (fp == NULL) {
>                               /* not being passed, so can't be in loop */
>                       } else if (fp->f_count == 0) {
>                               /*
> @@ -943,9 +947,11 @@ unp_gc(void *arg __unused)
>                               if (fp->f_count == unp->unp_msgcount) {
>                                       nunref++;
>                                       unp->unp_flags |= UNP_GCDEAD;
> +                                     mtx_leave(&fhdlk);
>                                       continue;
>                               }
>                       }
> +                     mtx_leave(&fhdlk);
>  
>                       /*
>                        * This is the first time we've seen this socket on
> Index: kern/vfs_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
> retrieving revision 1.283
> diff -u -p -r1.283 vfs_syscalls.c
> --- kern/vfs_syscalls.c       8 May 2018 08:53:41 -0000       1.283
> +++ kern/vfs_syscalls.c       22 May 2018 13:21:54 -0000
> @@ -899,7 +899,7 @@ doopenat(struct proc *p, int fd, const c
>       struct file *fp;
>       struct vnode *vp;
>       struct vattr vattr;
> -     int flags, cmode;
> +     int flags, cloexec, cmode;
>       int type, indx, error, localtrunc = 0;
>       struct flock lf;
>       struct nameidata nd;
> @@ -911,10 +911,10 @@ doopenat(struct proc *p, int fd, const c
>                       return (error);
>       }
>  
> -     fdplock(fdp);
> +     cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
>  
> -     if ((error = falloc(p, (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
> -         &indx)) != 0)
> +     fdplock(fdp);
> +     if ((error = falloc(p, &fp, &indx)) != 0)
>               goto out;
>       flags = FFLAGS(oflags);
>       if (flags & FREAD)
> @@ -999,7 +999,8 @@ doopenat(struct proc *p, int fd, const c
>       }
>       VOP_UNLOCK(vp);
>       *retval = indx;
> -     FILE_SET_MATURE(fp, p);
> +     fdinsert(fdp, indx, cloexec, fp);
> +     FRELE(fp, p);
>  out:
>       fdpunlock(fdp);
>       return (error);
> @@ -1060,7 +1061,7 @@ sys_fhopen(struct proc *p, void *v, regi
>       struct vnode *vp = NULL;
>       struct mount *mp;
>       struct ucred *cred = p->p_ucred;
> -     int flags;
> +     int flags, cloexec;
>       int type, indx, error=0;
>       struct flock lf;
>       struct vattr va;
> @@ -1078,9 +1079,10 @@ sys_fhopen(struct proc *p, void *v, regi
>       if ((flags & O_CREAT))
>               return (EINVAL);
>  
> +     cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
> +
>       fdplock(fdp);
> -     if ((error = falloc(p, (flags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
> -         &indx)) != 0) {
> +     if ((error = falloc(p, &fp, &indx)) != 0) {
>               fp = NULL;
>               goto bad;
>       }
> @@ -1160,9 +1162,9 @@ sys_fhopen(struct proc *p, void *v, regi
>       }
>       VOP_UNLOCK(vp);
>       *retval = indx;
> -     FILE_SET_MATURE(fp, p);
> -
> +     fdinsert(fdp, indx, cloexec, fp);
>       fdpunlock(fdp);
> +     FRELE(fp, p);
>       return (0);
>  
>  bad:
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.552
> diff -u -p -r1.552 if.c
> --- net/if.c  17 May 2018 11:04:14 -0000      1.552
> +++ net/if.c  22 May 2018 13:21:54 -0000
> @@ -1378,7 +1378,7 @@ ifa_ifwithaddr(struct sockaddr *addr, u_
>       struct ifaddr *ifa;
>       u_int rdomain;
>  
> -     KERNEL_ASSERT_LOCKED();
> +     KERNEL_LOCK();
>       rdomain = rtable_l2(rtableid);
>       TAILQ_FOREACH(ifp, &ifnet, if_list) {
>               if (ifp->if_rdomain != rdomain)
> @@ -1388,10 +1388,13 @@ ifa_ifwithaddr(struct sockaddr *addr, u_
>                       if (ifa->ifa_addr->sa_family != addr->sa_family)
>                               continue;
>  
> -                     if (equal(addr, ifa->ifa_addr))
> +                     if (equal(addr, ifa->ifa_addr)) {
> +                             KERNEL_UNLOCK();
>                               return (ifa);
> +                     }
>               }
>       }
> +     KERNEL_UNLOCK();
>       return (NULL);
>  }
>  
> @@ -1404,8 +1407,8 @@ ifa_ifwithdstaddr(struct sockaddr *addr,
>       struct ifnet *ifp;
>       struct ifaddr *ifa;
>  
> -     KERNEL_ASSERT_LOCKED();
>       rdomain = rtable_l2(rdomain);
> +     KERNEL_LOCK();
>       TAILQ_FOREACH(ifp, &ifnet, if_list) {
>               if (ifp->if_rdomain != rdomain)
>                       continue;
> @@ -1414,11 +1417,14 @@ ifa_ifwithdstaddr(struct sockaddr *addr,
>                               if (ifa->ifa_addr->sa_family !=
>                                   addr->sa_family || ifa->ifa_dstaddr == NULL)
>                                       continue;
> -                             if (equal(addr, ifa->ifa_dstaddr))
> +                             if (equal(addr, ifa->ifa_dstaddr)) {
> +                                     KERNEL_UNLOCK();
>                                       return (ifa);
> +                             }
>                       }
>               }
>       }
> +     KERNEL_UNLOCK();
>       return (NULL);
>  }
>  
> Index: sys/file.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/file.h,v
> retrieving revision 1.45
> diff -u -p -r1.45 file.h
> --- sys/file.h        9 May 2018 08:42:02 -0000       1.45
> +++ sys/file.h        22 May 2018 13:21:54 -0000
> @@ -65,6 +65,7 @@ struct      fileops {
>   *
>   *  Locks used to protect struct members in this file:
>   *   I       immutable after creation
> + *   F       global `fhdlk' mutex
>   *   f       per file `f_mtx'
>   *   k       kernel lock
>   */
> @@ -77,7 +78,7 @@ struct file {
>  #define      DTYPE_PIPE      3       /* pipe */
>  #define      DTYPE_KQUEUE    4       /* event queue */
>       short   f_type;         /* [I] descriptor type */
> -     long    f_count;        /* [k] reference count */
> +     long    f_count;        /* [F] reference count */
>       struct  ucred *f_cred;  /* [I] credentials associated with descriptor */
>       struct  fileops *f_ops; /* [I] file operation pointers */
>       off_t   f_offset;       /* [k] */
> @@ -91,26 +92,31 @@ struct file {
>  };
>  
>  #define FIF_HASLOCK          0x01    /* descriptor holds advisory lock */
> -#define FIF_LARVAL           0x02    /* not fully constructed, don't use */
> -
> -#define FILE_IS_USABLE(fp) \
> -     (((fp)->f_iflags & FIF_LARVAL) == 0)
> +#define FIF_INSERTED         0x80    /* present in `filehead' */
>  
>  #define FREF(fp) \
>       do { \
>               extern void vfs_stall_barrier(void); \
>               vfs_stall_barrier(); \
> +             mtx_enter(&fhdlk); \
>               (fp)->f_count++; \
> +             mtx_leave(&fhdlk); \
>       } while (0)
> -#define FRELE(fp,p)  (--(fp)->f_count == 0 ? fdrop(fp, p) : 0)
>  
> -#define FILE_SET_MATURE(fp,p) do {                           \
> -     (fp)->f_iflags &= ~FIF_LARVAL;                          \
> -     FRELE(fp, p);                                           \
> -} while (0)
> +#define FRELE(fp,p) \
> +({ \
> +     int rv = 0; \
> +     mtx_enter(&fhdlk); \
> +     if (--(fp)->f_count == 0) \
> +             rv = fdrop(fp, p); \
> +     else \
> +             mtx_leave(&fhdlk); \
> +     rv; \
> +})
>  
>  int  fdrop(struct file *, struct proc *);
>  
> +extern struct mutex fhdlk;           /* protects `filehead' and f_count */
>  LIST_HEAD(filelist, file);
>  extern int maxfiles;                 /* kernel limit on number of open files */
>  extern int numfiles;                 /* actual number of open files */
> Index: sys/filedesc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/filedesc.h,v
> retrieving revision 1.35
> diff -u -p -r1.35 filedesc.h
> --- sys/filedesc.h    25 Apr 2018 10:29:17 -0000      1.35
> +++ sys/filedesc.h    22 May 2018 13:21:54 -0000
> @@ -125,12 +125,13 @@ void    filedesc_init(void);
>  int  dupfdopen(struct proc *, int, int);
>  int  fdalloc(struct proc *p, int want, int *result);
>  void fdexpand(struct proc *);
> -int  falloc(struct proc *_p, int _flags, struct file **_rfp, int *_rfd);
> +int  falloc(struct proc *_p, struct file **_rfp, int *_rfd);
>  struct       filedesc *fdinit(void);
>  struct       filedesc *fdshare(struct process *);
>  struct       filedesc *fdcopy(struct process *);
>  void fdfree(struct proc *p);
>  int  fdrelease(struct proc *p, int);
> +void fdinsert(struct filedesc *, int, int, struct file *);
>  void fdremove(struct filedesc *, int);
>  void fdcloseexec(struct proc *);
>  struct file *fd_iterfile(struct file *, struct proc *);
> Index: sys/syscall.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscall.h,v
> retrieving revision 1.190
> diff -u -p -r1.190 syscall.h
> --- sys/syscall.h     12 Dec 2017 01:13:14 -0000      1.190
> +++ sys/syscall.h     22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/*   $OpenBSD: syscall.h,v 1.190 2017/12/12 01:13:14 deraadt Exp $   */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call numbers.
> Index: sys/syscallargs.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscallargs.h,v
> retrieving revision 1.193
> diff -u -p -r1.193 syscallargs.h
> --- sys/syscallargs.h 12 Dec 2017 01:13:14 -0000      1.193
> +++ sys/syscallargs.h 22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/*   $OpenBSD: syscallargs.h,v 1.193 2017/12/12 01:13:14 deraadt Exp $       */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call argument lists.
> 

Reply via email to