Martin Pieuchot wrote:
> By assuming that `f_data' is immutable, which AFAIK is true for sockets,
> we can remove the KERNEL_LOCK() from the following syscalls iff files are
> refcounted in an MP-safe way.
>
> This diff includes the EBUSY check in dup2(2) which is currently required
> to avoid races with accept(2) and will later make our life easier wrt
> open(2).
>
> It also includes the fdinsert() diff I sent earlier.
>
> On top of that I'm introducing a global mutex, `fhdlk', that protects
> `f_count' and the implicit reference in `filehead'.
>
> A socket stays alive as long as its associated file has a positive
> refcount. When this refcount drops, fdrop() will be called and soclose()
> will free/clean `f_data'. That's the only place where `f_data' is
> changed during the life of a socket. That's why it is safe to dereference
> `f_data' when getsock() returned a valid & refcounted `fp'.
>
> Many ktrace(2) internals now need to grab the KERNEL_LOCK(), just like
> ptsignal().
>
> Note that for unix, routing and pfkey sockets, solock() still grabs the
> KERNEL_LOCK(). So even if syscalls are marked as SY_NOLOCK that doesn't
> mean they won't grab it. In fact some network functions like
> ifa_ifwithaddr() below now need to grab the KERNEL_LOCK(). That's good:
> it means we're pushing the lock down.
>
> Tests? Comments?
I've been running this on a server since this morning, doing various stress
tests, syscalls, etc., and it is running fine.
I also tried it on a desktop (a VM), just running Chrome for a while, and
I get a reproducible hang that leaves the filesystem dirty. It just hangs
and I cannot drop into ddb. I'll try to get more info.
>
> Index: kern/exec_script.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/exec_script.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 exec_script.c
> --- kern/exec_script.c 2 May 2018 02:24:56 -0000 1.44
> +++ kern/exec_script.c 22 May 2018 13:21:54 -0000
> @@ -170,17 +170,20 @@ check_shell:
> #endif
>
> fdplock(p->p_fd);
> - error = falloc(p, 0, &fp, &epp->ep_fd);
> - fdpunlock(p->p_fd);
> - if (error)
> + error = falloc(p, &fp, &epp->ep_fd);
> + if (error) {
> + fdpunlock(p->p_fd);
> goto fail;
> + }
>
> epp->ep_flags |= EXEC_HASFD;
> fp->f_type = DTYPE_VNODE;
> fp->f_ops = &vnops;
> fp->f_data = (caddr_t) scriptvp;
> fp->f_flag = FREAD;
> - FILE_SET_MATURE(fp, p);
> + fdinsert(p->p_fd, epp->ep_fd, 0, fp);
> + fdpunlock(p->p_fd);
> + FRELE(fp, p);
> }
>
> /* set up the parameters for the recursive check_exec() call */
> Index: kern/init_sysent.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_sysent.c,v
> retrieving revision 1.191
> diff -u -p -r1.191 init_sysent.c
> --- kern/init_sysent.c 12 Dec 2017 01:13:14 -0000 1.191
> +++ kern/init_sysent.c 22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: init_sysent.c,v 1.191 2017/12/12 01:13:14 deraadt Exp $
> */
> +/* $OpenBSD$ */
>
> /*
> * System call switch table.
> @@ -76,17 +76,17 @@ struct sysent sysent[] = {
> { 0, 0, 0,
> sys_nosys }, /* 26 = unimplemented ptrace */
> #endif
> - { 3, s(struct sys_recvmsg_args), 0,
> + { 3, s(struct sys_recvmsg_args), SY_NOLOCK | 0,
> sys_recvmsg }, /* 27 = recvmsg */
> - { 3, s(struct sys_sendmsg_args), 0,
> + { 3, s(struct sys_sendmsg_args), SY_NOLOCK | 0,
> sys_sendmsg }, /* 28 = sendmsg */
> - { 6, s(struct sys_recvfrom_args), 0,
> + { 6, s(struct sys_recvfrom_args), SY_NOLOCK | 0,
> sys_recvfrom }, /* 29 = recvfrom */
> - { 3, s(struct sys_accept_args), 0,
> + { 3, s(struct sys_accept_args), SY_NOLOCK | 0,
> sys_accept }, /* 30 = accept */
> - { 3, s(struct sys_getpeername_args), 0,
> + { 3, s(struct sys_getpeername_args), SY_NOLOCK | 0,
> sys_getpeername }, /* 31 = getpeername */
> - { 3, s(struct sys_getsockname_args), 0,
> + { 3, s(struct sys_getsockname_args), SY_NOLOCK | 0,
> sys_getsockname }, /* 32 = getsockname */
> { 2, s(struct sys_access_args), 0,
> sys_access }, /* 33 = access */
> @@ -218,7 +218,7 @@ struct sysent sysent[] = {
> sys_nanosleep }, /* 91 = nanosleep */
> { 3, s(struct sys_fcntl_args), 0,
> sys_fcntl }, /* 92 = fcntl */
> - { 4, s(struct sys_accept4_args), 0,
> + { 4, s(struct sys_accept4_args), SY_NOLOCK | 0,
> sys_accept4 }, /* 93 = accept4 */
> { 5, s(struct sys___thrsleep_args), 0,
> sys___thrsleep }, /* 94 = __thrsleep */
> @@ -226,9 +226,9 @@ struct sysent sysent[] = {
> sys_fsync }, /* 95 = fsync */
> { 3, s(struct sys_setpriority_args), 0,
> sys_setpriority }, /* 96 = setpriority */
> - { 3, s(struct sys_socket_args), 0,
> + { 3, s(struct sys_socket_args), SY_NOLOCK | 0,
> sys_socket }, /* 97 = socket */
> - { 3, s(struct sys_connect_args), 0,
> + { 3, s(struct sys_connect_args), SY_NOLOCK | 0,
> sys_connect }, /* 98 = connect */
> { 3, s(struct sys_getdents_args), 0,
> sys_getdents }, /* 99 = getdents */
> @@ -240,11 +240,11 @@ struct sysent sysent[] = {
> sys_dup3 }, /* 102 = dup3 */
> { 1, s(struct sys_sigreturn_args), 0,
> sys_sigreturn }, /* 103 = sigreturn */
> - { 3, s(struct sys_bind_args), 0,
> + { 3, s(struct sys_bind_args), SY_NOLOCK | 0,
> sys_bind }, /* 104 = bind */
> - { 5, s(struct sys_setsockopt_args), 0,
> + { 5, s(struct sys_setsockopt_args), SY_NOLOCK | 0,
> sys_setsockopt }, /* 105 = setsockopt */
> - { 2, s(struct sys_listen_args), 0,
> + { 2, s(struct sys_listen_args), SY_NOLOCK | 0,
> sys_listen }, /* 106 = listen */
> { 4, s(struct sys_chflagsat_args), 0,
> sys_chflagsat }, /* 107 = chflagsat */
> @@ -268,7 +268,7 @@ struct sysent sysent[] = {
> sys_nosys }, /* 116 = obsolete
> t32_gettimeofday */
> { 0, 0, 0,
> sys_nosys }, /* 117 = obsolete t32_getrusage
> */
> - { 5, s(struct sys_getsockopt_args), 0,
> + { 5, s(struct sys_getsockopt_args), SY_NOLOCK | 0,
> sys_getsockopt }, /* 118 = getsockopt */
> { 3, s(struct sys_thrkill_args), 0,
> sys_thrkill }, /* 119 = thrkill */
> @@ -298,11 +298,11 @@ struct sysent sysent[] = {
> sys_flock }, /* 131 = flock */
> { 2, s(struct sys_mkfifo_args), 0,
> sys_mkfifo }, /* 132 = mkfifo */
> - { 6, s(struct sys_sendto_args), 0,
> + { 6, s(struct sys_sendto_args), SY_NOLOCK | 0,
> sys_sendto }, /* 133 = sendto */
> - { 2, s(struct sys_shutdown_args), 0,
> + { 2, s(struct sys_shutdown_args), SY_NOLOCK | 0,
> sys_shutdown }, /* 134 = shutdown */
> - { 4, s(struct sys_socketpair_args), 0,
> + { 4, s(struct sys_socketpair_args), SY_NOLOCK | 0,
> sys_socketpair }, /* 135 = socketpair */
> { 2, s(struct sys_mkdir_args), 0,
> sys_mkdir }, /* 136 = mkdir */
> Index: kern/kern_descrip.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_descrip.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 kern_descrip.c
> --- kern/kern_descrip.c 8 May 2018 09:03:58 -0000 1.158
> +++ kern/kern_descrip.c 22 May 2018 13:21:54 -0000
> @@ -67,6 +67,7 @@
> /*
> * Descriptor management.
> */
> +struct mutex fhdlk = MUTEX_INITIALIZER(IPL_NONE);
> struct filelist filehead; /* head of list of open files */
> int numfiles; /* actual number of open files */
>
> @@ -144,6 +145,23 @@ find_last_set(struct filedesc *fd, int l
> return i;
> }
>
> +static __inline int
> +fd_inuse(struct filedesc *fdp, int fd)
> +{
> + u_int off = fd >> NDENTRYSHIFT;
> +
> + if (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK)))
> + return 1;
> +
> + if (fdp->fd_lomap[off] != ~0)
> + return 0;
> +
> + if (fdp->fd_himap[off >> NDENTRYSHIFT] & (1 << (off & NDENTRYMASK)))
> + return 1;
> +
> + return 0;
> +}
> +
> static __inline void
> fd_used(struct filedesc *fdp, int fd)
> {
> @@ -184,16 +202,18 @@ fd_iterfile(struct file *fp, struct proc
> {
> struct file *nfp;
>
> + mtx_enter(&fhdlk);
> if (fp == NULL)
> nfp = LIST_FIRST(&filehead);
> else
> nfp = LIST_NEXT(fp, f_list);
>
> - /* don't FREF when f_count == 0 to avoid race in fdrop() */
> - while (nfp != NULL && (nfp->f_count == 0 || !FILE_IS_USABLE(nfp)))
> + /* don't refcount when f_count == 0 to avoid race in fdrop() */
> + while (nfp != NULL && nfp->f_count == 0)
> nfp = LIST_NEXT(nfp, f_list);
> if (nfp != NULL)
> - FREF(nfp);
> + nfp->f_count++;
> + mtx_leave(&fhdlk);
>
> if (fp != NULL)
> FRELE(fp, p);
> @@ -206,13 +226,17 @@ fd_getfile(struct filedesc *fdp, int fd)
> {
> struct file *fp;
>
> - if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
> - return (NULL);
> + vfs_stall_barrier();
>
> - if (!FILE_IS_USABLE(fp))
> + if ((u_int)fd >= fdp->fd_nfiles)
> return (NULL);
>
> - FREF(fp);
> + mtx_enter(&fhdlk);
> + fp = fdp->fd_ofiles[fd];
> + if (fp != NULL)
> + fp->f_count++;
> + mtx_leave(&fhdlk);
> +
> return (fp);
> }
>
> @@ -634,18 +658,22 @@ finishdup(struct proc *p, struct file *f
> return (EDEADLK);
> }
>
> - /*
> - * Don't fd_getfile here. We want to closef LARVAL files and
> - * closef can deal with that.
> - */
> + mtx_enter(&fhdlk);
> oldfp = fdp->fd_ofiles[new];
> if (oldfp != NULL)
> - FREF(oldfp);
> + oldfp->f_count++;
> + mtx_leave(&fhdlk);
> +
> + if (dup2 && oldfp == NULL) {
> + if (fd_inuse(fdp, new)) {
> + FRELE(fp, p);
> + return (EBUSY);
> + }
> + fd_used(fdp, new);
> + }
>
> fdp->fd_ofiles[new] = fp;
> fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
> - if (dup2 && oldfp == NULL)
> - fd_used(fdp, new);
> *retval = new;
>
> if (oldfp != NULL) {
> @@ -658,6 +686,25 @@ finishdup(struct proc *p, struct file *f
> }
>
> void
> +fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
> +{
> + struct file *fq;
> +
> + fdpassertlocked(fdp);
> +
> + mtx_enter(&fhdlk);
> + if ((fq = fdp->fd_ofiles[0]) != NULL) {
> + LIST_INSERT_AFTER(fq, fp, f_list);
> + } else {
> + LIST_INSERT_HEAD(&filehead, fp, f_list);
> + }
> + fdp->fd_ofiles[fd] = fp;
> + fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
> + fp->f_iflags |= FIF_INSERTED;
> + mtx_leave(&fhdlk);
> +}
> +
> +void
> fdremove(struct filedesc *fdp, int fd)
> {
> fdpassertlocked(fdp);
> @@ -670,21 +717,14 @@ int
> fdrelease(struct proc *p, int fd)
> {
> struct filedesc *fdp = p->p_fd;
> - struct file **fpp, *fp;
> + struct file *fp;
>
> fdpassertlocked(fdp);
>
> - /*
> - * Don't fd_getfile here. We want to closef LARVAL files and closef
> - * can deal with that.
> - */
> - fpp = &fdp->fd_ofiles[fd];
> - fp = *fpp;
> + fp = fd_getfile(fdp, fd);
> if (fp == NULL)
> return (EBADF);
> - FREF(fp);
> - *fpp = NULL;
> - fd_unused(fdp, fd);
> + fdremove(fdp, fd);
> if (fd < fdp->fd_knlistsize)
> knote_fdclose(p, fd);
> return (closef(fp, p));
> @@ -927,9 +967,9 @@ fdexpand(struct proc *p)
> * a file descriptor for the process that refers to it.
> */
> int
> -falloc(struct proc *p, int flags, struct file **resultfp, int *resultfd)
> +falloc(struct proc *p, struct file **resultfp, int *resultfd)
> {
> - struct file *fp, *fq;
> + struct file *fp;
> int error, i;
>
> KASSERT(resultfp != NULL);
> @@ -958,20 +998,16 @@ restart:
> numfiles++;
> fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
> mtx_init(&fp->f_mtx, IPL_NONE);
> - fp->f_iflags = FIF_LARVAL;
> - if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
> - LIST_INSERT_AFTER(fq, fp, f_list);
> - } else {
> - LIST_INSERT_HEAD(&filehead, fp, f_list);
> - }
> - p->p_fd->fd_ofiles[i] = fp;
> - p->p_fd->fd_ofileflags[i] |= (flags & UF_EXCLOSE);
> fp->f_count = 1;
> fp->f_cred = p->p_ucred;
> crhold(fp->f_cred);
> *resultfp = fp;
> *resultfd = i;
> - FREF(fp);
> +
> + mtx_enter(&fhdlk);
> + fp->f_count++;
> + mtx_leave(&fhdlk);
> +
> return (0);
> }
>
> @@ -1063,6 +1099,7 @@ fdcopy(struct process *pr)
> newfdp->fd_flags = fdp->fd_flags;
> newfdp->fd_cmask = fdp->fd_cmask;
>
> + mtx_enter(&fhdlk);
> for (i = 0; i <= fdp->fd_lastfile; i++) {
> struct file *fp = fdp->fd_ofiles[i];
>
> @@ -1079,12 +1116,13 @@ fdcopy(struct process *pr)
> fp->f_type == DTYPE_KQUEUE)
> continue;
>
> - FREF(fp);
> + fp->f_count++;
> newfdp->fd_ofiles[i] = fp;
> newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
> fd_used(newfdp, i);
> }
> }
> + mtx_leave(&fhdlk);
> fdpunlock(fdp);
>
> return (newfdp);
> @@ -1106,8 +1144,9 @@ fdfree(struct proc *p)
> for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
> fp = *fpp;
> if (fp != NULL) {
> - FREF(fp);
> *fpp = NULL;
> + /* closef() expects a refcount of 2 */
> + FREF(fp);
> (void) closef(fp, p);
> }
> }
> @@ -1145,11 +1184,11 @@ closef(struct file *fp, struct proc *p)
> if (fp == NULL)
> return (0);
>
> -#ifdef DIAGNOSTIC
> - if (fp->f_count < 2)
> - panic("closef: count (%ld) < 2", fp->f_count);
> -#endif
> + KASSERTMSG(fp->f_count >= 2, "count (%ld) < 2", fp->f_count);
> +
> + mtx_enter(&fhdlk);
> fp->f_count--;
> + mtx_leave(&fhdlk);
>
> /*
> * POSIX record locking dictates that any close releases ALL
> @@ -1181,18 +1220,19 @@ fdrop(struct file *fp, struct proc *p)
> {
> int error;
>
> -#ifdef DIAGNOSTIC
> - if (fp->f_count != 0)
> - panic("fdrop: count (%ld) != 0", fp->f_count);
> -#endif
> + MUTEX_ASSERT_LOCKED(&fhdlk);
> +
> + KASSERTMSG(fp->f_count == 0, "count (%ld) != 0", fp->f_count);
> +
> + if (fp->f_iflags & FIF_INSERTED)
> + LIST_REMOVE(fp, f_list);
> + mtx_leave(&fhdlk);
>
> if (fp->f_ops)
> error = (*fp->f_ops->fo_close)(fp, p);
> else
> error = 0;
>
> - /* Free fp */
> - LIST_REMOVE(fp, f_list);
> crfree(fp->f_cred);
> numfiles--;
> pool_put(&file_pool, fp);
> @@ -1307,7 +1347,7 @@ dupfdopen(struct proc *p, int indx, int
> * of file descriptors, or the fd to be dup'd has already been
> * closed, reject. Note, there is no need to check for new == old
> * because fd_getfile will return NULL if the file at indx is
> - * newly created by falloc (FIF_LARVAL).
> + * newly created by falloc.
> */
> if ((wfp = fd_getfile(fdp, dupfd)) == NULL)
> return (EBADF);
> Index: kern/kern_event.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_event.c,v
> retrieving revision 1.88
> diff -u -p -r1.88 kern_event.c
> --- kern/kern_event.c 27 Apr 2018 10:13:37 -0000 1.88
> +++ kern/kern_event.c 22 May 2018 13:21:54 -0000
> @@ -441,10 +441,9 @@ sys_kqueue(struct proc *p, void *v, regi
> int fd, error;
>
> fdplock(fdp);
> - error = falloc(p, 0, &fp, &fd);
> - fdpunlock(fdp);
> + error = falloc(p, &fp, &fd);
> if (error)
> - return (error);
> + goto out;
> fp->f_flag = FREAD | FWRITE;
> fp->f_type = DTYPE_KQUEUE;
> fp->f_ops = &kqueueops;
> @@ -456,8 +455,11 @@ sys_kqueue(struct proc *p, void *v, regi
> if (fdp->fd_knlistsize < 0)
> fdp->fd_knlistsize = 0; /* this process has a kq */
> kq->kq_fdp = fdp;
> - FILE_SET_MATURE(fp, p);
> - return (0);
> + fdinsert(fdp, fd, 0, fp);
> + FRELE(fp, p);
> +out:
> + fdpunlock(fdp);
> + return (error);
> }
>
> int
> Index: kern/kern_exec.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_exec.c,v
> retrieving revision 1.195
> diff -u -p -r1.195 kern_exec.c
> --- kern/kern_exec.c 28 Apr 2018 03:13:04 -0000 1.195
> +++ kern/kern_exec.c 22 May 2018 13:21:54 -0000
> @@ -584,7 +584,7 @@ sys_execve(struct proc *p, void *v, regi
> struct vnode *vp;
> int indx;
>
> - if ((error = falloc(p, 0, &fp, &indx)) != 0)
> + if ((error = falloc(p, &fp, &indx)) != 0)
> break;
> #ifdef DIAGNOSTIC
> if (indx != i)
> @@ -607,10 +607,9 @@ sys_execve(struct proc *p, void *v, regi
> fp->f_type = DTYPE_VNODE;
> fp->f_ops = &vnops;
> fp->f_data = (caddr_t)vp;
> - FILE_SET_MATURE(fp, p);
> - } else {
> - FRELE(fp, p);
> + fdinsert(p->p_fd, indx, 0, fp);
> }
> + FRELE(fp, p);
> }
> fdpunlock(p->p_fd);
> if (error)
> Index: kern/kern_ktrace.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_ktrace.c,v
> retrieving revision 1.96
> diff -u -p -r1.96 kern_ktrace.c
> --- kern/kern_ktrace.c 28 Apr 2018 03:13:04 -0000 1.96
> +++ kern/kern_ktrace.c 22 May 2018 13:21:54 -0000
> @@ -225,7 +225,7 @@ ktrgenio(struct proc *p, int fd, enum ui
> struct ktr_header kth;
> struct ktr_genio ktp;
> caddr_t cp;
> - int count;
> + int count, error;
> int buflen;
>
> atomic_setbits_int(&p->p_flag, P_INKTR);
> @@ -254,7 +254,10 @@ ktrgenio(struct proc *p, int fd, enum ui
> if (copyin(iov->iov_base, cp, count))
> break;
>
> - if (ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count) != 0)
> + KERNEL_LOCK();
> + error = ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count);
> + KERNEL_UNLOCK();
> + if (error != 0)
> break;
>
> iov->iov_len -= count;
> @@ -294,13 +297,14 @@ ktrstruct(struct proc *p, const char *na
> {
> struct ktr_header kth;
>
> - KERNEL_ASSERT_LOCKED();
> atomic_setbits_int(&p->p_flag, P_INKTR);
> ktrinitheader(&kth, p, KTR_STRUCT);
> -
> +
> if (data == NULL)
> datalen = 0;
> + KERNEL_LOCK();
> ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen);
> + KERNEL_UNLOCK();
> atomic_clearbits_int(&p->p_flag, P_INKTR);
> }
>
> @@ -386,7 +390,9 @@ ktrpledge(struct proc *p, int error, uin
> kp.code = code;
> kp.syscall = syscall;
>
> + KERNEL_LOCK();
> ktrwrite(p, &kth, &kp, sizeof(kp));
> + KERNEL_UNLOCK();
> atomic_clearbits_int(&p->p_flag, P_INKTR);
> }
>
> @@ -622,6 +628,8 @@ ktrwriteraw(struct proc *curp, struct vn
> struct iovec aiov[3];
> struct process *pr;
> int error;
> +
> + KERNEL_ASSERT_LOCKED();
>
> auio.uio_iov = &aiov[0];
> auio.uio_offset = 0;
> Index: kern/kern_pledge.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_pledge.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 kern_pledge.c
> --- kern/kern_pledge.c 28 Apr 2018 12:49:21 -0000 1.230
> +++ kern/kern_pledge.c 22 May 2018 13:21:54 -0000
> @@ -523,6 +523,7 @@ pledge_fail(struct proc *p, int error, u
> if (p->p_p->ps_pledge & PLEDGE_ERROR)
> return (ENOSYS);
>
> + KERNEL_LOCK();
> log(LOG_ERR, "%s[%d]: pledge \"%s\", syscall %d\n",
> p->p_p->ps_comm, p->p_p->ps_pid, codes, p->p_pledge_syscall);
> p->p_p->ps_acflag |= APLEDGE;
> @@ -535,6 +536,7 @@ pledge_fail(struct proc *p, int error, u
> psignal(p, SIGABRT);
>
> p->p_p->ps_pledge = 0; /* Disable all PLEDGE_ flags */
> + KERNEL_UNLOCK();
> return (error);
> }
>
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.337
> diff -u -p -r1.337 kern_sysctl.c
> --- kern/kern_sysctl.c 16 May 2018 14:53:43 -0000 1.337
> +++ kern/kern_sysctl.c 22 May 2018 13:21:54 -0000
> @@ -1059,7 +1059,9 @@ fill_file(struct kinfo_file *kf, struct
> kf->f_flag = fp->f_flag;
> kf->f_iflags = fp->f_iflags;
> kf->f_type = fp->f_type;
> + mtx_enter(&fhdlk);
> kf->f_count = fp->f_count;
> + mtx_leave(&fhdlk);
> if (show_pointers)
> kf->f_ucred = PTRTOINT64(fp->f_cred);
> kf->f_uid = fp->f_cred->cr_uid;
> Index: kern/sys_pipe.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_pipe.c,v
> retrieving revision 1.78
> diff -u -p -r1.78 sys_pipe.c
> --- kern/sys_pipe.c 10 Apr 2018 09:17:45 -0000 1.78
> +++ kern/sys_pipe.c 22 May 2018 13:21:54 -0000
> @@ -154,7 +154,7 @@ dopipe(struct proc *p, int *ufds, int fl
>
> fdplock(fdp);
>
> - error = falloc(p, cloexec, &rf, &fds[0]);
> + error = falloc(p, &rf, &fds[0]);
> if (error != 0)
> goto free2;
> rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
> @@ -162,7 +162,7 @@ dopipe(struct proc *p, int *ufds, int fl
> rf->f_data = rpipe;
> rf->f_ops = &pipeops;
>
> - error = falloc(p, cloexec, &wf, &fds[1]);
> + error = falloc(p, &wf, &fds[1]);
> if (error != 0)
> goto free3;
> wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
> @@ -173,8 +173,8 @@ dopipe(struct proc *p, int *ufds, int fl
> rpipe->pipe_peer = wpipe;
> wpipe->pipe_peer = rpipe;
>
> - FILE_SET_MATURE(rf, p);
> - FILE_SET_MATURE(wf, p);
> + fdinsert(fdp, fds[0], cloexec, rf);
> + fdinsert(fdp, fds[1], cloexec, wf);
>
> error = copyout(fds, ufds, sizeof(fds));
> if (error != 0) {
> @@ -186,6 +186,9 @@ dopipe(struct proc *p, int *ufds, int fl
> ktrfds(p, fds, 2);
> #endif
> fdpunlock(fdp);
> +
> + FRELE(rf, p);
> + FRELE(wf, p);
> return (error);
>
> free3:
> Index: kern/syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.c,v
> retrieving revision 1.190
> diff -u -p -r1.190 syscalls.c
> --- kern/syscalls.c 12 Dec 2017 01:13:14 -0000 1.190
> +++ kern/syscalls.c 22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: syscalls.c,v 1.190 2017/12/12 01:13:14 deraadt Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * System call names.
> Index: kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.180
> diff -u -p -r1.180 syscalls.master
> --- kern/syscalls.master 12 Dec 2017 01:12:34 -0000 1.180
> +++ kern/syscalls.master 22 May 2018 13:21:54 -0000
> @@ -88,18 +88,18 @@
> #else
> 26 UNIMPL ptrace
> #endif
> -27 STD { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
> +27 STD NOLOCK { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
> int flags); }
> -28 STD { ssize_t sys_sendmsg(int s, \
> +28 STD NOLOCK { ssize_t sys_sendmsg(int s, \
> const struct msghdr *msg, int flags); }
> -29 STD { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
> +29 STD NOLOCK { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
> int flags, struct sockaddr *from, \
> socklen_t *fromlenaddr); }
> -30 STD { int sys_accept(int s, struct sockaddr *name, \
> +30 STD NOLOCK { int sys_accept(int s, struct sockaddr *name, \
> socklen_t *anamelen); }
> -31 STD { int sys_getpeername(int fdes, struct sockaddr *asa, \
> +31 STD NOLOCK { int sys_getpeername(int fdes, struct sockaddr *asa, \
> socklen_t *alen); }
> -32 STD { int sys_getsockname(int fdes, struct sockaddr *asa, \
> +32 STD NOLOCK { int sys_getsockname(int fdes, struct sockaddr *asa, \
> socklen_t *alen); }
> 33 STD { int sys_access(const char *path, int amode); }
> 34 STD { int sys_chflags(const char *path, u_int flags); }
> @@ -205,26 +205,26 @@
> 91 STD { int sys_nanosleep(const struct timespec *rqtp, \
> struct timespec *rmtp); }
> 92 STD { int sys_fcntl(int fd, int cmd, ... void *arg); }
> -93 STD { int sys_accept4(int s, struct sockaddr *name, \
> +93 STD NOLOCK { int sys_accept4(int s, struct sockaddr *name, \
> socklen_t *anamelen, int flags); }
> 94 STD { int sys___thrsleep(const volatile void *ident, \
> clockid_t clock_id, const struct timespec *tp, \
> void *lock, const int *abort); }
> 95 STD { int sys_fsync(int fd); }
> 96 STD { int sys_setpriority(int which, id_t who, int prio); }
> -97 STD { int sys_socket(int domain, int type, int protocol); }
> -98 STD { int sys_connect(int s, const struct sockaddr *name, \
> +97 STD NOLOCK { int sys_socket(int domain, int type, int protocol); }
> +98 STD NOLOCK { int sys_connect(int s, const struct sockaddr *name, \
> socklen_t namelen); }
> 99 STD { int sys_getdents(int fd, void *buf, size_t buflen); }
> 100 STD { int sys_getpriority(int which, id_t who); }
> 101 STD { int sys_pipe2(int *fdp, int flags); }
> 102 STD { int sys_dup3(int from, int to, int flags); }
> 103 STD { int sys_sigreturn(struct sigcontext *sigcntxp); }
> -104 STD { int sys_bind(int s, const struct sockaddr *name, \
> +104 STD NOLOCK { int sys_bind(int s, const struct sockaddr *name, \
> socklen_t namelen); }
> -105 STD { int sys_setsockopt(int s, int level, int name, \
> +105 STD NOLOCK { int sys_setsockopt(int s, int level, int name, \
> const void *val, socklen_t valsize); }
> -106 STD { int sys_listen(int s, int backlog); }
> +106 STD NOLOCK { int sys_listen(int s, int backlog); }
> 107 STD { int sys_chflagsat(int fd, const char *path, \
> u_int flags, int atflags); }
> 108 STD { int sys_pledge(const char *promises, \
> @@ -243,7 +243,7 @@
> 115 OBSOL vtrace
> 116 OBSOL t32_gettimeofday
> 117 OBSOL t32_getrusage
> -118 STD { int sys_getsockopt(int s, int level, int name, \
> +118 STD NOLOCK { int sys_getsockopt(int s, int level, int name, \
> void *val, socklen_t *avalsize); }
> 119 STD { int sys_thrkill(pid_t tid, int signum, void *tcb); }
> 120 STD { ssize_t sys_readv(int fd, \
> @@ -261,11 +261,11 @@
> 130 OBSOL oftruncate
> 131 STD { int sys_flock(int fd, int how); }
> 132 STD { int sys_mkfifo(const char *path, mode_t mode); }
> -133 STD { ssize_t sys_sendto(int s, const void *buf, \
> +133 STD NOLOCK { ssize_t sys_sendto(int s, const void *buf, \
> size_t len, int flags, const struct sockaddr *to, \
> socklen_t tolen); }
> -134 STD { int sys_shutdown(int s, int how); }
> -135 STD { int sys_socketpair(int domain, int type, \
> +134 STD NOLOCK { int sys_shutdown(int s, int how); }
> +135 STD NOLOCK { int sys_socketpair(int domain, int type, \
> int protocol, int *rsv); }
> 136 STD { int sys_mkdir(const char *path, mode_t mode); }
> 137 STD { int sys_rmdir(const char *path); }
> Index: kern/tty_pty.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/tty_pty.c,v
> retrieving revision 1.84
> diff -u -p -r1.84 tty_pty.c
> --- kern/tty_pty.c 28 Apr 2018 03:13:04 -0000 1.84
> +++ kern/tty_pty.c 22 May 2018 13:21:54 -0000
> @@ -1070,11 +1070,11 @@ ptmioctl(dev_t dev, u_long cmd, caddr_t
> case PTMGET:
> fdplock(fdp);
> /* Grab two filedescriptors. */
> - if ((error = falloc(p, 0, &cfp, &cindx)) != 0) {
> + if ((error = falloc(p, &cfp, &cindx)) != 0) {
> fdpunlock(fdp);
> break;
> }
> - if ((error = falloc(p, 0, &sfp, &sindx)) != 0) {
> + if ((error = falloc(p, &sfp, &sindx)) != 0) {
> fdremove(fdp, cindx);
> closef(cfp, p);
> fdpunlock(fdp);
> @@ -1166,11 +1166,12 @@ retry:
> memcpy(ptm->cn, pti->pty_pn, sizeof(pti->pty_pn));
> memcpy(ptm->sn, pti->pty_sn, sizeof(pti->pty_sn));
>
> - /* mark the files mature now that we've passed all errors */
> - FILE_SET_MATURE(cfp, p);
> - FILE_SET_MATURE(sfp, p);
> -
> + /* insert files now that we've passed all errors */
> + fdinsert(fdp, cindx, 0, cfp);
> + fdinsert(fdp, sindx, 0, sfp);
> fdpunlock(fdp);
> + FRELE(cfp, p);
> + FRELE(sfp, p);
> break;
> default:
> error = EINVAL;
> Index: kern/uipc_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.171
> diff -u -p -r1.171 uipc_syscalls.c
> --- kern/uipc_syscalls.c 22 May 2018 09:51:01 -0000 1.171
> +++ kern/uipc_syscalls.c 22 May 2018 13:21:54 -0000
> @@ -101,13 +101,14 @@ sys_socket(struct proc *p, void *v, regi
> fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
>
> error = socreate(SCARG(uap, domain), &so, type, SCARG(uap, protocol));
> - if (error != 0)
> - goto out;
> + if (error)
> + return (error);
>
> + KERNEL_LOCK();
> fdplock(fdp);
> - error = falloc(p, cloexec, &fp, &fd);
> - fdpunlock(fdp);
> + error = falloc(p, &fp, &fd);
> if (error) {
> + fdpunlock(fdp);
> soclose(so);
> } else {
> fp->f_flag = fflag;
> @@ -117,10 +118,12 @@ sys_socket(struct proc *p, void *v, regi
> so->so_state |= SS_NBIO;
> so->so_state |= ss;
> fp->f_data = so;
> - FILE_SET_MATURE(fp, p);
> + fdinsert(fdp, fd, cloexec, fp);
> + fdpunlock(fdp);
> + FRELE(fp, p);
> *retval = fd;
> }
> -out:
> + KERNEL_UNLOCK();
> return (error);
> }
>
> @@ -272,7 +275,9 @@ doaccept(struct proc *p, int sock, struc
> socklen_t namelen;
> int error, s, tmpfd;
> struct socket *head, *so;
> - int nflag;
> + int cloexec, nflag;
> +
> + cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
>
> if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
> return (error);
> @@ -282,7 +287,7 @@ doaccept(struct proc *p, int sock, struc
> headfp = fp;
>
> fdplock(fdp);
> - error = falloc(p, (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0, &fp, &tmpfd);
> + error = falloc(p, &fp, &tmpfd);
> fdpunlock(fdp);
> if (error) {
> FRELE(headfp, p);
> @@ -347,8 +352,11 @@ out:
> else
> so->so_state &= ~SS_NBIO;
> sounlock(s);
> + fdplock(fdp);
> fp->f_data = so;
> - FILE_SET_MATURE(fp, p);
> + fdinsert(fdp, tmpfd, cloexec, fp);
> + fdpunlock(fdp);
> + FRELE(fp, p);
> *retval = tmpfd;
> } else {
> sounlock(s);
> @@ -475,14 +483,15 @@ sys_socketpair(struct proc *p, void *v,
> if (error != 0)
> goto free2;
> }
> + KERNEL_LOCK();
> fdplock(fdp);
> - if ((error = falloc(p, cloexec, &fp1, &sv[0])) != 0)
> + if ((error = falloc(p, &fp1, &sv[0])) != 0)
> goto free3;
> fp1->f_flag = fflag;
> fp1->f_type = DTYPE_SOCKET;
> fp1->f_ops = &socketops;
> fp1->f_data = so1;
> - if ((error = falloc(p, cloexec, &fp2, &sv[1])) != 0)
> + if ((error = falloc(p, &fp2, &sv[1])) != 0)
> goto free4;
> fp2->f_flag = fflag;
> fp2->f_type = DTYPE_SOCKET;
> @@ -500,9 +509,12 @@ sys_socketpair(struct proc *p, void *v,
> (*fp2->f_ops->fo_ioctl)(fp2, FIONBIO, (caddr_t)&type,
> p);
> }
> - FILE_SET_MATURE(fp1, p);
> - FILE_SET_MATURE(fp2, p);
> + fdinsert(fdp, sv[0], cloexec, fp1);
> + fdinsert(fdp, sv[1], cloexec, fp2);
> fdpunlock(fdp);
> + FRELE(fp1, p);
> + FRELE(fp2, p);
> + KERNEL_UNLOCK();
> return (0);
> }
> fdremove(fdp, sv[1]);
> @@ -514,6 +526,7 @@ free4:
> so1 = NULL;
> free3:
> fdpunlock(fdp);
> + KERNEL_UNLOCK();
> free2:
> if (so2 != NULL)
> (void)soclose(so2);
> @@ -678,13 +691,16 @@ sendit(struct proc *p, int s, struct msg
> }
> #endif
> len = auio.uio_resid;
> - error = sosend(fp->f_data, to, &auio, NULL, control, flags);
> + error = sosend(so, to, &auio, NULL, control, flags);
> if (error) {
> if (auio.uio_resid != len && (error == ERESTART ||
> error == EINTR || error == EWOULDBLOCK))
> error = 0;
> - if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0)
> + if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
> + KERNEL_LOCK();
> ptsignal(p, SIGPIPE, STHREAD);
> + KERNEL_UNLOCK();
> + }
> }
> if (error == 0) {
> *retsize = len - auio.uio_resid;
> @@ -925,11 +941,13 @@ sys_shutdown(struct proc *p, void *v, re
> syscallarg(int) how;
> } */ *uap = v;
> struct file *fp;
> + struct socket *so;
> int error;
>
> if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
> return (error);
> - error = soshutdown(fp->f_data, SCARG(uap, how));
> + so = fp->f_data;
> + error = soshutdown(so, SCARG(uap, how));
> FRELE(fp, p);
> return (error);
> }
> @@ -1163,7 +1181,8 @@ getsock(struct proc *p, int fdes, struct
> {
> struct file *fp;
>
> - if ((fp = fd_getfile(p->p_fd, fdes)) == NULL)
> + fp = fd_getfile(p->p_fd, fdes);
> + if (fp == NULL)
> return (EBADF);
> if (fp->f_type != DTYPE_SOCKET) {
> FRELE(fp, p);
> Index: kern/uipc_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.126
> diff -u -p -r1.126 uipc_usrreq.c
> --- kern/uipc_usrreq.c 28 Apr 2018 03:13:04 -0000 1.126
> +++ kern/uipc_usrreq.c 22 May 2018 13:21:54 -0000
> @@ -899,6 +899,7 @@ unp_gc(void *arg __unused)
> fp = defer->ud_fp[i].fp;
> if (fp == NULL)
> continue;
> + /* closef() expects a refcount of 2 */
> FREF(fp);
> if ((unp = fptounp(fp)) != NULL)
> unp->unp_msgcount--;
> @@ -915,6 +916,8 @@ unp_gc(void *arg __unused)
> do {
> nunref = 0;
> LIST_FOREACH(unp, &unp_head, unp_link) {
> + mtx_enter(&fhdlk);
> + fp = unp->unp_file;
> if (unp->unp_flags & UNP_GCDEFER) {
> /*
> * This socket is referenced by another
> @@ -925,8 +928,9 @@ unp_gc(void *arg __unused)
> unp_defer--;
> } else if (unp->unp_flags & UNP_GCMARK) {
> /* marked as live in previous pass */
> + mtx_leave(&fhdlk);
> continue;
> - } else if ((fp = unp->unp_file) == NULL) {
> + } else if (fp == NULL) {
> /* not being passed, so can't be in loop */
> } else if (fp->f_count == 0) {
> /*
> @@ -943,9 +947,11 @@ unp_gc(void *arg __unused)
> if (fp->f_count == unp->unp_msgcount) {
> nunref++;
> unp->unp_flags |= UNP_GCDEAD;
> + mtx_leave(&fhdlk);
> continue;
> }
> }
> + mtx_leave(&fhdlk);
>
> /*
> * This is the first time we've seen this socket on
> Index: kern/vfs_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
> retrieving revision 1.283
> diff -u -p -r1.283 vfs_syscalls.c
> --- kern/vfs_syscalls.c 8 May 2018 08:53:41 -0000 1.283
> +++ kern/vfs_syscalls.c 22 May 2018 13:21:54 -0000
> @@ -899,7 +899,7 @@ doopenat(struct proc *p, int fd, const c
> struct file *fp;
> struct vnode *vp;
> struct vattr vattr;
> - int flags, cmode;
> + int flags, cloexec, cmode;
> int type, indx, error, localtrunc = 0;
> struct flock lf;
> struct nameidata nd;
> @@ -911,10 +911,10 @@ doopenat(struct proc *p, int fd, const c
> return (error);
> }
>
> - fdplock(fdp);
> + cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
>
> - if ((error = falloc(p, (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
> - &indx)) != 0)
> + fdplock(fdp);
> + if ((error = falloc(p, &fp, &indx)) != 0)
> goto out;
> flags = FFLAGS(oflags);
> if (flags & FREAD)
> @@ -999,7 +999,8 @@ doopenat(struct proc *p, int fd, const c
> }
> VOP_UNLOCK(vp);
> *retval = indx;
> - FILE_SET_MATURE(fp, p);
> + fdinsert(fdp, indx, cloexec, fp);
> + FRELE(fp, p);
> out:
> fdpunlock(fdp);
> return (error);
> @@ -1060,7 +1061,7 @@ sys_fhopen(struct proc *p, void *v, regi
> struct vnode *vp = NULL;
> struct mount *mp;
> struct ucred *cred = p->p_ucred;
> - int flags;
> + int flags, cloexec;
> int type, indx, error=0;
> struct flock lf;
> struct vattr va;
> @@ -1078,9 +1079,10 @@ sys_fhopen(struct proc *p, void *v, regi
> if ((flags & O_CREAT))
> return (EINVAL);
>
> + cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
> +
> fdplock(fdp);
> - if ((error = falloc(p, (flags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
> - &indx)) != 0) {
> + if ((error = falloc(p, &fp, &indx)) != 0) {
> fp = NULL;
> goto bad;
> }
> @@ -1160,9 +1162,9 @@ sys_fhopen(struct proc *p, void *v, regi
> }
> VOP_UNLOCK(vp);
> *retval = indx;
> - FILE_SET_MATURE(fp, p);
> -
> + fdinsert(fdp, indx, cloexec, fp);
> fdpunlock(fdp);
> + FRELE(fp, p);
> return (0);
>
> bad:
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.552
> diff -u -p -r1.552 if.c
> --- net/if.c 17 May 2018 11:04:14 -0000 1.552
> +++ net/if.c 22 May 2018 13:21:54 -0000
> @@ -1378,7 +1378,7 @@ ifa_ifwithaddr(struct sockaddr *addr, u_
> struct ifaddr *ifa;
> u_int rdomain;
>
> - KERNEL_ASSERT_LOCKED();
> + KERNEL_LOCK();
> rdomain = rtable_l2(rtableid);
> TAILQ_FOREACH(ifp, &ifnet, if_list) {
> if (ifp->if_rdomain != rdomain)
> @@ -1388,10 +1388,13 @@ ifa_ifwithaddr(struct sockaddr *addr, u_
> if (ifa->ifa_addr->sa_family != addr->sa_family)
> continue;
>
> - if (equal(addr, ifa->ifa_addr))
> + if (equal(addr, ifa->ifa_addr)) {
> + KERNEL_UNLOCK();
> return (ifa);
> + }
> }
> }
> + KERNEL_UNLOCK();
> return (NULL);
> }
>
> @@ -1404,8 +1407,8 @@ ifa_ifwithdstaddr(struct sockaddr *addr,
> struct ifnet *ifp;
> struct ifaddr *ifa;
>
> - KERNEL_ASSERT_LOCKED();
> rdomain = rtable_l2(rdomain);
> + KERNEL_LOCK();
> TAILQ_FOREACH(ifp, &ifnet, if_list) {
> if (ifp->if_rdomain != rdomain)
> continue;
> @@ -1414,11 +1417,14 @@ ifa_ifwithdstaddr(struct sockaddr *addr,
> if (ifa->ifa_addr->sa_family !=
> addr->sa_family || ifa->ifa_dstaddr == NULL)
> continue;
> - if (equal(addr, ifa->ifa_dstaddr))
> + if (equal(addr, ifa->ifa_dstaddr)) {
> + KERNEL_UNLOCK();
> return (ifa);
> + }
> }
> }
> }
> + KERNEL_UNLOCK();
> return (NULL);
> }
>
> Index: sys/file.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/file.h,v
> retrieving revision 1.45
> diff -u -p -r1.45 file.h
> --- sys/file.h 9 May 2018 08:42:02 -0000 1.45
> +++ sys/file.h 22 May 2018 13:21:54 -0000
> @@ -65,6 +65,7 @@ struct fileops {
> *
> * Locks used to protect struct members in this file:
> * I immutable after creation
> + * F global `fhdlk' mutex
> * f per file `f_mtx'
> * k kernel lock
> */
> @@ -77,7 +78,7 @@ struct file {
> #define DTYPE_PIPE 3 /* pipe */
> #define DTYPE_KQUEUE 4 /* event queue */
> short f_type; /* [I] descriptor type */
> - long f_count; /* [k] reference count */
> + long f_count; /* [F] reference count */
> struct ucred *f_cred; /* [I] credentials associated with descriptor */
> struct fileops *f_ops; /* [I] file operation pointers */
> off_t f_offset; /* [k] */
> @@ -91,26 +92,31 @@ struct file {
> };
>
> #define FIF_HASLOCK 0x01 /* descriptor holds advisory lock */
> -#define FIF_LARVAL 0x02 /* not fully constructed, don't use */
> -
> -#define FILE_IS_USABLE(fp) \
> - (((fp)->f_iflags & FIF_LARVAL) == 0)
> +#define FIF_INSERTED 0x80 /* present in `filehead' */
>
> #define FREF(fp) \
> do { \
> extern void vfs_stall_barrier(void); \
> vfs_stall_barrier(); \
> + mtx_enter(&fhdlk); \
> (fp)->f_count++; \
> + mtx_leave(&fhdlk); \
> } while (0)
> -#define FRELE(fp,p) (--(fp)->f_count == 0 ? fdrop(fp, p) : 0)
>
> -#define FILE_SET_MATURE(fp,p) do { \
> - (fp)->f_iflags &= ~FIF_LARVAL; \
> - FRELE(fp, p); \
> -} while (0)
> +#define FRELE(fp,p) \
> +({ \
> + int rv = 0; \
> + mtx_enter(&fhdlk); \
> + if (--(fp)->f_count == 0) \
> + rv = fdrop(fp, p); \
> + else \
> + mtx_leave(&fhdlk); \
> + rv; \
> +})
>
> int fdrop(struct file *, struct proc *);
>
> +extern struct mutex fhdlk; /* protects `filehead' and f_count */
> LIST_HEAD(filelist, file);
> extern int maxfiles; /* kernel limit on number of open files */
> extern int numfiles; /* actual number of open files */
> Index: sys/filedesc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/filedesc.h,v
> retrieving revision 1.35
> diff -u -p -r1.35 filedesc.h
> --- sys/filedesc.h 25 Apr 2018 10:29:17 -0000 1.35
> +++ sys/filedesc.h 22 May 2018 13:21:54 -0000
> @@ -125,12 +125,13 @@ void filedesc_init(void);
> int dupfdopen(struct proc *, int, int);
> int fdalloc(struct proc *p, int want, int *result);
> void fdexpand(struct proc *);
> -int falloc(struct proc *_p, int _flags, struct file **_rfp, int *_rfd);
> +int falloc(struct proc *_p, struct file **_rfp, int *_rfd);
> struct filedesc *fdinit(void);
> struct filedesc *fdshare(struct process *);
> struct filedesc *fdcopy(struct process *);
> void fdfree(struct proc *p);
> int fdrelease(struct proc *p, int);
> +void fdinsert(struct filedesc *, int, int, struct file *);
> void fdremove(struct filedesc *, int);
> void fdcloseexec(struct proc *);
> struct file *fd_iterfile(struct file *, struct proc *);
> Index: sys/syscall.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscall.h,v
> retrieving revision 1.190
> diff -u -p -r1.190 syscall.h
> --- sys/syscall.h 12 Dec 2017 01:13:14 -0000 1.190
> +++ sys/syscall.h 22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: syscall.h,v 1.190 2017/12/12 01:13:14 deraadt Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * System call numbers.
> Index: sys/syscallargs.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscallargs.h,v
> retrieving revision 1.193
> diff -u -p -r1.193 syscallargs.h
> --- sys/syscallargs.h 12 Dec 2017 01:13:14 -0000 1.193
> +++ sys/syscallargs.h 22 May 2018 13:21:54 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: syscallargs.h,v 1.193 2017/12/12 01:13:14 deraadt Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * System call argument lists.
>