The way our kernel allocates and populates new 'struct file *' is
currently a complete mess.  One of the problems is that it puts
incomplete descriptors on the global `filehead' list and on the
open file array `fd_ofiles'.  But to make sure that other threads
won't use such descriptors before they are ready, they are first
marked as LARVAL then unmarked via the FILE_SET_MATURE() macro.

This is a problem because we'll soon need to serialize accesses
to the above mentioned data structures with finer locks.  And having
to deal with specific checks in multiple places is simply not viable.

So the diff below changes the logic by introducing a new function:
fdinsert().  This function will be used to insert new descriptors into
shared data structured when they are completely initialized.

Note that file initializations could still be improved, but let's do it
step by step.

My sparc64 build machine is happy with that, so I'm looking for reviews
and oks :)

Index: kern/kern_exec.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exec.c,v
retrieving revision 1.195
diff -u -p -r1.195 kern_exec.c
--- kern/kern_exec.c    28 Apr 2018 03:13:04 -0000      1.195
+++ kern/kern_exec.c    8 May 2018 11:54:34 -0000
@@ -584,7 +584,7 @@ sys_execve(struct proc *p, void *v, regi
                                struct vnode *vp;
                                int indx;
 
-                               if ((error = falloc(p, 0, &fp, &indx)) != 0)
+                               if ((error = falloc(p, &fp, &indx)) != 0)
                                        break;
 #ifdef DIAGNOSTIC
                                if (indx != i)
@@ -607,10 +607,9 @@ sys_execve(struct proc *p, void *v, regi
                                fp->f_type = DTYPE_VNODE;
                                fp->f_ops = &vnops;
                                fp->f_data = (caddr_t)vp;
-                               FILE_SET_MATURE(fp, p);
-                       } else {
-                               FRELE(fp, p);
+                               fdinsert(p->p_fd, indx, 0, fp);
                        }
+                       FRELE(fp, p);
                }
                fdpunlock(p->p_fd);
                if (error)
Index: kern/exec_script.c
===================================================================
RCS file: /cvs/src/sys/kern/exec_script.c,v
retrieving revision 1.44
diff -u -p -r1.44 exec_script.c
--- kern/exec_script.c  2 May 2018 02:24:56 -0000       1.44
+++ kern/exec_script.c  8 May 2018 11:54:39 -0000
@@ -170,17 +170,20 @@ check_shell:
 #endif
 
                fdplock(p->p_fd);
-               error = falloc(p, 0, &fp, &epp->ep_fd);
-               fdpunlock(p->p_fd);
-               if (error)
+               error = falloc(p, &fp, &epp->ep_fd);
+               if (error) {
+                       fdpunlock(p->p_fd);
                        goto fail;
+               }
 
                epp->ep_flags |= EXEC_HASFD;
                fp->f_type = DTYPE_VNODE;
                fp->f_ops = &vnops;
                fp->f_data = (caddr_t) scriptvp;
                fp->f_flag = FREAD;
-               FILE_SET_MATURE(fp, p);
+               fdinsert(p->p_fd, epp->ep_fd, 0, fp);
+               fdpunlock(p->p_fd);
+               FRELE(fp, p);
        }
 
        /* set up the parameters for the recursive check_exec() call */
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.158
diff -u -p -r1.158 kern_descrip.c
--- kern/kern_descrip.c 8 May 2018 09:03:58 -0000       1.158
+++ kern/kern_descrip.c 8 May 2018 12:21:49 -0000
@@ -190,7 +190,7 @@ fd_iterfile(struct file *fp, struct proc
                nfp = LIST_NEXT(fp, f_list);
 
        /* don't FREF when f_count == 0 to avoid race in fdrop() */
-       while (nfp != NULL && (nfp->f_count == 0 || !FILE_IS_USABLE(nfp)))
+       while (nfp != NULL && nfp->f_count == 0)
                nfp = LIST_NEXT(nfp, f_list);
        if (nfp != NULL)
                FREF(nfp);
@@ -209,9 +209,6 @@ fd_getfile(struct filedesc *fdp, int fd)
        if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
                return (NULL);
 
-       if (!FILE_IS_USABLE(fp))
-               return (NULL);
-
        FREF(fp);
        return (fp);
 }
@@ -658,6 +655,22 @@ finishdup(struct proc *p, struct file *f
 }
 
 void
+fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
+{
+       struct file *fq;
+
+       fdpassertlocked(fdp);
+       if ((fq = fdp->fd_ofiles[0]) != NULL) {
+               LIST_INSERT_AFTER(fq, fp, f_list);
+       } else {
+               LIST_INSERT_HEAD(&filehead, fp, f_list);
+       }
+       fdp->fd_ofiles[fd] = fp;
+       fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
+       fp->f_iflags |= FIF_INSERTED;
+}
+
+void
 fdremove(struct filedesc *fdp, int fd)
 {
        fdpassertlocked(fdp);
@@ -927,9 +940,9 @@ fdexpand(struct proc *p)
  * a file descriptor for the process that refers to it.
  */
 int
-falloc(struct proc *p, int flags, struct file **resultfp, int *resultfd)
+falloc(struct proc *p, struct file **resultfp, int *resultfd)
 {
-       struct file *fp, *fq;
+       struct file *fp;
        int error, i;
 
        KASSERT(resultfp != NULL);
@@ -958,14 +971,6 @@ restart:
        numfiles++;
        fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
        mtx_init(&fp->f_mtx, IPL_NONE);
-       fp->f_iflags = FIF_LARVAL;
-       if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
-               LIST_INSERT_AFTER(fq, fp, f_list);
-       } else {
-               LIST_INSERT_HEAD(&filehead, fp, f_list);
-       }
-       p->p_fd->fd_ofiles[i] = fp;
-       p->p_fd->fd_ofileflags[i] |= (flags & UF_EXCLOSE);
        fp->f_count = 1;
        fp->f_cred = p->p_ucred;
        crhold(fp->f_cred);
@@ -1191,8 +1196,8 @@ fdrop(struct file *fp, struct proc *p)
        else
                error = 0;
 
-       /* Free fp */
-       LIST_REMOVE(fp, f_list);
+       if (fp->f_iflags & FIF_INSERTED)
+               LIST_REMOVE(fp, f_list);
        crfree(fp->f_cred);
        numfiles--;
        pool_put(&file_pool, fp);
Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.88
diff -u -p -r1.88 kern_event.c
--- kern/kern_event.c   27 Apr 2018 10:13:37 -0000      1.88
+++ kern/kern_event.c   8 May 2018 11:54:46 -0000
@@ -441,10 +441,9 @@ sys_kqueue(struct proc *p, void *v, regi
        int fd, error;
 
        fdplock(fdp);
-       error = falloc(p, 0, &fp, &fd);
-       fdpunlock(fdp);
+       error = falloc(p, &fp, &fd);
        if (error)
-               return (error);
+               goto out;
        fp->f_flag = FREAD | FWRITE;
        fp->f_type = DTYPE_KQUEUE;
        fp->f_ops = &kqueueops;
@@ -456,8 +455,11 @@ sys_kqueue(struct proc *p, void *v, regi
        if (fdp->fd_knlistsize < 0)
                fdp->fd_knlistsize = 0;         /* this process has a kq */
        kq->kq_fdp = fdp;
-       FILE_SET_MATURE(fp, p);
-       return (0);
+       fdinsert(fdp, fd, 0, fp);
+       FRELE(fp, p);
+out:
+       fdpunlock(fdp);
+       return (error);
 }
 
 int
Index: kern/tty_pty.c
===================================================================
RCS file: /cvs/src/sys/kern/tty_pty.c,v
retrieving revision 1.84
diff -u -p -r1.84 tty_pty.c
--- kern/tty_pty.c      28 Apr 2018 03:13:04 -0000      1.84
+++ kern/tty_pty.c      8 May 2018 11:55:05 -0000
@@ -1070,11 +1070,11 @@ ptmioctl(dev_t dev, u_long cmd, caddr_t 
        case PTMGET:
                fdplock(fdp);
                /* Grab two filedescriptors. */
-               if ((error = falloc(p, 0, &cfp, &cindx)) != 0) {
+               if ((error = falloc(p, &cfp, &cindx)) != 0) {
                        fdpunlock(fdp);
                        break;
                }
-               if ((error = falloc(p, 0, &sfp, &sindx)) != 0) {
+               if ((error = falloc(p, &sfp, &sindx)) != 0) {
                        fdremove(fdp, cindx);
                        closef(cfp, p);
                        fdpunlock(fdp);
@@ -1166,11 +1166,12 @@ retry:
                memcpy(ptm->cn, pti->pty_pn, sizeof(pti->pty_pn));
                memcpy(ptm->sn, pti->pty_sn, sizeof(pti->pty_sn));
 
-               /* mark the files mature now that we've passed all errors */
-               FILE_SET_MATURE(cfp, p);
-               FILE_SET_MATURE(sfp, p);
-
+               /* insert files now that we've passed all errors */
+               fdinsert(fdp, cindx, 0, cfp);
+               fdinsert(fdp, sindx, 0, sfp);
                fdpunlock(fdp);
+               FRELE(cfp, p);
+               FRELE(sfp, p);
                break;
        default:
                error = EINVAL;
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.78
diff -u -p -r1.78 sys_pipe.c
--- kern/sys_pipe.c     10 Apr 2018 09:17:45 -0000      1.78
+++ kern/sys_pipe.c     8 May 2018 11:55:26 -0000
@@ -154,7 +154,7 @@ dopipe(struct proc *p, int *ufds, int fl
 
        fdplock(fdp);
 
-       error = falloc(p, cloexec, &rf, &fds[0]);
+       error = falloc(p, &rf, &fds[0]);
        if (error != 0)
                goto free2;
        rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
@@ -162,7 +162,7 @@ dopipe(struct proc *p, int *ufds, int fl
        rf->f_data = rpipe;
        rf->f_ops = &pipeops;
 
-       error = falloc(p, cloexec, &wf, &fds[1]);
+       error = falloc(p, &wf, &fds[1]);
        if (error != 0)
                goto free3;
        wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
@@ -173,8 +173,8 @@ dopipe(struct proc *p, int *ufds, int fl
        rpipe->pipe_peer = wpipe;
        wpipe->pipe_peer = rpipe;
 
-       FILE_SET_MATURE(rf, p);
-       FILE_SET_MATURE(wf, p);
+       fdinsert(fdp, fds[0], cloexec, rf);
+       fdinsert(fdp, fds[1], cloexec, wf);
 
        error = copyout(fds, ufds, sizeof(fds));
        if (error != 0) {
@@ -186,6 +186,9 @@ dopipe(struct proc *p, int *ufds, int fl
                ktrfds(p, fds, 2);
 #endif
        fdpunlock(fdp);
+
+       FRELE(rf, p);
+       FRELE(wf, p);
        return (error);
 
 free3:
Index: kern/uipc_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.170
diff -u -p -r1.170 uipc_syscalls.c
--- kern/uipc_syscalls.c        8 May 2018 08:53:41 -0000       1.170
+++ kern/uipc_syscalls.c        8 May 2018 12:16:39 -0000
@@ -105,9 +105,9 @@ sys_socket(struct proc *p, void *v, regi
                goto out;
 
        fdplock(fdp);
-       error = falloc(p, cloexec, &fp, &fd);
-       fdpunlock(fdp);
+       error = falloc(p, &fp, &fd);
        if (error) {
+               fdpunlock(fdp);
                soclose(so);
        } else {
                fp->f_flag = fflag;
@@ -117,7 +117,9 @@ sys_socket(struct proc *p, void *v, regi
                        so->so_state |= SS_NBIO;
                so->so_state |= ss;
                fp->f_data = so;
-               FILE_SET_MATURE(fp, p);
+               fdinsert(fdp, fd, cloexec, fp);
+               fdpunlock(fdp);
+               FRELE(fp, p);
                *retval = fd;
        }
 out:
@@ -272,7 +274,9 @@ doaccept(struct proc *p, int sock, struc
        socklen_t namelen;
        int error, s, tmpfd;
        struct socket *head, *so;
-       int nflag;
+       int cloexec, nflag;
+
+       cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
 
        if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
                return (error);
@@ -282,7 +286,7 @@ doaccept(struct proc *p, int sock, struc
        headfp = fp;
 
        fdplock(fdp);
-       error = falloc(p, (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0, &fp, &tmpfd);
+       error = falloc(p, &fp, &tmpfd);
        fdpunlock(fdp);
        if (error) {
                FRELE(headfp, p);
@@ -345,14 +349,18 @@ doaccept(struct proc *p, int sock, struc
                        so->so_state |= SS_NBIO;
                else
                        so->so_state &= ~SS_NBIO;
+               sounlock(s);
+               fdplock(fdp);
                fp->f_data = so;
-               FILE_SET_MATURE(fp, p);
+               fdinsert(fdp, tmpfd, cloexec, fp);
+               fdpunlock(fdp);
+               FRELE(fp, p);
                *retval = tmpfd;
        }
 out:
-       sounlock(s);
        m_freem(nam);
        if (error) {
+               sounlock(s);
                fdplock(fdp);
                fdremove(fdp, tmpfd);
                closef(fp, p);
@@ -475,13 +483,13 @@ sys_socketpair(struct proc *p, void *v, 
                        goto free2;
        }
        fdplock(fdp);
-       if ((error = falloc(p, cloexec, &fp1, &sv[0])) != 0)
+       if ((error = falloc(p, &fp1, &sv[0])) != 0)
                goto free3;
        fp1->f_flag = fflag;
        fp1->f_type = DTYPE_SOCKET;
        fp1->f_ops = &socketops;
        fp1->f_data = so1;
-       if ((error = falloc(p, cloexec, &fp2, &sv[1])) != 0)
+       if ((error = falloc(p, &fp2, &sv[1])) != 0)
                goto free4;
        fp2->f_flag = fflag;
        fp2->f_type = DTYPE_SOCKET;
@@ -499,9 +507,11 @@ sys_socketpair(struct proc *p, void *v, 
                        (*fp2->f_ops->fo_ioctl)(fp2, FIONBIO, (caddr_t)&type,
                            p);
                }
-               FILE_SET_MATURE(fp1, p);
-               FILE_SET_MATURE(fp2, p);
+               fdinsert(fdp, sv[0], cloexec, fp1);
+               fdinsert(fdp, sv[1], cloexec, fp2);
                fdpunlock(fdp);
+               FRELE(fp1, p);
+               FRELE(fp2, p);
                return (0);
        }
        fdremove(fdp, sv[1]);
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.283
diff -u -p -r1.283 vfs_syscalls.c
--- kern/vfs_syscalls.c 8 May 2018 08:53:41 -0000       1.283
+++ kern/vfs_syscalls.c 8 May 2018 11:55:41 -0000
@@ -899,7 +899,7 @@ doopenat(struct proc *p, int fd, const c
        struct file *fp;
        struct vnode *vp;
        struct vattr vattr;
-       int flags, cmode;
+       int flags, cloexec, cmode;
        int type, indx, error, localtrunc = 0;
        struct flock lf;
        struct nameidata nd;
@@ -911,10 +911,10 @@ doopenat(struct proc *p, int fd, const c
                        return (error);
        }
 
-       fdplock(fdp);
+       cloexec = (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0;
 
-       if ((error = falloc(p, (oflags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
-           &indx)) != 0)
+       fdplock(fdp);
+       if ((error = falloc(p, &fp, &indx)) != 0)
                goto out;
        flags = FFLAGS(oflags);
        if (flags & FREAD)
@@ -999,7 +999,8 @@ doopenat(struct proc *p, int fd, const c
        }
        VOP_UNLOCK(vp);
        *retval = indx;
-       FILE_SET_MATURE(fp, p);
+       fdinsert(fdp, indx, cloexec, fp);
+       FRELE(fp, p);
 out:
        fdpunlock(fdp);
        return (error);
@@ -1060,7 +1061,7 @@ sys_fhopen(struct proc *p, void *v, regi
        struct vnode *vp = NULL;
        struct mount *mp;
        struct ucred *cred = p->p_ucred;
-       int flags;
+       int flags, cloexec;
        int type, indx, error=0;
        struct flock lf;
        struct vattr va;
@@ -1078,9 +1079,10 @@ sys_fhopen(struct proc *p, void *v, regi
        if ((flags & O_CREAT))
                return (EINVAL);
 
+       cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
+
        fdplock(fdp);
-       if ((error = falloc(p, (flags & O_CLOEXEC) ? UF_EXCLOSE : 0, &fp,
-           &indx)) != 0) {
+       if ((error = falloc(p, &fp, &indx)) != 0) {
                fp = NULL;
                goto bad;
        }
@@ -1160,9 +1162,9 @@ sys_fhopen(struct proc *p, void *v, regi
        }
        VOP_UNLOCK(vp);
        *retval = indx;
-       FILE_SET_MATURE(fp, p);
-
+       fdinsert(fdp, indx, cloexec, fp);
        fdpunlock(fdp);
+       FRELE(fp, p);
        return (0);
 
 bad:
Index: sys/filedesc.h
===================================================================
RCS file: /cvs/src/sys/sys/filedesc.h,v
retrieving revision 1.35
diff -u -p -r1.35 filedesc.h
--- sys/filedesc.h      25 Apr 2018 10:29:17 -0000      1.35
+++ sys/filedesc.h      8 May 2018 11:54:18 -0000
@@ -125,12 +125,13 @@ void      filedesc_init(void);
 int    dupfdopen(struct proc *, int, int);
 int    fdalloc(struct proc *p, int want, int *result);
 void   fdexpand(struct proc *);
-int    falloc(struct proc *_p, int _flags, struct file **_rfp, int *_rfd);
+int    falloc(struct proc *_p, struct file **_rfp, int *_rfd);
 struct filedesc *fdinit(void);
 struct filedesc *fdshare(struct process *);
 struct filedesc *fdcopy(struct process *);
 void   fdfree(struct proc *p);
 int    fdrelease(struct proc *p, int);
+void   fdinsert(struct filedesc *, int, int, struct file *);
 void   fdremove(struct filedesc *, int);
 void   fdcloseexec(struct proc *);
 struct file *fd_iterfile(struct file *, struct proc *);
Index: sys/file.h
===================================================================
RCS file: /cvs/src/sys/sys/file.h,v
retrieving revision 1.43
diff -u -p -r1.43 file.h
--- sys/file.h  8 May 2018 08:58:49 -0000       1.43
+++ sys/file.h  8 May 2018 12:20:46 -0000
@@ -92,10 +92,7 @@ struct file {
 };
 
 #define FIF_HASLOCK            0x01    /* descriptor holds advisory lock */
-#define FIF_LARVAL             0x02    /* not fully constructed, don't use */
-
-#define FILE_IS_USABLE(fp) \
-       (((fp)->f_iflags & FIF_LARVAL) == 0)
+#define FIF_INSERTED           0x80    /* present in `filehead' */
 
 #define FREF(fp) \
        do { \
@@ -103,11 +100,6 @@ struct file {
                (fp)->f_count++; \
        } while (0)
 #define FRELE(fp,p)    (--(fp)->f_count == 0 ? fdrop(fp, p) : 0)
-
-#define FILE_SET_MATURE(fp,p) do {                             \
-       (fp)->f_iflags &= ~FIF_LARVAL;                          \
-       FRELE(fp, p);                                           \
-} while (0)
 
 int    fdrop(struct file *, struct proc *);
 

Reply via email to