Ping? On Sun, Jul 13, 2014 at 03:45:44PM -0400, Jean-Philippe Ouellet wrote: > Updated for mallocarray() and free(size). > > On Thu, Jul 10, 2014 at 04:13:38PM -0400, Jean-Philippe Ouellet wrote: > > This diff adds another struct between filedesc and file to store > > process-local per-descriptor information. Currently, the only thing in > > this struct is the file pointer and some flags, however I have another > > patch on top of this that adds capsicum capabilities to it. (And > > another that uses mallocarray(9) where applicable. One thing at a time.) > > > > The data is currently stored in one big allocation with all the file *s > > in front, and all the flags after. When that allocation grows, there are > > two copies and two zeroings. This is fine when we only have two fields > > (fd_ofiles and fd_ofileflags), but adding more is ugly. > > > > Instead, I propose we have a struct for each file descriptor table entry > > (filedescent) and put the files, flags, and capabilities in that. > > > > The immediate impact is some memory waste due to padding (3 or 7 bytes > > per file descriptor, depending on the arch), however once capabilities > > are added it will be negligible. I discussed it with matthew@ and > > guenther@ and concluded this was the saner way. For what it's worth, > > FreeBSD and NetBSD have done the same thing. > > > > I've been running different versions of this for about a month or so, > > no issues noticed so far. > > > > A similar earlier version was reviewed by matthew@ and guenther@.
Index: sys/filedesc.h =================================================================== RCS file: /cvs/src/sys/sys/filedesc.h,v retrieving revision 1.28 diff -u -p -r1.28 filedesc.h --- sys/filedesc.h 15 May 2014 03:52:25 -0000 1.28 +++ sys/filedesc.h 13 Jul 2014 19:40:01 -0000 @@ -34,9 +34,6 @@ #include <sys/rwlock.h> /* - * This structure is used for the management of descriptors. It may be - * shared by multiple processes. - * * A process is initially started out with NDFILE descriptors stored within * this structure, selected to be enough for typical applications based on * the historical limit of 20 open files (and the usage of descriptors by @@ -56,22 +53,33 @@ #define NDHISLOTS(x) (NDREDUCE(NDREDUCE(x))) #define NDLOSLOTS(x) (NDHISLOTS(x) << NDENTRYSHIFT) +/* + * File wrapper for per-process per-file data. + */ +struct filedescent { + struct file *fde_file; /* file structure for open file */ + char fde_flags; /* process-local file flags */ +}; + +/* + * This structure is used for the management of descriptors. It may be + * shared by multiple processes. + */ struct filedesc { - struct file **fd_ofiles; /* file structures for open files */ - char *fd_ofileflags; /* per-process open file flags */ + struct filedescent *fd_fdents; /* per-process file wrappers */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ int fd_nfiles; /* number of open files allocated */ int fd_openfd; /* number of files currently open */ u_int *fd_himap; /* each bit points to 32 fds */ u_int *fd_lomap; /* bitmap of free fds */ - int fd_lastfile; /* high-water mark of fd_ofiles */ + int fd_lastfile; /* high-water mark of fd_fdents */ int fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ u_short fd_refcnt; /* reference count */ struct rwlock fd_lock; /* lock for the file descs; must be */ - /* held when writing to fd_ofiles, */ - /* fd_ofileflags, or fd_{hi,lo}map */ + /* held when writing to fd_fdents */ + /* or fd_{hi,lo}map */ int fd_knlistsize; /* size of knlist */ struct klist *fd_knlist; /* list of attached knotes */ @@ -91,8 +99,7 @@ struct filedesc0 { * These arrays are used when the number of open files is * <= NDFILE, and are then pointed to by the pointers above. */ - struct file *fd_dfiles[NDFILE]; - char fd_dfileflags[NDFILE]; + struct filedescent fd_dfdents[NDFILE]; /* * There arrays are used when the number of open files is * <= 1024, and are then pointed to by the pointers above. Index: kern/kern_descrip.c =================================================================== RCS file: /cvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.112 diff -u -p -r1.112 kern_descrip.c --- kern/kern_descrip.c 13 Jul 2014 15:29:04 -0000 1.112 +++ kern/kern_descrip.c 13 Jul 2014 19:40:01 -0000 @@ -123,7 +123,7 @@ int find_last_set(struct filedesc *fd, int last) { int off, i; - struct file **ofiles = fd->fd_ofiles; + struct filedescent *fdents = fd->fd_fdents; u_int *bitmap = fd->fd_lomap; off = (last - 1) >> NDENTRYSHIFT; @@ -137,7 +137,7 @@ find_last_set(struct filedesc *fd, int l if (i >= last) i = last - 1; - while (i > 0 && ofiles[i] == NULL) + while (i > 0 && fdents[i].fde_file == NULL) i--; return i; } @@ -182,7 +182,11 @@ fd_getfile(struct filedesc *fdp, int fd) { struct file *fp; - if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) + if ((u_int)fd >= fdp->fd_nfiles) + return (NULL); + + fp = fdp->fd_fdents[fd].fde_file; + if (fp == NULL) return (NULL); if (!FILE_IS_USABLE(fp)) @@ -334,22 +338,22 @@ restart: error = finishdup(p, fp, fd, i, retval, 0); if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC) - fdp->fd_ofileflags[i] |= UF_EXCLOSE; + fdp->fd_fdents[i].fde_flags |= UF_EXCLOSE; } fdpunlock(fdp); return (error); case F_GETFD: - *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; + *retval = fdp->fd_fdents[fd].fde_flags & UF_EXCLOSE ? 1 : 0; break; case F_SETFD: fdplock(fdp); if ((long)SCARG(uap, arg) & 1) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + fdp->fd_fdents[fd].fde_flags |= UF_EXCLOSE; else - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; + fdp->fd_fdents[fd].fde_flags &= ~UF_EXCLOSE; fdpunlock(fdp); break; @@ -524,6 +528,7 @@ finishdup(struct proc *p, struct file *f { struct file *oldfp; struct filedesc *fdp = p->p_fd; + struct filedescent *fdents = fdp->fd_fdents; fdpassertlocked(fdp); if (fp->f_count == LONG_MAX-2) { @@ -535,12 +540,12 @@ finishdup(struct proc *p, struct file *f * Don't fd_getfile here. We want to closef LARVAL files and * closef can deal with that. */ - oldfp = fdp->fd_ofiles[new]; + oldfp = fdents[new].fde_file; if (oldfp != NULL) FREF(oldfp); - fdp->fd_ofiles[new] = fp; - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + fdents[new].fde_file = fp; + fdents[new].fde_flags = fdents[old].fde_flags & ~UF_EXCLOSE; fp->f_count++; FRELE(fp, p); if (dup2 && oldfp == NULL) @@ -560,7 +565,7 @@ void fdremove(struct filedesc *fdp, int fd) { fdpassertlocked(fdp); - fdp->fd_ofiles[fd] = NULL; + fdp->fd_fdents[fd].fde_file = NULL; fd_unused(fdp, fd); } @@ -568,7 +573,8 @@ int fdrelease(struct proc *p, int fd) { struct filedesc *fdp = p->p_fd; - struct file **fpp, *fp; + struct filedescent *fde; + struct file *fp; fdpassertlocked(fdp); @@ -576,12 +582,12 @@ fdrelease(struct proc *p, int fd) * Don't fd_getfile here. We want to closef LARVAL files and closef * can deal with that. */ - fpp = &fdp->fd_ofiles[fd]; - fp = *fpp; + fde = &fdp->fd_fdents[fd]; + fp = fde->fde_file; if (fp == NULL) return (EBADF); FREF(fp); - *fpp = NULL; + fde->fde_file = NULL; fd_unused(fdp, fd); if (fd < fdp->fd_knlistsize) knote_fdclose(p, fd); @@ -707,7 +713,7 @@ fdalloc(struct proc *p, int want, int *r /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider - * expanding the ofile array. + * expanding the fdents array. */ restart: lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); @@ -735,7 +741,7 @@ restart: if (want <= fdp->fd_freefile) fdp->fd_freefile = i; *result = i; - fdp->fd_ofileflags[i] = 0; + fdp->fd_fdents[i].fde_flags = 0; return (0); } } @@ -750,9 +756,8 @@ fdexpand(struct proc *p) { struct filedesc *fdp = p->p_fd; int nfiles; - size_t copylen; - struct file **newofile; - char *newofileflags; + size_t copylen, newsize; + struct filedescent *newfdents; u_int *newhimap, *newlomap; fdpassertlocked(fdp); @@ -765,23 +770,22 @@ fdexpand(struct proc *p) else nfiles = 2 * fdp->fd_nfiles; - newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK); - newofileflags = (char *) &newofile[nfiles]; + if (nfiles < fdp->fd_nfiles) + panic("fdp->fd_nfiles overflow"); - /* - * Copy the existing ofile and ofileflags arrays - * and zero the new portion of each array. - */ - copylen = sizeof(struct file *) * fdp->fd_nfiles; - memcpy(newofile, fdp->fd_ofiles, copylen); - memset((char *)newofile + copylen, 0, - nfiles * sizeof(struct file *) - copylen); - copylen = sizeof(char) * fdp->fd_nfiles; - memcpy(newofileflags, fdp->fd_ofileflags, copylen); - memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen); + newfdents = mallocarray(nfiles, sizeof(struct filedescent), + M_FILEDESC, M_WAITOK); + + /* If mallocarray() works, these won't overflow. */ + newsize = nfiles * sizeof(struct filedescent); + copylen = fdp->fd_nfiles * sizeof(struct filedescent); + + /* Copy existing fdents and zero new ones. */ + memcpy(newfdents, fdp->fd_fdents, copylen); + memset((char *)newfdents + copylen, 0, newsize - copylen); if (fdp->fd_nfiles > NDFILE) - free(fdp->fd_ofiles, M_FILEDESC, 0); + free(fdp->fd_fdents, M_FILEDESC, 0); if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int), @@ -806,8 +810,7 @@ fdexpand(struct proc *p) fdp->fd_himap = newhimap; fdp->fd_lomap = newlomap; } - fdp->fd_ofiles = newofile; - fdp->fd_ofileflags = newofileflags; + fdp->fd_fdents = newfdents; fdp->fd_nfiles = nfiles; } @@ -844,12 +847,12 @@ restart: nfiles++; fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); fp->f_iflags = FIF_LARVAL; - if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { + if ((fq = p->p_fd->fd_fdents[0].fde_file) != NULL) { LIST_INSERT_AFTER(fq, fp, f_list); } else { LIST_INSERT_HEAD(&filehead, fp, f_list); } - p->p_fd->fd_ofiles[i] = fp; + p->p_fd->fd_fdents[i].fde_file = fp; fp->f_count = 1; fp->f_cred = p->p_ucred; crhold(fp->f_cred); @@ -876,8 +879,7 @@ fdinit(void) /* Create the file descriptor table. */ newfdp->fd_fd.fd_refcnt = 1; newfdp->fd_fd.fd_cmask = cmask; - newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; - newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; + newfdp->fd_fd.fd_fdents = newfdp->fd_dfdents; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; @@ -906,7 +908,7 @@ struct filedesc * fdcopy(struct process *pr) { struct filedesc *newfdp, *fdp = pr->ps_fd; - struct file **fpp; + struct file *fp; int i; fdplock(fdp); @@ -926,9 +928,7 @@ fdcopy(struct process *pr) * in use. */ if (newfdp->fd_lastfile < NDFILE) { - newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; - newfdp->fd_ofileflags = - ((struct filedesc0 *) newfdp)->fd_dfileflags; + newfdp->fd_fdents = ((struct filedesc0 *) newfdp)->fd_dfdents; i = NDFILE; } else { /* @@ -939,8 +939,8 @@ fdcopy(struct process *pr) i = newfdp->fd_nfiles; while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; - newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC, M_WAITOK); - newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; + newfdp->fd_fdents = mallocarray(i, sizeof(struct filedescent), + M_FILEDESC, M_WAITOK); } if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { newfdp->fd_himap = @@ -954,16 +954,16 @@ fdcopy(struct process *pr) M_FILEDESC, M_WAITOK); } newfdp->fd_nfiles = i; - memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file *)); - memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); + memcpy(newfdp->fd_fdents, fdp->fd_fdents, + i * sizeof(struct filedescent)); memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); fdpunlock(fdp); fdplock(newfdp); - fpp = newfdp->fd_ofiles; - for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) - if (*fpp != NULL) { + for (i = 0; i <= newfdp->fd_lastfile; i++) { + fp = newfdp->fd_fdents[i].fde_file; + if (fp != NULL) { /* * XXX Gruesome hack. If count gets too high, fail * to copy an fd, since fdcopy()'s callers do not @@ -972,13 +972,14 @@ fdcopy(struct process *pr) * tied to the process that opened them to enforce * their internal consistency, so close them here. */ - if ((*fpp)->f_count == LONG_MAX-2 || - (*fpp)->f_type == DTYPE_KQUEUE || - (*fpp)->f_type == DTYPE_SYSTRACE) + if (fp->f_count == LONG_MAX-2 || + fp->f_type == DTYPE_KQUEUE || + fp->f_type == DTYPE_SYSTRACE) fdremove(newfdp, i); else - (*fpp)->f_count++; + fp->f_count++; } + } /* finish cleaning up kq bits */ if (newfdp->fd_knlistsize != -1) { @@ -1004,8 +1005,8 @@ fdfree(struct proc *p) if (--fdp->fd_refcnt > 0) return; - fpp = fdp->fd_ofiles; - for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { + for (i = fdp->fd_lastfile; i >= 0; i--) { + fpp = &fdp->fd_fdents[i].fde_file; fp = *fpp; if (fp != NULL) { FREF(fp); @@ -1015,7 +1016,7 @@ fdfree(struct proc *p) } p->p_fd = NULL; if (fdp->fd_nfiles > NDFILE) - free(fdp->fd_ofiles, M_FILEDESC, 0); + free(fdp->fd_fdents, M_FILEDESC, 0); if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { free(fdp->fd_himap, M_FILEDESC, 0); free(fdp->fd_lomap, M_FILEDESC, 0); @@ -1189,6 +1190,7 @@ int dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode) { struct file *wfp; + struct filedescent *fdentp; fdpassertlocked(fdp); @@ -1222,9 +1224,10 @@ dupfdopen(struct filedesc *fdp, int indx if (wfp->f_count == LONG_MAX-2) return (EDEADLK); - fdp->fd_ofiles[indx] = wfp; - fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | - (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE); + fdentp = &fdp->fd_fdents[indx]; + fdentp->fde_file = wfp; + fdentp->fde_flags &= UF_EXCLOSE; + fdentp->fde_flags |= fdp->fd_fdents[dfd].fde_flags & ~UF_EXCLOSE; wfp->f_count++; fd_used(fdp, indx); return (0); @@ -1241,7 +1244,7 @@ fdcloseexec(struct proc *p) fdplock(fdp); for (fd = 0; fd <= fdp->fd_lastfile; fd++) - if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) + if (fdp->fd_fdents[fd].fde_flags & UF_EXCLOSE) (void) fdrelease(p, fd); fdpunlock(fdp); } Index: kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.258 diff -u -p -r1.258 kern_sysctl.c --- kern/kern_sysctl.c 13 Jul 2014 16:41:21 -0000 1.258 +++ kern/kern_sysctl.c 13 Jul 2014 19:40:02 -0000 @@ -1181,8 +1181,11 @@ fill_file(struct kinfo_file *kf, struct strlcpy(kf->p_comm, pr->ps_mainproc->p_comm, sizeof(kf->p_comm)); } - if (fdp != NULL) - kf->fd_ofileflags = fdp->fd_ofileflags[fd]; + if (fdp != NULL) { + KASSERT(fd < fdp->fd_nfiles); + KASSERT(fdp->fd_fdents[fd].fde_file != NULL); + kf->fd_ofileflags = fdp->fd_fdents[fd].fde_flags; + } } /* @@ -1274,7 +1277,7 @@ sysctl_file(int *name, u_int namelen, ch if (pr->ps_tracevp) FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr); for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_fdents[i].fde_file) == NULL) continue; if (!FILE_IS_USABLE(fp)) continue; @@ -1302,7 +1305,7 @@ sysctl_file(int *name, u_int namelen, ch if (pr->ps_tracevp) FILLIT(NULL, NULL, KERN_FILE_TRACE, pr->ps_tracevp, pr); for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_fdents[i].fde_file) == NULL) continue; if (!FILE_IS_USABLE(fp)) continue; Index: kern/sys_generic.c =================================================================== RCS file: /cvs/src/sys/kern/sys_generic.c,v retrieving revision 1.92 diff -u -p -r1.92 sys_generic.c --- kern/sys_generic.c 13 Jul 2014 15:48:41 -0000 1.92 +++ kern/sys_generic.c 13 Jul 2014 19:40:02 -0000 @@ -414,9 +414,9 @@ sys_ioctl(struct proc *p, void *v, regis case FIOCLEX: fdplock(fdp); if (com == FIONCLEX) - fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; + fdp->fd_fdents[SCARG(uap, fd)].fde_flags &= ~UF_EXCLOSE; else - fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; + fdp->fd_fdents[SCARG(uap, fd)].fde_flags |= UF_EXCLOSE; fdpunlock(fdp); return (0); } Index: kern/uipc_usrreq.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v retrieving revision 1.76 diff -u -p -r1.76 uipc_usrreq.c --- kern/uipc_usrreq.c 13 Jul 2014 15:52:38 -0000 1.76 +++ kern/uipc_usrreq.c 13 Jul 2014 19:40:03 -0000 @@ -743,7 +743,7 @@ restart: * fdalloc() works properly.. We finalize it all * in the loop below. */ - p->p_fd->fd_ofiles[fdp[i]] = *rp++; + p->p_fd->fd_fdents[fdp[i]].fde_file = *rp++; } /* Index: kern/vfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v retrieving revision 1.208 diff -u -p -r1.208 vfs_syscalls.c --- kern/vfs_syscalls.c 12 Jul 2014 18:43:32 -0000 1.208 +++ kern/vfs_syscalls.c 13 Jul 2014 19:40:03 -0000 @@ -849,7 +849,7 @@ doopenat(struct proc *p, int fd, const c goto out; flags = FFLAGS(oflags); if (flags & O_CLOEXEC) - fdp->fd_ofileflags[indx] |= UF_EXCLOSE; + fdp->fd_fdents[indx].fde_flags |= UF_EXCLOSE; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINITAT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fd, path, p); @@ -1008,7 +1008,7 @@ sys_fhopen(struct proc *p, void *v, regi goto bad; } if (flags & O_CLOEXEC) - fdp->fd_ofileflags[indx] |= UF_EXCLOSE; + fdp->fd_fdents[indx].fde_flags |= UF_EXCLOSE; if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0) goto bad;