This diff adds another struct between filedesc and file to store
process-local per-descriptor information. Currently, the only thing in
this struct is the file pointer and some flags, however I have another
patch on top of this that adds capsicum capabilities to it. (And
another that uses mallocarray(9) where applicable. One thing at a time.)
The data is currently stored in one big allocation with all the file *s
in front, and all the flags after. When that allocation grows, there are
two copies and two zeroings. This is fine when we only have two fields
(fd_ofiles and fd_ofileflags), but adding more is ugly.
Instead, I propose we have a struct for each file descriptor table entry
(filedescent) and put the files, flags, and capabilities in that.
The immediate impact is some memory waste due to padding (3 or 7 bytes
per file descriptor, depending on the arch), however once capabilities
are added it will be negligible. I discussed it with matthew@ and
guenther@ and concluded this was the saner way. For what it's worth,
FreeBSD and NetBSD have done the same thing.
I've been running different versions of this for about a month or so,
no issues noticed so far.
A similar earlier version was reviewed by matthew@ and guenther@.
Index: sys/filedesc.h
===================================================================
RCS file: /cvs/src/sys/sys/filedesc.h,v
retrieving revision 1.28
diff -u -p -r1.28 filedesc.h
--- sys/filedesc.h 15 May 2014 03:52:25 -0000 1.28
+++ sys/filedesc.h 10 Jul 2014 19:44:42 -0000
@@ -34,9 +34,6 @@
#include <sys/rwlock.h>
/*
- * This structure is used for the management of descriptors. It may be
- * shared by multiple processes.
- *
* A process is initially started out with NDFILE descriptors stored within
* this structure, selected to be enough for typical applications based on
* the historical limit of 20 open files (and the usage of descriptors by
@@ -56,22 +53,33 @@
#define NDHISLOTS(x) (NDREDUCE(NDREDUCE(x)))
#define NDLOSLOTS(x) (NDHISLOTS(x) << NDENTRYSHIFT)
+/*
+ * File wrapper for per-process per-file data.
+ */
+struct filedescent {
+ struct file *fde_file; /* file structure for open file */
+ char fde_flags; /* process-local file flags */
+};
+
+/*
+ * This structure is used for the management of descriptors. It may be
+ * shared by multiple processes.
+ */
struct filedesc {
- struct file **fd_ofiles; /* file structures for open files */
- char *fd_ofileflags; /* per-process open file flags */
+ struct filedescent *fd_fdents; /* per-process file wrappers */
struct vnode *fd_cdir; /* current directory */
struct vnode *fd_rdir; /* root directory */
int fd_nfiles; /* number of open files allocated */
int fd_openfd; /* number of files currently open */
u_int *fd_himap; /* each bit points to 32 fds */
u_int *fd_lomap; /* bitmap of free fds */
- int fd_lastfile; /* high-water mark of fd_ofiles */
+ int fd_lastfile; /* high-water mark of fd_fdents */
int fd_freefile; /* approx. next free file */
u_short fd_cmask; /* mask for file creation */
u_short fd_refcnt; /* reference count */
struct rwlock fd_lock; /* lock for the file descs; must be */
- /* held when writing to fd_ofiles, */
- /* fd_ofileflags, or fd_{hi,lo}map */
+ /* held when writing to fd_fdents */
+ /* or fd_{hi,lo}map */
int fd_knlistsize; /* size of knlist */
struct klist *fd_knlist; /* list of attached knotes */
@@ -91,8 +99,7 @@ struct filedesc0 {
* These arrays are used when the number of open files is
* <= NDFILE, and are then pointed to by the pointers above.
*/
- struct file *fd_dfiles[NDFILE];
- char fd_dfileflags[NDFILE];
+ struct filedescent fd_dfdents[NDFILE];
/*
* There arrays are used when the number of open files is
* <= 1024, and are then pointed to by the pointers above.
Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.110
diff -u -p -r1.110 kern_descrip.c
--- kern/kern_descrip.c 8 Jul 2014 17:19:25 -0000 1.110
+++ kern/kern_descrip.c 10 Jul 2014 19:44:42 -0000
@@ -123,7 +123,7 @@ int
find_last_set(struct filedesc *fd, int last)
{
int off, i;
- struct file **ofiles = fd->fd_ofiles;
+ struct filedescent *fdents = fd->fd_fdents;
u_int *bitmap = fd->fd_lomap;
off = (last - 1) >> NDENTRYSHIFT;
@@ -137,7 +137,7 @@ find_last_set(struct filedesc *fd, int l
if (i >= last)
i = last - 1;
- while (i > 0 && ofiles[i] == NULL)
+ while (i > 0 && fdents[i].fde_file == NULL)
i--;
return i;
}
@@ -182,7 +182,11 @@ fd_getfile(struct filedesc *fdp, int fd)
{
struct file *fp;
- if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+ if ((u_int)fd >= fdp->fd_nfiles)
+ return (NULL);
+
+ fp = fdp->fd_fdents[fd].fde_file;
+ if (fp == NULL)
return (NULL);
if (!FILE_IS_USABLE(fp))
@@ -334,22 +338,22 @@ restart:
error = finishdup(p, fp, fd, i, retval, 0);
if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC)
- fdp->fd_ofileflags[i] |= UF_EXCLOSE;
+ fdp->fd_fdents[i].fde_flags |= UF_EXCLOSE;
}
fdpunlock(fdp);
return (error);
case F_GETFD:
- *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
+ *retval = fdp->fd_fdents[fd].fde_flags & UF_EXCLOSE ? 1 : 0;
break;
case F_SETFD:
fdplock(fdp);
if ((long)SCARG(uap, arg) & 1)
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+ fdp->fd_fdents[fd].fde_flags |= UF_EXCLOSE;
else
- fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
+ fdp->fd_fdents[fd].fde_flags &= ~UF_EXCLOSE;
fdpunlock(fdp);
break;
@@ -524,6 +528,7 @@ finishdup(struct proc *p, struct file *f
{
struct file *oldfp;
struct filedesc *fdp = p->p_fd;
+ struct filedescent *fdents = fdp->fd_fdents;
fdpassertlocked(fdp);
if (fp->f_count == LONG_MAX-2) {
@@ -535,12 +540,12 @@ finishdup(struct proc *p, struct file *f
* Don't fd_getfile here. We want to closef LARVAL files and
* closef can deal with that.
*/
- oldfp = fdp->fd_ofiles[new];
+ oldfp = fdents[new].fde_file;
if (oldfp != NULL)
FREF(oldfp);
- fdp->fd_ofiles[new] = fp;
- fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
+ fdents[new].fde_file = fp;
+ fdents[new].fde_flags = fdents[old].fde_flags & ~UF_EXCLOSE;
fp->f_count++;
FRELE(fp, p);
if (dup2 && oldfp == NULL)
@@ -560,7 +565,7 @@ void
fdremove(struct filedesc *fdp, int fd)
{
fdpassertlocked(fdp);
- fdp->fd_ofiles[fd] = NULL;
+ fdp->fd_fdents[fd].fde_file = NULL;
fd_unused(fdp, fd);
}
@@ -568,7 +573,8 @@ int
fdrelease(struct proc *p, int fd)
{
struct filedesc *fdp = p->p_fd;
- struct file **fpp, *fp;
+ struct filedescent *fde;
+ struct file *fp;
fdpassertlocked(fdp);
@@ -576,12 +582,12 @@ fdrelease(struct proc *p, int fd)
* Don't fd_getfile here. We want to closef LARVAL files and closef
* can deal with that.
*/
- fpp = &fdp->fd_ofiles[fd];
- fp = *fpp;
+ fde = &fdp->fd_fdents[fd];
+ fp = fde->fde_file;
if (fp == NULL)
return (EBADF);
FREF(fp);
- *fpp = NULL;
+ fde->fde_file = NULL;
fd_unused(fdp, fd);
if (fd < fdp->fd_knlistsize)
knote_fdclose(p, fd);
@@ -707,7 +713,7 @@ fdalloc(struct proc *p, int want, int *r
/*
* Search for a free descriptor starting at the higher
* of want or fd_freefile. If that fails, consider
- * expanding the ofile array.
+ * expanding the fdents array.
*/
restart:
lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
@@ -735,7 +741,7 @@ restart:
if (want <= fdp->fd_freefile)
fdp->fd_freefile = i;
*result = i;
- fdp->fd_ofileflags[i] = 0;
+ fdp->fd_fdents[i].fde_flags = 0;
return (0);
}
}
@@ -750,9 +756,8 @@ fdexpand(struct proc *p)
{
struct filedesc *fdp = p->p_fd;
int nfiles;
- size_t copylen;
- struct file **newofile;
- char *newofileflags;
+ size_t copylen, newsize;
+ struct filedescent *newfdents;
u_int *newhimap, *newlomap;
fdpassertlocked(fdp);
@@ -765,23 +770,16 @@ fdexpand(struct proc *p)
else
nfiles = 2 * fdp->fd_nfiles;
- newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
- newofileflags = (char *) &newofile[nfiles];
+ newsize = (size_t)nfiles * sizeof(struct filedescent);
+ newfdents = malloc(newsize, M_FILEDESC, M_WAITOK);
- /*
- * Copy the existing ofile and ofileflags arrays
- * and zero the new portion of each array.
- */
- copylen = sizeof(struct file *) * fdp->fd_nfiles;
- memcpy(newofile, fdp->fd_ofiles, copylen);
- memset((char *)newofile + copylen, 0,
- nfiles * sizeof(struct file *) - copylen);
- copylen = sizeof(char) * fdp->fd_nfiles;
- memcpy(newofileflags, fdp->fd_ofileflags, copylen);
- memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen);
+ /* Copy existing fdents and zero new ones. */
+ copylen = sizeof(struct filedescent) * fdp->fd_nfiles;
+ memcpy(newfdents, fdp->fd_fdents, copylen);
+ memset((char *)newfdents + copylen, 0, newsize - copylen);
if (fdp->fd_nfiles > NDFILE)
- free(fdp->fd_ofiles, M_FILEDESC);
+ free(fdp->fd_fdents, M_FILEDESC);
if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
newhimap = malloc(NDHISLOTS(nfiles) * sizeof(u_int),
@@ -806,8 +804,7 @@ fdexpand(struct proc *p)
fdp->fd_himap = newhimap;
fdp->fd_lomap = newlomap;
}
- fdp->fd_ofiles = newofile;
- fdp->fd_ofileflags = newofileflags;
+ fdp->fd_fdents = newfdents;
fdp->fd_nfiles = nfiles;
}
@@ -844,12 +841,12 @@ restart:
nfiles++;
fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO);
fp->f_iflags = FIF_LARVAL;
- if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
+ if ((fq = p->p_fd->fd_fdents[0].fde_file) != NULL) {
LIST_INSERT_AFTER(fq, fp, f_list);
} else {
LIST_INSERT_HEAD(&filehead, fp, f_list);
}
- p->p_fd->fd_ofiles[i] = fp;
+ p->p_fd->fd_fdents[i].fde_file = fp;
fp->f_count = 1;
fp->f_cred = p->p_ucred;
crhold(fp->f_cred);
@@ -876,8 +873,7 @@ fdinit(void)
/* Create the file descriptor table. */
newfdp->fd_fd.fd_refcnt = 1;
newfdp->fd_fd.fd_cmask = cmask;
- newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
- newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
+ newfdp->fd_fd.fd_fdents = newfdp->fd_dfdents;
newfdp->fd_fd.fd_nfiles = NDFILE;
newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
@@ -906,7 +902,7 @@ struct filedesc *
fdcopy(struct process *pr)
{
struct filedesc *newfdp, *fdp = pr->ps_fd;
- struct file **fpp;
+ struct file *fp;
int i;
fdplock(fdp);
@@ -926,9 +922,7 @@ fdcopy(struct process *pr)
* in use.
*/
if (newfdp->fd_lastfile < NDFILE) {
- newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
- newfdp->fd_ofileflags =
- ((struct filedesc0 *) newfdp)->fd_dfileflags;
+ newfdp->fd_fdents = ((struct filedesc0 *) newfdp)->fd_dfdents;
i = NDFILE;
} else {
/*
@@ -939,8 +933,8 @@ fdcopy(struct process *pr)
i = newfdp->fd_nfiles;
while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
i /= 2;
- newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
- newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
+ newfdp->fd_fdents = malloc(i * sizeof(struct filedescent),
+ M_FILEDESC, M_WAITOK);
}
if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
newfdp->fd_himap =
@@ -954,16 +948,16 @@ fdcopy(struct process *pr)
M_FILEDESC, M_WAITOK);
}
newfdp->fd_nfiles = i;
- memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file *));
- memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
+ memcpy(newfdp->fd_fdents, fdp->fd_fdents,
+ i * sizeof(struct filedescent));
memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int));
memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int));
fdpunlock(fdp);
fdplock(newfdp);
- fpp = newfdp->fd_ofiles;
- for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++)
- if (*fpp != NULL) {
+ for (i = 0; i <= newfdp->fd_lastfile; i++) {
+ fp = newfdp->fd_fdents[i].fde_file;
+ if (fp != NULL) {
/*
* XXX Gruesome hack. If count gets too high, fail
* to copy an fd, since fdcopy()'s callers do not
@@ -972,13 +966,14 @@ fdcopy(struct process *pr)
* tied to the process that opened them to enforce
* their internal consistency, so close them here.
*/
- if ((*fpp)->f_count == LONG_MAX-2 ||
- (*fpp)->f_type == DTYPE_KQUEUE ||
- (*fpp)->f_type == DTYPE_SYSTRACE)
+ if (fp->f_count == LONG_MAX-2 ||
+ fp->f_type == DTYPE_KQUEUE ||
+ fp->f_type == DTYPE_SYSTRACE)
fdremove(newfdp, i);
else
- (*fpp)->f_count++;
+ fp->f_count++;
}
+ }
/* finish cleaning up kq bits */
if (newfdp->fd_knlistsize != -1) {
@@ -1004,8 +999,8 @@ fdfree(struct proc *p)
if (--fdp->fd_refcnt > 0)
return;
- fpp = fdp->fd_ofiles;
- for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
+ for (i = fdp->fd_lastfile; i >= 0; i--) {
+ fpp = &fdp->fd_fdents[i].fde_file;
fp = *fpp;
if (fp != NULL) {
FREF(fp);
@@ -1015,7 +1010,7 @@ fdfree(struct proc *p)
}
p->p_fd = NULL;
if (fdp->fd_nfiles > NDFILE)
- free(fdp->fd_ofiles, M_FILEDESC);
+ free(fdp->fd_fdents, M_FILEDESC);
if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
free(fdp->fd_himap, M_FILEDESC);
free(fdp->fd_lomap, M_FILEDESC);
@@ -1189,6 +1184,7 @@ int
dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode)
{
struct file *wfp;
+ struct filedescent *fdentp;
fdpassertlocked(fdp);
@@ -1222,9 +1218,10 @@ dupfdopen(struct filedesc *fdp, int indx
if (wfp->f_count == LONG_MAX-2)
return (EDEADLK);
- fdp->fd_ofiles[indx] = wfp;
- fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) |
- (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE);
+ fdentp = &fdp->fd_fdents[indx];
+ fdentp->fde_file = wfp;
+ fdentp->fde_flags &= UF_EXCLOSE;
+ fdentp->fde_flags |= fdp->fd_fdents[dfd].fde_flags & ~UF_EXCLOSE;
wfp->f_count++;
fd_used(fdp, indx);
return (0);
@@ -1241,7 +1238,7 @@ fdcloseexec(struct proc *p)
fdplock(fdp);
for (fd = 0; fd <= fdp->fd_lastfile; fd++)
- if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
+ if (fdp->fd_fdents[fd].fde_flags & UF_EXCLOSE)
(void) fdrelease(p, fd);
fdpunlock(fdp);
}
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.255
diff -u -p -r1.255 kern_sysctl.c
--- kern/kern_sysctl.c 8 Jul 2014 17:19:25 -0000 1.255
+++ kern/kern_sysctl.c 10 Jul 2014 19:44:43 -0000
@@ -1175,8 +1175,11 @@ fill_file(struct kinfo_file *kf, struct
strlcpy(kf->p_comm, pr->ps_mainproc->p_comm,
sizeof(kf->p_comm));
}
- if (fdp != NULL)
- kf->fd_ofileflags = fdp->fd_ofileflags[fd];
+ if (fdp != NULL) {
+ KASSERT(fd < fdp->fd_nfiles);
+ KASSERT(fdp->fd_fdents[fd].fde_file != NULL);
+ kf->fd_ofileflags = fdp->fd_fdents[fd].fde_flags;
+ }
}
/*
@@ -1268,7 +1271,7 @@ sysctl_file(int *name, u_int namelen, ch
if (pr->ps_tracevp)
FILLIT(NULL, NULL, KERN_FILE_TRACE,
pr->ps_tracevp, pr);
for (i = 0; i < fdp->fd_nfiles; i++) {
- if ((fp = fdp->fd_ofiles[i]) == NULL)
+ if ((fp = fdp->fd_fdents[i].fde_file) == NULL)
continue;
if (!FILE_IS_USABLE(fp))
continue;
@@ -1296,7 +1299,7 @@ sysctl_file(int *name, u_int namelen, ch
if (pr->ps_tracevp)
FILLIT(NULL, NULL, KERN_FILE_TRACE,
pr->ps_tracevp, pr);
for (i = 0; i < fdp->fd_nfiles; i++) {
- if ((fp = fdp->fd_ofiles[i]) == NULL)
+ if ((fp = fdp->fd_fdents[i].fde_file) == NULL)
continue;
if (!FILE_IS_USABLE(fp))
continue;
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.88
diff -u -p -r1.88 sys_generic.c
--- kern/sys_generic.c 8 Jul 2014 17:42:50 -0000 1.88
+++ kern/sys_generic.c 10 Jul 2014 19:44:43 -0000
@@ -414,9 +414,9 @@ sys_ioctl(struct proc *p, void *v, regis
case FIOCLEX:
fdplock(fdp);
if (com == FIONCLEX)
- fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
+ fdp->fd_fdents[SCARG(uap, fd)].fde_flags &= ~UF_EXCLOSE;
else
- fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
+ fdp->fd_fdents[SCARG(uap, fd)].fde_flags |= UF_EXCLOSE;
fdpunlock(fdp);
return (0);
}
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.73
diff -u -p -r1.73 uipc_usrreq.c
--- kern/uipc_usrreq.c 18 Mar 2014 06:59:00 -0000 1.73
+++ kern/uipc_usrreq.c 10 Jul 2014 19:44:43 -0000
@@ -743,7 +743,7 @@ restart:
* fdalloc() works properly.. We finalize it all
* in the loop below.
*/
- p->p_fd->fd_ofiles[fdp[i]] = *rp++;
+ p->p_fd->fd_fdents[fdp[i]].fde_file = *rp++;
}
/*
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.207
diff -u -p -r1.207 vfs_syscalls.c
--- kern/vfs_syscalls.c 8 Jul 2014 17:19:25 -0000 1.207
+++ kern/vfs_syscalls.c 10 Jul 2014 19:44:44 -0000
@@ -849,7 +849,7 @@ doopenat(struct proc *p, int fd, const c
goto out;
flags = FFLAGS(oflags);
if (flags & O_CLOEXEC)
- fdp->fd_ofileflags[indx] |= UF_EXCLOSE;
+ fdp->fd_fdents[indx].fde_flags |= UF_EXCLOSE;
cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
NDINITAT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fd, path, p);
@@ -1008,7 +1008,7 @@ sys_fhopen(struct proc *p, void *v, regi
goto bad;
}
if (flags & O_CLOEXEC)
- fdp->fd_ofileflags[indx] |= UF_EXCLOSE;
+ fdp->fd_fdents[indx].fde_flags |= UF_EXCLOSE;
if ((error = copyin(SCARG(uap, fhp), &fh, sizeof(fhandle_t))) != 0)
goto bad;