Hi, This diff moves the kqueue related members from struct filedesc to struct kqueue with the prime motivation of fixing a panic in knote_processexit() that can occur when the filedesc is gone. Since filedesc is no longer shared between kqueues belong to the same process, I added a list of the current kqueues to struct process. This list is used in knote_closefd() in order to remove all knotes using the given fd as its ident from all kqueues belonging to the given process.
Similiar work has been done in both FreeBSD[1] and DragonFlyBSD[2]. Testing would be much appreciated. Not asking for anything particular, just apply the diff and exercise the kernel with your daily tasks. [1] https://github.com/freebsd/freebsd/commit/bc1805c6e871c178d0b6516c3baa774ffd77224a [2] http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff/ccafe911a3aa55fd5262850ecfc5765cd31a56a2 Index: kern/kern_descrip.c =================================================================== RCS file: /cvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.164 diff -u -p -r1.164 kern_descrip.c --- kern/kern_descrip.c 5 Jun 2018 09:29:05 -0000 1.164 +++ kern/kern_descrip.c 12 Jun 2018 17:11:44 -0000 @@ -650,8 +650,7 @@ finishdup(struct proc *p, struct file *f *retval = new; if (oldfp != NULL) { - if (new < fdp->fd_knlistsize) - knote_fdclose(p, new); + knote_fdclose(p, new); closef(oldfp, p); } @@ -686,8 +685,7 @@ fdrelease(struct proc *p, int fd) FREF(fp); *fpp = NULL; fd_unused(fdp, fd); - if (fd < fdp->fd_knlistsize) - knote_fdclose(p, fd); + knote_fdclose(p, fd); return (closef(fp, p)); } @@ -999,7 +997,6 @@ fdinit(void) newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; - newfdp->fd_fd.fd_knlistsize = -1; newfdp->fd_fd.fd_freefile = 0; newfdp->fd_fd.fd_lastfile = 0; @@ -1129,8 +1126,6 @@ fdfree(struct proc *p) vrele(fdp->fd_cdir); if (fdp->fd_rdir) vrele(fdp->fd_rdir); - free(fdp->fd_knlist, M_TEMP, fdp->fd_knlistsize * sizeof(struct klist)); - hashfree(fdp->fd_knhash, KN_HASHSIZE, M_TEMP); pool_put(&fdesc_pool, fdp); } Index: kern/kern_event.c =================================================================== RCS file: /cvs/src/sys/kern/kern_event.c,v retrieving revision 1.91 diff -u -p -r1.91 kern_event.c --- kern/kern_event.c 5 Jun 2018 09:29:05 -0000 1.91 +++ kern/kern_event.c 12 Jun 2018 17:11:44 -0000 @@ -80,8 +80,8 @@ struct fileops kqueueops = { .fo_close = kqueue_close }; -void knote_attach(struct knote *kn, struct filedesc *fdp); -void knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp); +void knote_attach(struct knote *kn); +void knote_drop(struct knote *kn, struct proc *p); void knote_enqueue(struct knote *kn); void knote_dequeue(struct knote *kn); #define knote_alloc() ((struct knote *)pool_get(&knote_pool, PR_WAITOK)) @@ -152,9 +152,13 @@ KQREF(struct kqueue *kq) void KQRELE(struct kqueue *kq) { - if (--kq->kq_refs == 0) { - pool_put(&kqueue_pool, kq); - } + if (--kq->kq_refs > 0) + return; + + LIST_REMOVE(kq, kq_next); + free(kq->kq_knlist, M_TEMP, kq->kq_knlistsize * sizeof(struct klist)); + hashfree(kq->kq_knhash, KN_HASHSIZE, M_TEMP); + pool_put(&kqueue_pool, kq); } void kqueue_init(void); @@ -453,10 +457,9 @@ sys_kqueue(struct proc *p, void *v, regi fp->f_data = kq; KQREF(kq); *retval = fd; - if (fdp->fd_knlistsize < 0) - fdp->fd_knlistsize = 0; /* this process has a kq */ kq->kq_fdp = fdp; FILE_SET_MATURE(fp, p); + LIST_INSERT_HEAD(&p->p_p->ps_kqlist, kq, kq_next); return (0); } @@ -583,19 +586,19 @@ kqueue_register(struct kqueue *kq, struc if ((fp = fd_getfile(fdp, kev->ident)) == NULL) return (EBADF); - if (kev->ident < fdp->fd_knlistsize) { - SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) { + if (kev->ident < kq->kq_knlistsize) { + SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) { if (kq == kn->kn_kq && kev->filter == kn->kn_filter) break; } } } else { - if (fdp->fd_knhashmask != 0) { + if (kq->kq_knhashmask != 0) { struct klist *list; - list = &fdp->fd_knhash[ - KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; + list = &kq->kq_knhash[ + KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; SLIST_FOREACH(kn, list, kn_link) { if (kev->ident == kn->kn_id && kq == kn->kn_kq && @@ -637,9 +640,9 @@ kqueue_register(struct kqueue *kq, struc kev->data = 0; kn->kn_kevent = *kev; - knote_attach(kn, fdp); + knote_attach(kn); if ((error = fops->f_attach(kn)) != 0) { - knote_drop(kn, p, fdp); + knote_drop(kn, p); goto done; } } else { @@ -660,7 +663,7 @@ kqueue_register(struct kqueue *kq, struc } else if (kev->flags & EV_DELETE) { kn->kn_fop->f_detach(kn); - knote_drop(kn, p, p->p_fd); + knote_drop(kn, p); goto done; } @@ -796,7 +799,7 @@ start: kn->kn_status &= ~KN_QUEUED; splx(s); kn->kn_fop->f_detach(kn); - knote_drop(kn, p, p->p_fd); + knote_drop(kn, p); s = splhigh(); } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { if (kn->kn_flags & EV_CLEAR) { @@ -900,12 +903,11 @@ int kqueue_close(struct file *fp, struct proc *p) { struct kqueue *kq = fp->f_data; - struct filedesc *fdp = p->p_fd; struct knote **knp, *kn, *kn0; int i; - for (i = 0; i < fdp->fd_knlistsize; i++) { - knp = &SLIST_FIRST(&fdp->fd_knlist[i]); + for (i = 0; i < kq->kq_knlistsize; i++) { + knp = &SLIST_FIRST(&kq->kq_knlist[i]); kn = *knp; while (kn != NULL) { kn0 = SLIST_NEXT(kn, kn_link); @@ -920,9 +922,9 @@ kqueue_close(struct file *fp, struct pro kn = kn0; } } - if (fdp->fd_knhashmask != 0) { - for (i = 0; i < fdp->fd_knhashmask + 1; i++) { - knp = &SLIST_FIRST(&fdp->fd_knhash[i]); + if (kq->kq_knhashmask != 0) { + for (i = 0; i < kq->kq_knhashmask + 1; i++) { + knp = &SLIST_FIRST(&kq->kq_knhash[i]); kn = *knp; while (kn != NULL) { kn0 = SLIST_NEXT(kn, kn_link); @@ -994,7 +996,7 @@ knote_remove(struct proc *p, struct klis while ((kn = SLIST_FIRST(list)) != NULL) { kn->kn_fop->f_detach(kn); - knote_drop(kn, p, p->p_fd); + knote_drop(kn, p); } } @@ -1004,10 +1006,16 @@ knote_remove(struct proc *p, struct klis void knote_fdclose(struct proc *p, int fd) { - struct filedesc *fdp = p->p_fd; - struct klist *list = &fdp->fd_knlist[fd]; + struct kqueue *kq; + struct klist *list; - knote_remove(p, list); + LIST_FOREACH(kq, &p->p_p->ps_kqlist, kq_next) { + if (fd >= kq->kq_knlistsize) + continue; + + list = &kq->kq_knlist[fd]; + knote_remove(p, list); + } } /* @@ -1026,35 +1034,36 @@ knote_processexit(struct proc *p) } void -knote_attach(struct knote *kn, struct filedesc *fdp) +knote_attach(struct knote *kn) { + struct kqueue *kq = kn->kn_kq; struct klist *list; int size; if (!kn->kn_fop->f_isfd) { - if (fdp->fd_knhashmask == 0) - fdp->fd_knhash = hashinit(KN_HASHSIZE, M_TEMP, - M_WAITOK, &fdp->fd_knhashmask); - list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; + if (kq->kq_knhashmask == 0) + kq->kq_knhash = hashinit(KN_HASHSIZE, M_TEMP, + M_WAITOK, &kq->kq_knhashmask); + list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; goto done; } - if (fdp->fd_knlistsize <= kn->kn_id) { - size = fdp->fd_knlistsize; + if (kq->kq_knlistsize <= kn->kn_id) { + size = kq->kq_knlistsize; while (size <= kn->kn_id) size += KQEXTENT; list = mallocarray(size, sizeof(struct klist), M_TEMP, M_WAITOK); - memcpy(list, fdp->fd_knlist, - fdp->fd_knlistsize * sizeof(struct klist)); - memset(&list[fdp->fd_knlistsize], 0, - (size - fdp->fd_knlistsize) * sizeof(struct klist)); - free(fdp->fd_knlist, M_TEMP, - fdp->fd_knlistsize * sizeof(struct klist)); - fdp->fd_knlistsize = size; - fdp->fd_knlist = list; + memcpy(list, kq->kq_knlist, + kq->kq_knlistsize * sizeof(struct klist)); + memset(&list[kq->kq_knlistsize], 0, + (size - kq->kq_knlistsize) * sizeof(struct klist)); + free(kq->kq_knlist, M_TEMP, + kq->kq_knlistsize * sizeof(struct klist)); + kq->kq_knlistsize = size; + kq->kq_knlist = list; } - list = &fdp->fd_knlist[kn->kn_id]; + list = &kq->kq_knlist[kn->kn_id]; done: SLIST_INSERT_HEAD(list, kn, kn_link); kn->kn_status = 0; @@ -1065,14 +1074,15 @@ done: * while calling FRELE and knote_free. */ void -knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp) +knote_drop(struct knote *kn, struct proc *p) { + struct kqueue *kq = kn->kn_kq; struct klist *list; if (kn->kn_fop->f_isfd) - list = &fdp->fd_knlist[kn->kn_id]; + list = &kq->kq_knlist[kn->kn_id]; else - list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; + list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; SLIST_REMOVE(list, kn, knote, kn_link); if (kn->kn_status & KN_QUEUED) Index: kern/kern_fork.c =================================================================== RCS file: /cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.202 diff -u -p -r1.202 kern_fork.c --- kern/kern_fork.c 30 Dec 2017 20:47:00 -0000 1.202 +++ kern/kern_fork.c 12 Jun 2018 17:11:44 -0000 @@ -198,6 +198,7 @@ process_initialize(struct process *pr, s KASSERT(p->p_ucred->cr_ref >= 2); /* new thread and new process */ LIST_INIT(&pr->ps_children); + LIST_INIT(&pr->ps_kqlist); timeout_set(&pr->ps_realit_to, realitexpire, pr); } Index: sys/eventvar.h =================================================================== RCS file: /cvs/src/sys/sys/eventvar.h,v retrieving revision 1.4 diff -u -p -r1.4 eventvar.h --- sys/eventvar.h 11 Oct 2017 08:01:10 -0000 1.4 +++ sys/eventvar.h 12 Jun 2018 17:11:44 -0000 @@ -40,6 +40,14 @@ struct kqueue { int kq_refs; /* number of references */ struct selinfo kq_sel; struct filedesc *kq_fdp; + + LIST_ENTRY(kqueue) kq_next; + + int kq_knlistsize; /* size of knlist */ + struct klist *kq_knlist; /* list of attached knotes */ + u_long kq_knhashmask; /* size of knhash */ + struct klist *kq_knhash; /* hash table for attached knotes */ + int kq_state; #define KQ_SEL 0x01 #define KQ_SLEEP 0x02 Index: sys/filedesc.h =================================================================== RCS file: /cvs/src/sys/sys/filedesc.h,v retrieving revision 1.37 diff -u -p -r1.37 filedesc.h --- sys/filedesc.h 5 Jun 2018 09:29:05 -0000 1.37 +++ sys/filedesc.h 12 Jun 2018 17:11:44 -0000 @@ -73,11 +73,6 @@ struct filedesc { /* held when writing to fd_ofiles, */ /* fd_ofileflags, or fd_{hi,lo}map */ - int fd_knlistsize; /* size of knlist */ - struct klist *fd_knlist; /* list of attached knotes */ - u_long fd_knhashmask; /* size of knhash */ - struct klist *fd_knhash; /* hash table for attached knotes */ - int fd_flags; /* flags on the file descriptor table */ }; Index: sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.248 diff -u -p -r1.248 proc.h --- sys/proc.h 12 Apr 2018 17:13:44 -0000 1.248 +++ sys/proc.h 12 Jun 2018 17:11:44 -0000 @@ -168,6 +168,8 @@ struct process { struct vmspace *ps_vmspace; /* Address space */ pid_t ps_pid; /* Process identifier. */ + LIST_HEAD(, kqueue) ps_kqlist; /* kqueues attached to this process */ + /* The following fields are all zeroed upon creation in process_new. */ #define ps_startzero ps_klist struct klist ps_klist; /* knotes attached to this process */