Hi,
This diff moves the kqueue related members from struct filedesc to
struct kqueue with the prime motivation of fixing a panic in
knote_processexit() that can occur when the filedesc is gone.
Since filedesc is no longer shared between kqueues belong to the same
process, I added a list of the current kqueues to struct process. This
list is used in knote_closefd() in order to remove all knotes using the
given fd as its ident from all kqueues belonging to the given process.

Similiar work has been done in both FreeBSD[1] and DragonFlyBSD[2].

Testing would be much appreciated. Not asking for anything particular,
just apply the diff and exercise the kernel with your daily tasks.

[1] 
https://github.com/freebsd/freebsd/commit/bc1805c6e871c178d0b6516c3baa774ffd77224a
[2] 
http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff/ccafe911a3aa55fd5262850ecfc5765cd31a56a2

Index: kern/kern_descrip.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.164
diff -u -p -r1.164 kern_descrip.c
--- kern/kern_descrip.c 5 Jun 2018 09:29:05 -0000       1.164
+++ kern/kern_descrip.c 12 Jun 2018 17:11:44 -0000
@@ -650,8 +650,7 @@ finishdup(struct proc *p, struct file *f
        *retval = new;
 
        if (oldfp != NULL) {
-               if (new < fdp->fd_knlistsize)
-                       knote_fdclose(p, new);
+               knote_fdclose(p, new);
                closef(oldfp, p);
        }
 
@@ -686,8 +685,7 @@ fdrelease(struct proc *p, int fd)
        FREF(fp);
        *fpp = NULL;
        fd_unused(fdp, fd);
-       if (fd < fdp->fd_knlistsize)
-               knote_fdclose(p, fd);
+       knote_fdclose(p, fd);
        return (closef(fp, p));
 }
 
@@ -999,7 +997,6 @@ fdinit(void)
        newfdp->fd_fd.fd_nfiles = NDFILE;
        newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
        newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
-       newfdp->fd_fd.fd_knlistsize = -1;
 
        newfdp->fd_fd.fd_freefile = 0;
        newfdp->fd_fd.fd_lastfile = 0;
@@ -1129,8 +1126,6 @@ fdfree(struct proc *p)
                vrele(fdp->fd_cdir);
        if (fdp->fd_rdir)
                vrele(fdp->fd_rdir);
-       free(fdp->fd_knlist, M_TEMP, fdp->fd_knlistsize * sizeof(struct klist));
-       hashfree(fdp->fd_knhash, KN_HASHSIZE, M_TEMP);
        pool_put(&fdesc_pool, fdp);
 }
 
Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.91
diff -u -p -r1.91 kern_event.c
--- kern/kern_event.c   5 Jun 2018 09:29:05 -0000       1.91
+++ kern/kern_event.c   12 Jun 2018 17:11:44 -0000
@@ -80,8 +80,8 @@ struct fileops kqueueops = {
        .fo_close       = kqueue_close
 };
 
-void   knote_attach(struct knote *kn, struct filedesc *fdp);
-void   knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp);
+void   knote_attach(struct knote *kn);
+void   knote_drop(struct knote *kn, struct proc *p);
 void   knote_enqueue(struct knote *kn);
 void   knote_dequeue(struct knote *kn);
 #define knote_alloc() ((struct knote *)pool_get(&knote_pool, PR_WAITOK))
@@ -152,9 +152,13 @@ KQREF(struct kqueue *kq)
 void
 KQRELE(struct kqueue *kq)
 {
-       if (--kq->kq_refs == 0) {
-               pool_put(&kqueue_pool, kq);
-       }
+       if (--kq->kq_refs > 0)
+               return;
+
+       LIST_REMOVE(kq, kq_next);
+       free(kq->kq_knlist, M_TEMP, kq->kq_knlistsize * sizeof(struct klist));
+       hashfree(kq->kq_knhash, KN_HASHSIZE, M_TEMP);
+       pool_put(&kqueue_pool, kq);
 }
 
 void kqueue_init(void);
@@ -453,10 +457,9 @@ sys_kqueue(struct proc *p, void *v, regi
        fp->f_data = kq;
        KQREF(kq);
        *retval = fd;
-       if (fdp->fd_knlistsize < 0)
-               fdp->fd_knlistsize = 0;         /* this process has a kq */
        kq->kq_fdp = fdp;
        FILE_SET_MATURE(fp, p);
+       LIST_INSERT_HEAD(&p->p_p->ps_kqlist, kq, kq_next);
        return (0);
 }
 
@@ -583,19 +586,19 @@ kqueue_register(struct kqueue *kq, struc
                if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
                        return (EBADF);
 
-               if (kev->ident < fdp->fd_knlistsize) {
-                       SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) 
{
+               if (kev->ident < kq->kq_knlistsize) {
+                       SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) {
                                if (kq == kn->kn_kq &&
                                    kev->filter == kn->kn_filter)
                                        break;
                        }
                }
        } else {
-               if (fdp->fd_knhashmask != 0) {
+               if (kq->kq_knhashmask != 0) {
                        struct klist *list;
 
-                       list = &fdp->fd_knhash[
-                           KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
+                       list = &kq->kq_knhash[
+                           KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
                        SLIST_FOREACH(kn, list, kn_link) {
                                if (kev->ident == kn->kn_id &&
                                    kq == kn->kn_kq &&
@@ -637,9 +640,9 @@ kqueue_register(struct kqueue *kq, struc
                        kev->data = 0;
                        kn->kn_kevent = *kev;
 
-                       knote_attach(kn, fdp);
+                       knote_attach(kn);
                        if ((error = fops->f_attach(kn)) != 0) {
-                               knote_drop(kn, p, fdp);
+                               knote_drop(kn, p);
                                goto done;
                        }
                } else {
@@ -660,7 +663,7 @@ kqueue_register(struct kqueue *kq, struc
 
        } else if (kev->flags & EV_DELETE) {
                kn->kn_fop->f_detach(kn);
-               knote_drop(kn, p, p->p_fd);
+               knote_drop(kn, p);
                goto done;
        }
 
@@ -796,7 +799,7 @@ start:
                        kn->kn_status &= ~KN_QUEUED;
                        splx(s);
                        kn->kn_fop->f_detach(kn);
-                       knote_drop(kn, p, p->p_fd);
+                       knote_drop(kn, p);
                        s = splhigh();
                } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
                        if (kn->kn_flags & EV_CLEAR) {
@@ -900,12 +903,11 @@ int
 kqueue_close(struct file *fp, struct proc *p)
 {
        struct kqueue *kq = fp->f_data;
-       struct filedesc *fdp = p->p_fd;
        struct knote **knp, *kn, *kn0;
        int i;
 
-       for (i = 0; i < fdp->fd_knlistsize; i++) {
-               knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
+       for (i = 0; i < kq->kq_knlistsize; i++) {
+               knp = &SLIST_FIRST(&kq->kq_knlist[i]);
                kn = *knp;
                while (kn != NULL) {
                        kn0 = SLIST_NEXT(kn, kn_link);
@@ -920,9 +922,9 @@ kqueue_close(struct file *fp, struct pro
                        kn = kn0;
                }
        }
-       if (fdp->fd_knhashmask != 0) {
-               for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
-                       knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
+       if (kq->kq_knhashmask != 0) {
+               for (i = 0; i < kq->kq_knhashmask + 1; i++) {
+                       knp = &SLIST_FIRST(&kq->kq_knhash[i]);
                        kn = *knp;
                        while (kn != NULL) {
                                kn0 = SLIST_NEXT(kn, kn_link);
@@ -994,7 +996,7 @@ knote_remove(struct proc *p, struct klis
 
        while ((kn = SLIST_FIRST(list)) != NULL) {
                kn->kn_fop->f_detach(kn);
-               knote_drop(kn, p, p->p_fd);
+               knote_drop(kn, p);
        }
 }
 
@@ -1004,10 +1006,16 @@ knote_remove(struct proc *p, struct klis
 void
 knote_fdclose(struct proc *p, int fd)
 {
-       struct filedesc *fdp = p->p_fd;
-       struct klist *list = &fdp->fd_knlist[fd];
+       struct kqueue *kq;
+       struct klist *list;
 
-       knote_remove(p, list);
+       LIST_FOREACH(kq, &p->p_p->ps_kqlist, kq_next) {
+               if (fd >= kq->kq_knlistsize)
+                       continue;
+
+               list = &kq->kq_knlist[fd];
+               knote_remove(p, list);
+       }
 }
 
 /*
@@ -1026,35 +1034,36 @@ knote_processexit(struct proc *p)
 }
 
 void
-knote_attach(struct knote *kn, struct filedesc *fdp)
+knote_attach(struct knote *kn)
 {
+       struct kqueue *kq = kn->kn_kq;
        struct klist *list;
        int size;
 
        if (!kn->kn_fop->f_isfd) {
-               if (fdp->fd_knhashmask == 0)
-                       fdp->fd_knhash = hashinit(KN_HASHSIZE, M_TEMP,
-                           M_WAITOK, &fdp->fd_knhashmask);
-               list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
+               if (kq->kq_knhashmask == 0)
+                       kq->kq_knhash = hashinit(KN_HASHSIZE, M_TEMP,
+                           M_WAITOK, &kq->kq_knhashmask);
+               list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
                goto done;
        }
 
-       if (fdp->fd_knlistsize <= kn->kn_id) {
-               size = fdp->fd_knlistsize;
+       if (kq->kq_knlistsize <= kn->kn_id) {
+               size = kq->kq_knlistsize;
                while (size <= kn->kn_id)
                        size += KQEXTENT;
                list = mallocarray(size, sizeof(struct klist), M_TEMP,
                    M_WAITOK);
-               memcpy(list, fdp->fd_knlist,
-                   fdp->fd_knlistsize * sizeof(struct klist));
-               memset(&list[fdp->fd_knlistsize], 0,
-                   (size - fdp->fd_knlistsize) * sizeof(struct klist));
-               free(fdp->fd_knlist, M_TEMP,
-                   fdp->fd_knlistsize * sizeof(struct klist));
-               fdp->fd_knlistsize = size;
-               fdp->fd_knlist = list;
+               memcpy(list, kq->kq_knlist,
+                   kq->kq_knlistsize * sizeof(struct klist));
+               memset(&list[kq->kq_knlistsize], 0,
+                   (size - kq->kq_knlistsize) * sizeof(struct klist));
+               free(kq->kq_knlist, M_TEMP,
+                   kq->kq_knlistsize * sizeof(struct klist));
+               kq->kq_knlistsize = size;
+               kq->kq_knlist = list;
        }
-       list = &fdp->fd_knlist[kn->kn_id];
+       list = &kq->kq_knlist[kn->kn_id];
 done:
        SLIST_INSERT_HEAD(list, kn, kn_link);
        kn->kn_status = 0;
@@ -1065,14 +1074,15 @@ done:
  * while calling FRELE and knote_free.
  */
 void
-knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp)
+knote_drop(struct knote *kn, struct proc *p)
 {
+       struct kqueue *kq = kn->kn_kq;
        struct klist *list;
 
        if (kn->kn_fop->f_isfd)
-               list = &fdp->fd_knlist[kn->kn_id];
+               list = &kq->kq_knlist[kn->kn_id];
        else
-               list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
+               list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
 
        SLIST_REMOVE(list, kn, knote, kn_link);
        if (kn->kn_status & KN_QUEUED)
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.202
diff -u -p -r1.202 kern_fork.c
--- kern/kern_fork.c    30 Dec 2017 20:47:00 -0000      1.202
+++ kern/kern_fork.c    12 Jun 2018 17:11:44 -0000
@@ -198,6 +198,7 @@ process_initialize(struct process *pr, s
        KASSERT(p->p_ucred->cr_ref >= 2);       /* new thread and new process */
 
        LIST_INIT(&pr->ps_children);
+       LIST_INIT(&pr->ps_kqlist);
 
        timeout_set(&pr->ps_realit_to, realitexpire, pr);
 }
Index: sys/eventvar.h
===================================================================
RCS file: /cvs/src/sys/sys/eventvar.h,v
retrieving revision 1.4
diff -u -p -r1.4 eventvar.h
--- sys/eventvar.h      11 Oct 2017 08:01:10 -0000      1.4
+++ sys/eventvar.h      12 Jun 2018 17:11:44 -0000
@@ -40,6 +40,14 @@ struct kqueue {
        int             kq_refs;                /* number of references */
        struct          selinfo kq_sel;
        struct          filedesc *kq_fdp;
+
+       LIST_ENTRY(kqueue) kq_next;
+
+       int             kq_knlistsize;          /* size of knlist */
+       struct          klist *kq_knlist;       /* list of attached knotes */
+       u_long          kq_knhashmask;          /* size of knhash */
+       struct          klist *kq_knhash;       /* hash table for attached 
knotes */
+
        int             kq_state;
 #define KQ_SEL         0x01
 #define KQ_SLEEP       0x02
Index: sys/filedesc.h
===================================================================
RCS file: /cvs/src/sys/sys/filedesc.h,v
retrieving revision 1.37
diff -u -p -r1.37 filedesc.h
--- sys/filedesc.h      5 Jun 2018 09:29:05 -0000       1.37
+++ sys/filedesc.h      12 Jun 2018 17:11:44 -0000
@@ -73,11 +73,6 @@ struct filedesc {
                                        /* held when writing to fd_ofiles, */
                                        /* fd_ofileflags, or fd_{hi,lo}map */
 
-       int     fd_knlistsize;          /* size of knlist */
-       struct  klist *fd_knlist;       /* list of attached knotes */
-       u_long  fd_knhashmask;          /* size of knhash */
-       struct  klist *fd_knhash;       /* hash table for attached knotes */
-
        int fd_flags;                   /* flags on the file descriptor table */
 };
 
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.248
diff -u -p -r1.248 proc.h
--- sys/proc.h  12 Apr 2018 17:13:44 -0000      1.248
+++ sys/proc.h  12 Jun 2018 17:11:44 -0000
@@ -168,6 +168,8 @@ struct process {
        struct  vmspace *ps_vmspace;    /* Address space */
        pid_t   ps_pid;                 /* Process identifier. */
 
+       LIST_HEAD(, kqueue) ps_kqlist;  /* kqueues attached to this process */
+
 /* The following fields are all zeroed upon creation in process_new. */
 #define        ps_startzero    ps_klist
        struct  klist ps_klist;         /* knotes attached to this process */

Reply via email to