The previous change introducing the kqueue_scan_setup()/kqueue_scan_finish() API, which is required to switch the poll(2) internals over to the kqueue mechanism, has been backed out. The reason for the regression is still unknown, so let's take a baby-step approach.
Diff below introduces the new API with only minimal changes. It should not introduce any change in behavior. Comments? Oks? Index: kern/kern_event.c =================================================================== RCS file: /cvs/src/sys/kern/kern_event.c,v retrieving revision 1.142 diff -u -p -r1.142 kern_event.c --- kern/kern_event.c 12 Aug 2020 13:49:24 -0000 1.142 +++ kern/kern_event.c 14 Aug 2020 10:13:38 -0000 @@ -64,9 +64,6 @@ void KQREF(struct kqueue *); void KQRELE(struct kqueue *); int kqueue_sleep(struct kqueue *, struct timespec *); -int kqueue_scan(struct kqueue *kq, int maxevents, - struct kevent *ulistp, struct timespec *timeout, - struct kevent *kev, struct proc *p, int *retval); int kqueue_read(struct file *, struct uio *, int); int kqueue_write(struct file *, struct uio *, int); @@ -554,6 +551,7 @@ out: int sys_kevent(struct proc *p, void *v, register_t *retval) { + struct kqueue_scan_state scan; struct filedesc* fdp = p->p_fd; struct sys_kevent_args /* { syscallarg(int) fd; @@ -635,11 +633,12 @@ sys_kevent(struct proc *p, void *v, regi goto done; } - KQREF(kq); + kqueue_scan_setup(&scan, kq); FRELE(fp, p); - error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist), + error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist), tsp, kev, p, &n); - KQRELE(kq); + kqueue_scan_finish(&scan); + *retval = n; return (error); @@ -895,11 +894,13 @@ kqueue_sleep(struct kqueue *kq, struct t } int -kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp, - struct timespec *tsp, struct kevent *kev, struct proc *p, int *retval) +kqueue_scan(struct kqueue_scan_state *scan, int maxevents, + struct kevent *ulistp, struct timespec *tsp, struct kevent *kev, + struct proc *p, int *retval) { + struct kqueue *kq = scan->kqs_kq; struct kevent *kevp; - struct knote mend, mstart, *kn; + struct knote *kn; int s, count, nkev, error = 0; nkev = 0; @@ -909,9 +910,6 @@ kqueue_scan(struct kqueue *kq, int maxev if (count == 0) goto done; - 
memset(&mstart, 0, sizeof(mstart)); - memset(&mend, 0, sizeof(mend)); - retry: KASSERT(count == maxevents); KASSERT(nkev == 0); @@ -939,18 +937,16 @@ retry: goto done; } - mstart.kn_filter = EVFILT_MARKER; - mstart.kn_status = KN_PROCESSING; - TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe); - mend.kn_filter = EVFILT_MARKER; - mend.kn_status = KN_PROCESSING; - TAILQ_INSERT_TAIL(&kq->kq_head, &mend, kn_tqe); + TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); + TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); while (count) { - kn = TAILQ_NEXT(&mstart, kn_tqe); + kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); if (kn->kn_filter == EVFILT_MARKER) { - if (kn == &mend) { - TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); + if (kn == &scan->kqs_end) { + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, + kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, + kn_tqe); splx(s); if (count == maxevents) goto retry; @@ -958,8 +954,9 @@ retry: } /* Move start marker past another thread's marker. 
*/ - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); - TAILQ_INSERT_AFTER(&kq->kq_head, kn, &mstart, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); + TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, + kn_tqe); continue; } @@ -1029,8 +1026,8 @@ retry: break; } } - TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); splx(s); done: if (nkev != 0) { @@ -1044,6 +1041,33 @@ done: *retval = maxevents - count; return (error); } + +void +kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) +{ + memset(scan, 0, sizeof(*scan)); + + KQREF(kq); + scan->kqs_kq = kq; + scan->kqs_start.kn_filter = EVFILT_MARKER; + scan->kqs_start.kn_status = KN_PROCESSING; + scan->kqs_end.kn_filter = EVFILT_MARKER; + scan->kqs_end.kn_status = KN_PROCESSING; +} + +void +kqueue_scan_finish(struct kqueue_scan_state *scan) +{ + struct kqueue *kq = scan->kqs_kq; + + KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); + KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); + KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); + KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); + + KQRELE(kq); +} + /* * XXX Index: sys/event.h =================================================================== RCS file: /cvs/src/sys/sys/event.h,v retrieving revision 1.44 diff -u -p -r1.44 event.h --- sys/event.h 22 Jun 2020 13:14:32 -0000 1.44 +++ sys/event.h 14 Aug 2020 10:14:18 -0000 @@ -200,7 +200,14 @@ struct knote { #define kn_fp kn_ptr.p_fp }; +struct kqueue_scan_state { + struct kqueue *kqs_kq; /* kqueue of this scan */ + struct knote kqs_start; /* start marker */ + struct knote kqs_end; /* end marker */ +}; + struct proc; +struct timespec; extern const struct filterops sig_filtops; extern const struct filterops dead_filtops; @@ -212,6 +219,10 @@ extern void knote_fdclose(struct proc *p extern void knote_processexit(struct proc *); 
extern int kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p); +int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *, + struct timespec *, struct kevent *, struct proc *, int *); +extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *); +extern void kqueue_scan_finish(struct kqueue_scan_state *); extern int filt_seltrue(struct knote *kn, long hint); extern int seltrue_kqfilter(dev_t, struct knote *); extern void klist_insert(struct klist *, struct knote *);