On Fri, Aug 14, 2020 at 12:31:33PM +0200, Martin Pieuchot wrote: > The previous change introducing the kqueue_scan_setup()/finish() API > required to switch poll(2) internals to use the kqueue mechanism has > been backed out. The reason for the regression is still unknown, so > let's take a baby step approach. > > Diff below introduces the new API with only minimal changes. It should > not introduce any change in behavior.
There is a subtle change in behaviour: the markers' kn_filter and kn_status are now initialized only once, at the start of the scan. Previously, they were also set at the start of each retry. This diff should be tested at least by those who had problems with the original kqueue_scan_state patch. The original patch could cause data loss, and the exact cause is still unknown. Hence extra caution is warranted. > Index: kern/kern_event.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_event.c,v > retrieving revision 1.142 > diff -u -p -r1.142 kern_event.c > --- kern/kern_event.c 12 Aug 2020 13:49:24 -0000 1.142 > +++ kern/kern_event.c 14 Aug 2020 10:13:38 -0000 > @@ -64,9 +64,6 @@ void KQREF(struct kqueue *); > void KQRELE(struct kqueue *); > > int kqueue_sleep(struct kqueue *, struct timespec *); > -int kqueue_scan(struct kqueue *kq, int maxevents, > - struct kevent *ulistp, struct timespec *timeout, > - struct kevent *kev, struct proc *p, int *retval); > > int kqueue_read(struct file *, struct uio *, int); > int kqueue_write(struct file *, struct uio *, int); > @@ -554,6 +551,7 @@ out: > int > sys_kevent(struct proc *p, void *v, register_t *retval) > { > + struct kqueue_scan_state scan; > struct filedesc* fdp = p->p_fd; > struct sys_kevent_args /* { > syscallarg(int) fd; > @@ -635,11 +633,12 @@ sys_kevent(struct proc *p, void *v, regi > goto done; > } > > - KQREF(kq); > + kqueue_scan_setup(&scan, kq); > FRELE(fp, p); > - error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist), > + error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist), > tsp, kev, p, &n); > - KQRELE(kq); > + kqueue_scan_finish(&scan); > + > *retval = n; > return (error); > > @@ -895,11 +894,13 @@ kqueue_sleep(struct kqueue *kq, struct t > } > > int > -kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp, > - struct timespec *tsp, struct kevent *kev, struct proc *p, int *retval) > +kqueue_scan(struct 
kqueue_scan_state *scan, int maxevents, > + struct kevent *ulistp, struct timespec *tsp, struct kevent *kev, > + struct proc *p, int *retval) > { > + struct kqueue *kq = scan->kqs_kq; > struct kevent *kevp; > - struct knote mend, mstart, *kn; > + struct knote *kn; > int s, count, nkev, error = 0; > > nkev = 0; > @@ -909,9 +910,6 @@ kqueue_scan(struct kqueue *kq, int maxev > if (count == 0) > goto done; > > - memset(&mstart, 0, sizeof(mstart)); > - memset(&mend, 0, sizeof(mend)); > - > retry: > KASSERT(count == maxevents); > KASSERT(nkev == 0); > @@ -939,18 +937,16 @@ retry: > goto done; > } > > - mstart.kn_filter = EVFILT_MARKER; > - mstart.kn_status = KN_PROCESSING; > - TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe); > - mend.kn_filter = EVFILT_MARKER; > - mend.kn_status = KN_PROCESSING; > - TAILQ_INSERT_TAIL(&kq->kq_head, &mend, kn_tqe); > + TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); > + TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); > while (count) { > - kn = TAILQ_NEXT(&mstart, kn_tqe); > + kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); > if (kn->kn_filter == EVFILT_MARKER) { > - if (kn == &mend) { > - TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); > - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); > + if (kn == &scan->kqs_end) { > + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, > + kn_tqe); > + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, > + kn_tqe); > splx(s); > if (count == maxevents) > goto retry; > @@ -958,8 +954,9 @@ retry: > } > > /* Move start marker past another thread's marker. 
*/ > - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); > - TAILQ_INSERT_AFTER(&kq->kq_head, kn, &mstart, kn_tqe); > + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); > + TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, > + kn_tqe); > continue; > } > > @@ -1029,8 +1026,8 @@ retry: > break; > } > } > - TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); > - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); > + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); > + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); > splx(s); > done: > if (nkev != 0) { > @@ -1044,6 +1041,33 @@ done: > *retval = maxevents - count; > return (error); > } > + > +void > +kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) > +{ > + memset(scan, 0, sizeof(*scan)); > + > + KQREF(kq); > + scan->kqs_kq = kq; > + scan->kqs_start.kn_filter = EVFILT_MARKER; > + scan->kqs_start.kn_status = KN_PROCESSING; > + scan->kqs_end.kn_filter = EVFILT_MARKER; > + scan->kqs_end.kn_status = KN_PROCESSING; > +} > + > +void > +kqueue_scan_finish(struct kqueue_scan_state *scan) > +{ > + struct kqueue *kq = scan->kqs_kq; > + > + KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER); > + KASSERT(scan->kqs_start.kn_status == KN_PROCESSING); > + KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER); > + KASSERT(scan->kqs_end.kn_status == KN_PROCESSING); > + > + KQRELE(kq); > +} > + > > /* > * XXX > Index: sys/event.h > =================================================================== > RCS file: /cvs/src/sys/sys/event.h,v > retrieving revision 1.44 > diff -u -p -r1.44 event.h > --- sys/event.h 22 Jun 2020 13:14:32 -0000 1.44 > +++ sys/event.h 14 Aug 2020 10:14:18 -0000 > @@ -200,7 +200,14 @@ struct knote { > #define kn_fp kn_ptr.p_fp > }; > > +struct kqueue_scan_state { > + struct kqueue *kqs_kq; /* kqueue of this scan */ > + struct knote kqs_start; /* start marker */ > + struct knote kqs_end; /* end marker */ > +}; > + > struct proc; > +struct timespec; > > extern const struct filterops sig_filtops; > 
extern const struct filterops dead_filtops; > @@ -212,6 +219,10 @@ extern void knote_fdclose(struct proc *p > extern void knote_processexit(struct proc *); > extern int kqueue_register(struct kqueue *kq, > struct kevent *kev, struct proc *p); > +int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *, > + struct timespec *, struct kevent *, struct proc *, int *); > +extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *); > +extern void kqueue_scan_finish(struct kqueue_scan_state *); > extern int filt_seltrue(struct knote *kn, long hint); > extern int seltrue_kqfilter(dev_t, struct knote *); > extern void klist_insert(struct klist *, struct knote *); >