Re: kqueue_scan_setup/finish
On Fri, Aug 14, 2020 at 12:31:33PM +0200, Martin Pieuchot wrote: > The previous change introducing the kqueue_scan_setup()/finish() API > required to switch poll(2) internals to use the kqueue mechanism has > been backed out. The reason for the regression is still unknown, so > let's take a baby step approach. > > Diff below introduces the new API with only minimal changes. It should > not introduce any change in behavior. There is a subtle change in behaviour: the markers' kn_filter and kn_status are now initialized only once, at the start of the scan. Previously, they were set also at the start of each retry. This diff should be tested at least by those who had problems with the original kqueue_scan_state patch. The original patch could cause data loss, and the exact cause is still unknown. Hence extra caution is warranted. > Index: kern/kern_event.c > === > RCS file: /cvs/src/sys/kern/kern_event.c,v > retrieving revision 1.142 > diff -u -p -r1.142 kern_event.c > --- kern/kern_event.c 12 Aug 2020 13:49:24 - 1.142 > +++ kern/kern_event.c 14 Aug 2020 10:13:38 - > @@ -64,9 +64,6 @@ void KQREF(struct kqueue *); > void KQRELE(struct kqueue *); > > int kqueue_sleep(struct kqueue *, struct timespec *); > -int kqueue_scan(struct kqueue *kq, int maxevents, > - struct kevent *ulistp, struct timespec *timeout, > - struct kevent *kev, struct proc *p, int *retval); > > int kqueue_read(struct file *, struct uio *, int); > int kqueue_write(struct file *, struct uio *, int); > @@ -554,6 +551,7 @@ out: > int > sys_kevent(struct proc *p, void *v, register_t *retval) > { > + struct kqueue_scan_state scan; > struct filedesc* fdp = p->p_fd; > struct sys_kevent_args /* { > syscallarg(int) fd; > @@ -635,11 +633,12 @@ sys_kevent(struct proc *p, void *v, regi > goto done; > } > > - KQREF(kq); > + kqueue_scan_setup(&scan, kq); > FRELE(fp, p); > - error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist), > + error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist), > tsp, kev, 
p, &n); > - KQRELE(kq); > + kqueue_scan_finish(&scan); > + > *retval = n; > return (error); > > @@ -895,11 +894,13 @@ kqueue_sleep(struct kqueue *kq, struct t > } > > int > -kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp, > -struct timespec *tsp, struct kevent *kev, struct proc *p, int *retval) > +kqueue_scan(struct kqueue_scan_state *scan, int maxevents, > +struct kevent *ulistp, struct timespec *tsp, struct kevent *kev, > +struct proc *p, int *retval) > { > + struct kqueue *kq = scan->kqs_kq; > struct kevent *kevp; > - struct knote mend, mstart, *kn; > + struct knote *kn; > int s, count, nkev, error = 0; > > nkev = 0; > @@ -909,9 +910,6 @@ kqueue_scan(struct kqueue *kq, int maxev > if (count == 0) > goto done; > > - memset(&mstart, 0, sizeof(mstart)); > - memset(&mend, 0, sizeof(mend)); > - > retry: > KASSERT(count == maxevents); > KASSERT(nkev == 0); > @@ -939,18 +937,16 @@ retry: > goto done; > } > > - mstart.kn_filter = EVFILT_MARKER; > - mstart.kn_status = KN_PROCESSING; > - TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe); > - mend.kn_filter = EVFILT_MARKER; > - mend.kn_status = KN_PROCESSING; > - TAILQ_INSERT_TAIL(&kq->kq_head, &mend, kn_tqe); > + TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); > + TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); > while (count) { > - kn = TAILQ_NEXT(&mstart, kn_tqe); > + kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); > if (kn->kn_filter == EVFILT_MARKER) { > - if (kn == &mend) { > - TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); > - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); > + if (kn == &scan->kqs_end) { > + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, > + kn_tqe); > + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, > + kn_tqe); > splx(s); > if (count == maxevents) > goto retry; > @@ -958,8 +954,9 @@ retry: > } > > /* Move start marker past another thread's marker. */ > - TAILQ_REMOVE(&kq->kq_head, &mstart,
kqueue_scan_setup/finish
The previous change introducing the kqueue_scan_setup()/finish() API required to switch poll(2) internals to use the kqueue mechanism has been backed out. The reason for the regression is still unknown, so let's take a baby step approach. Diff below introduces the new API with only minimal changes. It should not introduce any change in behavior. Comments? Oks? Index: kern/kern_event.c === RCS file: /cvs/src/sys/kern/kern_event.c,v retrieving revision 1.142 diff -u -p -r1.142 kern_event.c --- kern/kern_event.c 12 Aug 2020 13:49:24 - 1.142 +++ kern/kern_event.c 14 Aug 2020 10:13:38 - @@ -64,9 +64,6 @@ void KQREF(struct kqueue *); void KQRELE(struct kqueue *); int kqueue_sleep(struct kqueue *, struct timespec *); -int kqueue_scan(struct kqueue *kq, int maxevents, - struct kevent *ulistp, struct timespec *timeout, - struct kevent *kev, struct proc *p, int *retval); int kqueue_read(struct file *, struct uio *, int); int kqueue_write(struct file *, struct uio *, int); @@ -554,6 +551,7 @@ out: int sys_kevent(struct proc *p, void *v, register_t *retval) { + struct kqueue_scan_state scan; struct filedesc* fdp = p->p_fd; struct sys_kevent_args /* { syscallarg(int) fd; @@ -635,11 +633,12 @@ sys_kevent(struct proc *p, void *v, regi goto done; } - KQREF(kq); + kqueue_scan_setup(&scan, kq); FRELE(fp, p); - error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist), + error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist), tsp, kev, p, &n); - KQRELE(kq); + kqueue_scan_finish(&scan); + *retval = n; return (error); @@ -895,11 +894,13 @@ kqueue_sleep(struct kqueue *kq, struct t } int -kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp, -struct timespec *tsp, struct kevent *kev, struct proc *p, int *retval) +kqueue_scan(struct kqueue_scan_state *scan, int maxevents, +struct kevent *ulistp, struct timespec *tsp, struct kevent *kev, +struct proc *p, int *retval) { + struct kqueue *kq = scan->kqs_kq; struct kevent *kevp; - struct knote mend, mstart, *kn; + struct 
knote *kn; int s, count, nkev, error = 0; nkev = 0; @@ -909,9 +910,6 @@ kqueue_scan(struct kqueue *kq, int maxev if (count == 0) goto done; - memset(&mstart, 0, sizeof(mstart)); - memset(&mend, 0, sizeof(mend)); - retry: KASSERT(count == maxevents); KASSERT(nkev == 0); @@ -939,18 +937,16 @@ retry: goto done; } - mstart.kn_filter = EVFILT_MARKER; - mstart.kn_status = KN_PROCESSING; - TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe); - mend.kn_filter = EVFILT_MARKER; - mend.kn_status = KN_PROCESSING; - TAILQ_INSERT_TAIL(&kq->kq_head, &mend, kn_tqe); + TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe); + TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe); while (count) { - kn = TAILQ_NEXT(&mstart, kn_tqe); + kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); if (kn->kn_filter == EVFILT_MARKER) { - if (kn == &mend) { - TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); + if (kn == &scan->kqs_end) { + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, + kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, + kn_tqe); splx(s); if (count == maxevents) goto retry; @@ -958,8 +954,9 @@ retry: } /* Move start marker past another thread's marker. */ - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); - TAILQ_INSERT_AFTER(&kq->kq_head, kn, &mstart, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); + TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start, + kn_tqe); continue; } @@ -1029,8 +1026,8 @@ retry: break; } } - TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe); - TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe); + TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); splx(s); done: if (nkev != 0) { @@ -1044,6 +1041,33 @@ done: *retval = maxevents - count; return (error); } + +void +kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq) +{ + memset(scan, 0, sizeof(*scan)); + +