The previous change introducing the kqueue_scan_setup() and
kqueue_scan_finish() API, which is required to switch the poll(2)
internals over to the kqueue mechanism, has been backed out.  The
reason for the regression is still unknown, so let's take a baby-step
approach.

The diff below introduces the new API with only minimal changes.  It
should not introduce any change in behavior.

Comments?  Oks?

Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.142
diff -u -p -r1.142 kern_event.c
--- kern/kern_event.c   12 Aug 2020 13:49:24 -0000      1.142
+++ kern/kern_event.c   14 Aug 2020 10:13:38 -0000
@@ -64,9 +64,6 @@ void  KQREF(struct kqueue *);
 void   KQRELE(struct kqueue *);
 
 int    kqueue_sleep(struct kqueue *, struct timespec *);
-int    kqueue_scan(struct kqueue *kq, int maxevents,
-                   struct kevent *ulistp, struct timespec *timeout,
-                   struct kevent *kev, struct proc *p, int *retval);
 
 int    kqueue_read(struct file *, struct uio *, int);
 int    kqueue_write(struct file *, struct uio *, int);
@@ -554,6 +551,7 @@ out:
 int
 sys_kevent(struct proc *p, void *v, register_t *retval)
 {
+       struct kqueue_scan_state scan;
        struct filedesc* fdp = p->p_fd;
        struct sys_kevent_args /* {
                syscallarg(int) fd;
@@ -635,11 +633,12 @@ sys_kevent(struct proc *p, void *v, regi
                goto done;
        }
 
-       KQREF(kq);
+       kqueue_scan_setup(&scan, kq);
        FRELE(fp, p);
-       error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist),
+       error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist),
            tsp, kev, p, &n);
-       KQRELE(kq);
+       kqueue_scan_finish(&scan);
+
        *retval = n;
        return (error);
 
@@ -895,11 +894,13 @@ kqueue_sleep(struct kqueue *kq, struct t
 }
 
 int
-kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp,
-    struct timespec *tsp, struct kevent *kev, struct proc *p, int *retval)
+kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
+    struct kevent *ulistp, struct timespec *tsp, struct kevent *kev,
+    struct proc *p, int *retval)
 {
+       struct kqueue *kq = scan->kqs_kq;
        struct kevent *kevp;
-       struct knote mend, mstart, *kn;
+       struct knote *kn;
        int s, count, nkev, error = 0;
 
        nkev = 0;
@@ -909,9 +910,6 @@ kqueue_scan(struct kqueue *kq, int maxev
        if (count == 0)
                goto done;
 
-       memset(&mstart, 0, sizeof(mstart));
-       memset(&mend, 0, sizeof(mend));
-
 retry:
        KASSERT(count == maxevents);
        KASSERT(nkev == 0);
@@ -939,18 +937,16 @@ retry:
                goto done;
        }
 
-       mstart.kn_filter = EVFILT_MARKER;
-       mstart.kn_status = KN_PROCESSING;
-       TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe);
-       mend.kn_filter = EVFILT_MARKER;
-       mend.kn_status = KN_PROCESSING;
-       TAILQ_INSERT_TAIL(&kq->kq_head, &mend, kn_tqe);
+       TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
+       TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
        while (count) {
-               kn = TAILQ_NEXT(&mstart, kn_tqe);
+               kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
                if (kn->kn_filter == EVFILT_MARKER) {
-                       if (kn == &mend) {
-                               TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe);
-                               TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
+                       if (kn == &scan->kqs_end) {
+                               TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end,
+                                   kn_tqe);
+                               TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start,
+                                   kn_tqe);
                                splx(s);
                                if (count == maxevents)
                                        goto retry;
@@ -958,8 +954,9 @@ retry:
                        }
 
                        /* Move start marker past another thread's marker. */
-                       TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
-                       TAILQ_INSERT_AFTER(&kq->kq_head, kn, &mstart, kn_tqe);
+                       TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
+                       TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
+                           kn_tqe);
                        continue;
                }
 
@@ -1029,8 +1026,8 @@ retry:
                                break;
                }
        }
-       TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe);
-       TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
+       TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
+       TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
        splx(s);
 done:
        if (nkev != 0) {
@@ -1044,6 +1041,33 @@ done:
        *retval = maxevents - count;
        return (error);
 }
+
+void
+kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
+{
+       memset(scan, 0, sizeof(*scan));
+
+       KQREF(kq);
+       scan->kqs_kq = kq;
+       scan->kqs_start.kn_filter = EVFILT_MARKER;
+       scan->kqs_start.kn_status = KN_PROCESSING;
+       scan->kqs_end.kn_filter = EVFILT_MARKER;
+       scan->kqs_end.kn_status = KN_PROCESSING;
+}
+
+void
+kqueue_scan_finish(struct kqueue_scan_state *scan)
+{
+       struct kqueue *kq = scan->kqs_kq;
+
+       KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER);
+       KASSERT(scan->kqs_start.kn_status == KN_PROCESSING);
+       KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER);
+       KASSERT(scan->kqs_end.kn_status == KN_PROCESSING);
+
+       KQRELE(kq);
+}
+
 
 /*
  * XXX
Index: sys/event.h
===================================================================
RCS file: /cvs/src/sys/sys/event.h,v
retrieving revision 1.44
diff -u -p -r1.44 event.h
--- sys/event.h 22 Jun 2020 13:14:32 -0000      1.44
+++ sys/event.h 14 Aug 2020 10:14:18 -0000
@@ -200,7 +200,14 @@ struct knote {
 #define kn_fp          kn_ptr.p_fp
 };
 
+struct kqueue_scan_state {
+       struct kqueue   *kqs_kq;                /* kqueue of this scan */
+       struct knote     kqs_start;             /* start marker */
+       struct knote     kqs_end;               /* end marker */
+};
+
 struct proc;
+struct timespec;
 
 extern const struct filterops sig_filtops;
 extern const struct filterops dead_filtops;
@@ -212,6 +219,10 @@ extern void        knote_fdclose(struct proc *p
 extern void    knote_processexit(struct proc *);
 extern int     kqueue_register(struct kqueue *kq,
                    struct kevent *kev, struct proc *p);
+int    kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,
+                   struct timespec *, struct kevent *, struct proc *, int *);
+extern void    kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *);
+extern void    kqueue_scan_finish(struct kqueue_scan_state *);
 extern int     filt_seltrue(struct knote *kn, long hint);
 extern int     seltrue_kqfilter(dev_t, struct knote *);
 extern void    klist_insert(struct klist *, struct knote *);

Reply via email to