This patch modifies the filterops callback interface so that it becomes
easier to use with fine-grained locking. The current code is an
entanglement of accesses to various data. Some items are more or less
controlled by kqueue, while some others are more or less under the
control of event sources. The main point of the patch is to make the
event source responsible for serializing access to struct knote's
kn_kevent. This idea has been influenced by XNU's kqueue implementation.

The patch adds two callbacks to struct filterops: .f_modify() and
.f_process(). .f_modify() applies parameters from user to a knote.
As a feature, event sources now have the option to refuse a parameter
update. .f_process() retrieves a pending event from a knote, or checks
if there is an event pending.

After the refactoring, .f_event() is used when an event source calls
knote(). However, I will not change all the filterops implementations at
once, so the patch uses .f_event() to emulate the new callbacks if they
are missing. The emulation attempts to preserve old behaviour.

One of the reasons for this change is to keep kqueue's and event
sources' locking clearly separate. The restructuring allows, for
example, atomic fetching and clearing of event state (EV_CLEAR).

The patch includes filterops changes for pipes and sockets, to give an
idea of what the refactoring means in practice. One point of interest
is that the NOTE_SUBMIT-based conditional locking becomes unnecessary.
Also, there is a subtle change with EV_ONESHOT because now the pipe and
socket callbacks do recheck the event during kqueue_scan() even when
EV_ONESHOT is set. This could be avoided with a little extra code, but
I doubt it is worth it with level-triggered events.

The patch additionally makes preparations for kqueue unlocking
by introducing a flag that indicates if filterops callbacks are MP-safe.
However, kqueue itself still needs the kernel lock.

The long section of splhigh() in kqueue_scan() is split as preparation
for an upcoming mutex. This step is made possible by the refactoring.

The socket filterops are not marked MP-safe yet because socket klists
are not MP-safe. I have earlier posted a diff that uses solock() for
the klists, but am having second thoughts about it. Inet sockets'
solock() is based on NET_LOCK() and that lock is already showing signs
of contention. Getting kqueue unlocked might help with that a bit since
rw_lock() is allowed to spin for a moment when kernel lock is not held.

OK?

Index: kern/kern_event.c
===================================================================
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.159
diff -u -p -r1.159 kern_event.c
--- kern/kern_event.c   17 Jan 2021 05:56:32 -0000      1.159
+++ kern/kern_event.c   25 Jan 2021 14:54:19 -0000
@@ -95,6 +95,11 @@ void kqueue_do_check(struct kqueue *kq, 
 
 void   kqpoll_dequeue(struct proc *p);
 
+static int     filter_attach(struct knote *kn);
+static void    filter_detach(struct knote *kn);
+static int     filter_event(struct knote *kn, long hint);
+static int     filter_modify(struct kevent *kev, struct knote *kn);
+static int     filter_process(struct knote *kn, struct kevent *kev);
 static void    kqueue_expand_hash(struct kqueue *kq);
 static void    kqueue_expand_list(struct kqueue *kq, int fd);
 static void    kqueue_task(void *);
@@ -372,7 +377,7 @@ filt_proc(struct knote *kn, long hint)
                kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
                kev.fflags = kn->kn_sfflags;
                kev.data = kn->kn_id;                   /* parent */
-               kev.udata = kn->kn_kevent.udata;        /* preserve udata */
+               kev.udata = kn->kn_udata;               /* preserve udata */
                error = kqueue_register(kn->kn_kq, &kev, NULL);
                if (error)
                        kn->kn_fflags |= NOTE_TRACKERR;
@@ -467,6 +472,20 @@ filt_seltrue(struct knote *kn, long hint
        return (1);
 }
 
+int
+filt_seltruemodify(struct kevent *kev, struct knote *kn)
+{
+       knote_modify(kev, kn);
+       return (1);
+}
+
+int
+filt_seltrueprocess(struct knote *kn, struct kevent *kev)
+{
+       knote_submit(kn, kev);
+       return (1);
+}
+
 /*
  * This provides full kqfilter entry for device switch tables, which
  * has same effect as filter using filt_seltrue() as filter method.
@@ -478,10 +497,12 @@ filt_seltruedetach(struct knote *kn)
 }
 
 const struct filterops seltrue_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_seltruedetach,
        .f_event        = filt_seltrue,
+       .f_modify       = filt_seltruemodify,
+       .f_process      = filt_seltrueprocess,
 };
 
 int
@@ -510,6 +531,20 @@ filt_dead(struct knote *kn, long hint)
        return (1);
 }
 
+static int
+filt_deadmodify(struct kevent *kev, struct knote *kn)
+{
+       knote_modify(kev, kn);
+       return (1);
+}
+
+static int
+filt_deadprocess(struct knote *kn, struct kevent *kev)
+{
+       knote_submit(kn, kev);
+       return (1);
+}
+
 static void
 filt_deaddetach(struct knote *kn)
 {
@@ -517,10 +552,12 @@ filt_deaddetach(struct knote *kn)
 }
 
 const struct filterops dead_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_deaddetach,
        .f_event        = filt_dead,
+       .f_modify       = filt_deadmodify,
+       .f_process      = filt_deadprocess,
 };
 
 static int
@@ -533,12 +570,104 @@ filt_badfd(struct knote *kn, long hint)
 
 /* For use with kqpoll. */
 const struct filterops badfd_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_deaddetach,
        .f_event        = filt_badfd,
+       .f_modify       = filt_deadmodify,
+       .f_process      = filt_deadprocess,
 };
 
+static int
+filter_attach(struct knote *kn)
+{
+       int error;
+
+       if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+               error = kn->kn_fop->f_attach(kn);
+       } else {
+               KERNEL_LOCK();
+               error = kn->kn_fop->f_attach(kn);
+               KERNEL_UNLOCK();
+       }
+       return (error);
+}
+
+static void
+filter_detach(struct knote *kn)
+{
+       if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+               kn->kn_fop->f_detach(kn);
+       } else {
+               KERNEL_LOCK();
+               kn->kn_fop->f_detach(kn);
+               KERNEL_UNLOCK();
+       }
+}
+
+static int
+filter_event(struct knote *kn, long hint)
+{
+       if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
+               KERNEL_ASSERT_LOCKED();
+
+       return (kn->kn_fop->f_event(kn, hint));
+}
+
+static int
+filter_modify(struct kevent *kev, struct knote *kn)
+{
+       int active, s;
+
+       if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+               active = kn->kn_fop->f_modify(kev, kn);
+       } else {
+               KERNEL_LOCK();
+               if (kn->kn_fop->f_modify != NULL) {
+                       active = kn->kn_fop->f_modify(kev, kn);
+               } else {
+                       /* Emulate f_modify using f_event. */
+                       s = splhigh();
+                       knote_modify(kev, kn);
+                       active = kn->kn_fop->f_event(kn, 0);
+                       splx(s);
+               }
+               KERNEL_UNLOCK();
+       }
+       return (active);
+}
+
+static int
+filter_process(struct knote *kn, struct kevent *kev)
+{
+       int active, s;
+
+       if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
+               active = kn->kn_fop->f_process(kn, kev);
+       } else {
+               KERNEL_LOCK();
+               if (kn->kn_fop->f_process != NULL) {
+                       active = kn->kn_fop->f_process(kn, kev);
+               } else {
+                       /* Emulate f_process using f_event. */
+                       s = splhigh();
+                       /*
+                        * If called from kqueue_scan(), skip f_event
+                        * when EV_ONESHOT is set, to preserve old behaviour.
+                        */
+                       if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+                               active = 1;
+                       else
+                               active = kn->kn_fop->f_event(kn, 0);
+                       if (active)
+                               knote_submit(kn, kev);
+                       splx(s);
+               }
+               KERNEL_UNLOCK();
+       }
+       return (active);
+}
+
 void
 kqpoll_init(void)
 {
@@ -916,7 +1045,8 @@ again:
                        kn->kn_kevent = *kev;
 
                        knote_attach(kn);
-                       if ((error = fops->f_attach(kn)) != 0) {
+                       error = filter_attach(kn);
+                       if (error != 0) {
                                knote_drop(kn, p);
                                goto done;
                        }
@@ -935,28 +1065,29 @@ again:
                                 * seen it. This corresponds to the insert
                                 * happening in full before the close.
                                 */
-                               kn->kn_fop->f_detach(kn);
+                               filter_detach(kn);
                                knote_drop(kn, p);
                                goto done;
                        }
+
+                       /* Check if there is a pending event. */
+                       if (filter_process(kn, NULL))
+                               knote_activate(kn);
                } else {
                        /*
                         * The user may change some filter values after the
                         * initial EV_ADD, but doing so will not reset any
                         * filters which have already been triggered.
                         */
-                       kn->kn_sfflags = kev->fflags;
-                       kn->kn_sdata = kev->data;
-                       kn->kn_kevent.udata = kev->udata;
+                       if (filter_modify(kev, kn))
+                               knote_activate(kn);
+                       if (kev->flags & EV_ERROR) {
+                               error = kev->data;
+                               goto release;
+                       }
                }
-
-               s = splhigh();
-               if (kn->kn_fop->f_event(kn, 0))
-                       knote_activate(kn);
-               splx(s);
-
        } else if (kev->flags & EV_DELETE) {
-               kn->kn_fop->f_detach(kn);
+               filter_detach(kn);
                knote_drop(kn, p);
                goto done;
        }
@@ -971,14 +1102,13 @@ again:
        if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
                s = splhigh();
                kn->kn_status &= ~KN_DISABLED;
-               if (kn->kn_fop->f_event(kn, 0))
-                       kn->kn_status |= KN_ACTIVE;
-               if ((kn->kn_status & KN_ACTIVE) &&
-                   ((kn->kn_status & KN_QUEUED) == 0))
-                       knote_enqueue(kn);
                splx(s);
+               /* Check if there is a pending event. */
+               if (filter_process(kn, NULL))
+                       knote_activate(kn);
        }
 
+release:
        s = splhigh();
        knote_release(kn);
        splx(s);
@@ -1108,39 +1238,36 @@ retry:
                        knote_release(kn);
                        continue;
                }
-               if ((kn->kn_flags & EV_ONESHOT) == 0 &&
-                   kn->kn_fop->f_event(kn, 0) == 0) {
+
+               splx(s);
+
+               memset(kevp, 0, sizeof(*kevp));
+               if (filter_process(kn, kevp) == 0) {
+                       s = splhigh();
                        if ((kn->kn_status & KN_QUEUED) == 0)
                                kn->kn_status &= ~KN_ACTIVE;
                        knote_release(kn);
                        kqueue_check(kq);
                        continue;
                }
-               *kevp = kn->kn_kevent;
-               kevp++;
-               nkev++;
-               scan->kqs_nevent++;
 
                /*
                 * Post-event action on the note
                 */
-               if (kn->kn_flags & EV_ONESHOT) {
-                       splx(s);
-                       kn->kn_fop->f_detach(kn);
+               if (kevp->flags & EV_ONESHOT) {
+                       filter_detach(kn);
                        knote_drop(kn, p);
                        s = splhigh();
-               } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
-                       if (kn->kn_flags & EV_CLEAR) {
-                               kn->kn_data = 0;
-                               kn->kn_fflags = 0;
-                       }
-                       if (kn->kn_flags & EV_DISPATCH)
+               } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) {
+                       s = splhigh();
+                       if (kevp->flags & EV_DISPATCH)
                                kn->kn_status |= KN_DISABLED;
                        if ((kn->kn_status & KN_QUEUED) == 0)
                                kn->kn_status &= ~KN_ACTIVE;
                        KASSERT(kn->kn_status & KN_ATTACHED);
                        knote_release(kn);
                } else {
+                       s = splhigh();
                        if ((kn->kn_status & KN_QUEUED) == 0) {
                                kqueue_check(kq);
                                kq->kq_count++;
@@ -1151,6 +1278,10 @@ retry:
                        knote_release(kn);
                }
                kqueue_check(kq);
+
+               kevp++;
+               nkev++;
+               scan->kqs_nevent++;
        }
        TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
        splx(s);
@@ -1439,7 +1570,7 @@ knote(struct klist *list, long hint)
        KLIST_ASSERT_LOCKED(list);
 
        SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0)
-               if (kn->kn_fop->f_event(kn, hint))
+               if (filter_event(kn, hint))
                        knote_activate(kn);
 }
 
@@ -1459,7 +1590,7 @@ knote_remove(struct proc *p, struct knli
                        continue;
                }
                splx(s);
-               kn->kn_fop->f_detach(kn);
+               filter_detach(kn);
 
                /*
                 * Notify poll(2) and select(2) when a monitored
@@ -1645,6 +1776,36 @@ knote_dequeue(struct knote *kn)
        kqueue_check(kq);
 }
 
+/*
+ * Modify the knote's parameters.
+ *
+ * The knote's object lock must be held.
+ */
+void
+knote_modify(const struct kevent *kev, struct knote *kn)
+{
+       kn->kn_sfflags = kev->fflags;
+       kn->kn_sdata = kev->data;
+       kn->kn_udata = kev->udata;
+}
+
+/*
+ * Submit the knote's event for delivery.
+ *
+ * The knote's object lock must be held.
+ */
+void
+knote_submit(struct knote *kn, struct kevent *kev)
+{
+       if (kev != NULL) {
+               *kev = kn->kn_kevent;
+               if (kn->kn_flags & EV_CLEAR) {
+                       kn->kn_fflags = 0;
+                       kn->kn_data = 0;
+               }
+       }
+}
+
 void
 klist_init(struct klist *klist, const struct klistops *ops, void *arg)
 {
@@ -1727,10 +1888,10 @@ klist_invalidate(struct klist *list)
                }
                klist_unlock(list, ls);
                splx(s);
-               kn->kn_fop->f_detach(kn);
+               filter_detach(kn);
                if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
                        kn->kn_fop = &dead_filtops;
-                       kn->kn_fop->f_event(kn, 0);
+                       filter_event(kn, 0);
                        knote_activate(kn);
                        s = splhigh();
                        knote_release(kn);
Index: kern/sys_pipe.c
===================================================================
RCS file: src/sys/kern/sys_pipe.c,v
retrieving revision 1.126
diff -u -p -r1.126 sys_pipe.c
--- kern/sys_pipe.c     30 Dec 2020 17:02:32 -0000      1.126
+++ kern/sys_pipe.c     25 Jan 2021 14:54:19 -0000
@@ -78,20 +78,30 @@ static const struct fileops pipeops = {
 
 void   filt_pipedetach(struct knote *kn);
 int    filt_piperead(struct knote *kn, long hint);
+int    filt_pipereadmodify(struct kevent *kev, struct knote *kn);
+int    filt_pipereadprocess(struct knote *kn, struct kevent *kev);
+int    filt_piperead_common(struct knote *kn, struct pipe *rpipe);
 int    filt_pipewrite(struct knote *kn, long hint);
+int    filt_pipewritemodify(struct kevent *kev, struct knote *kn);
+int    filt_pipewriteprocess(struct knote *kn, struct kevent *kev);
+int    filt_pipewrite_common(struct knote *kn, struct pipe *rpipe);
 
 const struct filterops pipe_rfiltops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_pipedetach,
        .f_event        = filt_piperead,
+       .f_modify       = filt_pipereadmodify,
+       .f_process      = filt_pipereadprocess,
 };
 
 const struct filterops pipe_wfiltops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_pipedetach,
        .f_event        = filt_pipewrite,
+       .f_modify       = filt_pipewritemodify,
+       .f_process      = filt_pipewriteprocess,
 };
 
 /*
@@ -363,7 +373,7 @@ pipeselwakeup(struct pipe *cpipe)
                selwakeup(&cpipe->pipe_sel);
        } else {
                KERNEL_LOCK();
-               KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT);
+               KNOTE(&cpipe->pipe_sel.si_note, 0);
                KERNEL_UNLOCK();
        }
 
@@ -918,45 +928,73 @@ filt_pipedetach(struct knote *kn)
 }
 
 int
-filt_piperead(struct knote *kn, long hint)
+filt_piperead_common(struct knote *kn, struct pipe *rpipe)
 {
-       struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
-       struct rwlock *lock = rpipe->pipe_lock;
+       struct pipe *wpipe;
+
+       rw_assert_wrlock(rpipe->pipe_lock);
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_enter_read(lock);
        wpipe = pipe_peer(rpipe);
 
        kn->kn_data = rpipe->pipe_buffer.cnt;
 
        if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
-               if ((hint & NOTE_SUBMIT) == 0)
-                       rw_exit_read(lock);
                kn->kn_flags |= EV_EOF; 
                if (kn->kn_flags & __EV_POLL)
                        kn->kn_flags |= __EV_HUP;
                return (1);
        }
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_exit_read(lock);
-
        return (kn->kn_data > 0);
 }
 
 int
-filt_pipewrite(struct knote *kn, long hint)
+filt_piperead(struct knote *kn, long hint)
 {
-       struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
-       struct rwlock *lock = rpipe->pipe_lock;
+       struct pipe *rpipe = kn->kn_fp->f_data;
+
+       return (filt_piperead_common(kn, rpipe));
+}
+
+int
+filt_pipereadmodify(struct kevent *kev, struct knote *kn)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       knote_modify(kev, kn);
+       active = filt_piperead_common(kn, rpipe);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
+}
+
+int
+filt_pipereadprocess(struct knote *kn, struct kevent *kev)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       active = filt_piperead_common(kn, rpipe);
+       if (active)
+               knote_submit(kn, kev);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
+}
+
+int
+filt_pipewrite_common(struct knote *kn, struct pipe *rpipe)
+{
+       struct pipe *wpipe;
+
+       rw_assert_wrlock(rpipe->pipe_lock);
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_enter_read(lock);
        wpipe = pipe_peer(rpipe);
 
        if (wpipe == NULL) {
-               if ((hint & NOTE_SUBMIT) == 0)
-                       rw_exit_read(lock);
                kn->kn_data = 0;
                kn->kn_flags |= EV_EOF; 
                if (kn->kn_flags & __EV_POLL)
@@ -965,10 +1003,44 @@ filt_pipewrite(struct knote *kn, long hi
        }
        kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               rw_exit_read(lock);
-
        return (kn->kn_data >= PIPE_BUF);
+}
+
+int
+filt_pipewrite(struct knote *kn, long hint)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+
+       return (filt_pipewrite_common(kn, rpipe));
+}
+
+int
+filt_pipewritemodify(struct kevent *kev, struct knote *kn)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       knote_modify(kev, kn);
+       active = filt_pipewrite_common(kn, rpipe);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
+}
+
+int
+filt_pipewriteprocess(struct knote *kn, struct kevent *kev)
+{
+       struct pipe *rpipe = kn->kn_fp->f_data;
+       int active;
+
+       rw_enter_write(rpipe->pipe_lock);
+       active = filt_pipewrite_common(kn, rpipe);
+       if (active)
+               knote_submit(kn, kev);
+       rw_exit_write(rpipe->pipe_lock);
+
+       return (active);
 }
 
 void
Index: kern/uipc_socket.c
===================================================================
RCS file: src/sys/kern/uipc_socket.c,v
retrieving revision 1.254
diff -u -p -r1.254 uipc_socket.c
--- kern/uipc_socket.c  17 Jan 2021 05:23:34 -0000      1.254
+++ kern/uipc_socket.c  25 Jan 2021 14:54:20 -0000
@@ -69,15 +69,26 @@ int somove(struct socket *, int);
 
 void   filt_sordetach(struct knote *kn);
 int    filt_soread(struct knote *kn, long hint);
+int    filt_soreadmodify(struct kevent *kev, struct knote *kn);
+int    filt_soreadprocess(struct knote *kn, struct kevent *kev);
+int    filt_soread_common(struct knote *kn, struct socket *so);
 void   filt_sowdetach(struct knote *kn);
 int    filt_sowrite(struct knote *kn, long hint);
+int    filt_sowritemodify(struct kevent *kev, struct knote *kn);
+int    filt_sowriteprocess(struct knote *kn, struct kevent *kev);
+int    filt_sowrite_common(struct knote *kn, struct socket *so);
 int    filt_solisten(struct knote *kn, long hint);
+int    filt_solistenmodify(struct kevent *kev, struct knote *kn);
+int    filt_solistenprocess(struct knote *kn, struct kevent *kev);
+int    filt_solisten_common(struct knote *kn, struct socket *so);
 
 const struct filterops solisten_filtops = {
        .f_flags        = FILTEROP_ISFD,
        .f_attach       = NULL,
        .f_detach       = filt_sordetach,
        .f_event        = filt_solisten,
+       .f_modify       = filt_solistenmodify,
+       .f_process      = filt_solistenprocess,
 };
 
 const struct filterops soread_filtops = {
@@ -85,6 +96,8 @@ const struct filterops soread_filtops = 
        .f_attach       = NULL,
        .f_detach       = filt_sordetach,
        .f_event        = filt_soread,
+       .f_modify       = filt_soreadmodify,
+       .f_process      = filt_soreadprocess,
 };
 
 const struct filterops sowrite_filtops = {
@@ -92,6 +105,8 @@ const struct filterops sowrite_filtops =
        .f_attach       = NULL,
        .f_detach       = filt_sowdetach,
        .f_event        = filt_sowrite,
+       .f_modify       = filt_sowritemodify,
+       .f_process      = filt_sowriteprocess,
 };
 
 const struct filterops soexcept_filtops = {
@@ -99,6 +114,8 @@ const struct filterops soexcept_filtops 
        .f_attach       = NULL,
        .f_detach       = filt_sordetach,
        .f_event        = filt_soread,
+       .f_modify       = filt_soreadmodify,
+       .f_process      = filt_soreadprocess,
 };
 
 #ifndef SOMINCONN
@@ -2055,13 +2072,12 @@ filt_sordetach(struct knote *kn)
 }
 
 int
-filt_soread(struct knote *kn, long hint)
+filt_soread_common(struct knote *kn, struct socket *so)
 {
-       struct socket *so = kn->kn_fp->f_data;
-       int s, rv = 0;
+       int rv = 0;
+
+       soassertlocked(so);
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               s = solock(so);
        kn->kn_data = so->so_rcv.sb_cc;
 #ifdef SOCKET_SPLICE
        if (isspliced(so)) {
@@ -2089,12 +2105,47 @@ filt_soread(struct knote *kn, long hint)
        } else {
                rv = (kn->kn_data >= so->so_rcv.sb_lowat);
        }
-       if ((hint & NOTE_SUBMIT) == 0)
-               sounlock(so, s);
 
        return rv;
 }
 
+int
+filt_soread(struct knote *kn, long hint)
+{
+       struct socket *so = kn->kn_fp->f_data;
+
+       return (filt_soread_common(kn, so));
+}
+
+int
+filt_soreadmodify(struct kevent *kev, struct knote *kn)
+{
+       struct socket *so = kn->kn_fp->f_data;
+       int active, s;
+
+       s = solock(so);
+       knote_modify(kev, kn);
+       active = filt_soread_common(kn, so);
+       sounlock(so, s);
+
+       return (active);
+}
+
+int
+filt_soreadprocess(struct knote *kn, struct kevent *kev)
+{
+       struct socket *so = kn->kn_fp->f_data;
+       int active, s;
+
+       s = solock(so);
+       active = filt_soread_common(kn, so);
+       if (active)
+               knote_submit(kn, kev);
+       sounlock(so, s);
+
+       return (active);
+}
+
 void
 filt_sowdetach(struct knote *kn)
 {
@@ -2106,13 +2157,12 @@ filt_sowdetach(struct knote *kn)
 }
 
 int
-filt_sowrite(struct knote *kn, long hint)
+filt_sowrite_common(struct knote *kn, struct socket *so)
 {
-       struct socket *so = kn->kn_fp->f_data;
-       int s, rv;
+       int rv;
+
+       soassertlocked(so);
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               s = solock(so);
        kn->kn_data = sbspace(so, &so->so_snd);
        if (so->so_state & SS_CANTSENDMORE) {
                kn->kn_flags |= EV_EOF;
@@ -2132,25 +2182,92 @@ filt_sowrite(struct knote *kn, long hint
        } else {
                rv = (kn->kn_data >= so->so_snd.sb_lowat);
        }
-       if ((hint & NOTE_SUBMIT) == 0)
-               sounlock(so, s);
 
        return (rv);
 }
 
 int
-filt_solisten(struct knote *kn, long hint)
+filt_sowrite(struct knote *kn, long hint)
 {
        struct socket *so = kn->kn_fp->f_data;
-       int s;
 
-       if ((hint & NOTE_SUBMIT) == 0)
-               s = solock(so);
+       return (filt_sowrite_common(kn, so));
+}
+
+int
+filt_sowritemodify(struct kevent *kev, struct knote *kn)
+{
+       struct socket *so = kn->kn_fp->f_data;
+       int active, s;
+
+       s = solock(so);
+       knote_modify(kev, kn);
+       active = filt_sowrite_common(kn, so);
+       sounlock(so, s);
+
+       return (active);
+}
+
+int
+filt_sowriteprocess(struct knote *kn, struct kevent *kev)
+{
+       struct socket *so = kn->kn_fp->f_data;
+       int active, s;
+
+       s = solock(so);
+       active = filt_sowrite_common(kn, so);
+       if (active)
+               knote_submit(kn, kev);
+       sounlock(so, s);
+
+       return (active);
+}
+
+int
+filt_solisten_common(struct knote *kn, struct socket *so)
+{
+       soassertlocked(so);
+
        kn->kn_data = so->so_qlen;
-       if ((hint & NOTE_SUBMIT) == 0)
-               sounlock(so, s);
 
        return (kn->kn_data != 0);
+}
+
+int
+filt_solisten(struct knote *kn, long hint)
+{
+       struct socket *so = kn->kn_fp->f_data;
+
+       return (filt_solisten_common(kn, so));
+}
+
+int
+filt_solistenmodify(struct kevent *kev, struct knote *kn)
+{
+       struct socket *so = kn->kn_fp->f_data;
+       int active, s;
+
+       s = solock(so);
+       knote_modify(kev, kn);
+       active = filt_solisten_common(kn, so);
+       sounlock(so, s);
+
+       return (active);
+}
+
+int
+filt_solistenprocess(struct knote *kn, struct kevent *kev)
+{
+       struct socket *so = kn->kn_fp->f_data;
+       int active, s;
+
+       s = solock(so);
+       active = filt_solisten_common(kn, so);
+       if (active)
+               knote_submit(kn, kev);
+       sounlock(so, s);
+
+       return (active);
 }
 
 #ifdef DDB
Index: sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.53
diff -u -p -r1.53 event.h
--- sys/event.h 17 Jan 2021 05:56:32 -0000      1.53
+++ sys/event.h 25 Jan 2021 14:54:20 -0000
@@ -165,30 +165,85 @@ struct klist {
  */
 #define NOTE_SIGNAL    0x08000000
 
+/*
+ * = Event filter interface
+ *
+ * == .f_flags
+ *
+ * Defines properties of the event filter:
+ *
+ * - FILTEROP_ISFD      Each knote of this filter is associated
+ *                      with a file descriptor.
+ *
+ * - FILTEROP_MPSAFE    The kqueue subsystem can invoke .f_attach(),
+ *                      .f_detach(), .f_modify() and .f_process() without
+ *                      the kernel lock.
+ *
+ * == .f_attach()
+ *
+ * Attaches the knote to the object.
+ *
+ * == .f_detach()
+ *
+ * Detaches the knote from the object. The object must not use this knote
+ * for delivering events after this callback has returned.
+ *
+ * == .f_event()
+ *
+ * Notifies the filter about an event. Called through knote().
+ *
+ * == .f_modify()
+ *
+ * Modifies the knote with new state from the user.
+ *
+ * Returns non-zero if the knote has become active.
+ *
+ * == .f_process()
+ *
+ * Checks if the event is active and returns non-zero if the event should be
+ * returned to the user.
+ *
+ * If kev is non-NULL and the event is active, the callback should store
+ * the event's state in kev for delivery to the user.
+ *
+ * == Concurrency control
+ *
+ * The kqueue subsystem serializes calls of .f_attach(), .f_detach(),
+ * .f_modify() and .f_process().
+ */
+
 #define FILTEROP_ISFD          0x00000001      /* ident == filedescriptor */
+#define FILTEROP_MPSAFE                0x00000002      /* safe without kernel lock */
 
 struct filterops {
        int     f_flags;
        int     (*f_attach)(struct knote *kn);
        void    (*f_detach)(struct knote *kn);
        int     (*f_event)(struct knote *kn, long hint);
+       int     (*f_modify)(struct kevent *kev, struct knote *kn);
+       int     (*f_process)(struct knote *kn, struct kevent *kev);
 };
 
+/*
+ * Locking:
+ *     I       immutable after creation
+ *     o       object lock
+ */
 struct knote {
        SLIST_ENTRY(knote)      kn_link;        /* for fd */
        SLIST_ENTRY(knote)      kn_selnext;     /* for struct selinfo */
        TAILQ_ENTRY(knote)      kn_tqe;
-       struct                  kqueue *kn_kq;  /* which queue we are on */
+       struct                  kqueue *kn_kq;  /* [I] which queue we are on */
        struct                  kevent kn_kevent;
        int                     kn_status;
-       int                     kn_sfflags;     /* saved filter flags */
-       __int64_t               kn_sdata;       /* saved data field */
+       int                     kn_sfflags;     /* [o] saved filter flags */
+       __int64_t               kn_sdata;       /* [o] saved data field */
        union {
                struct          file *p_fp;     /* file data pointer */
                struct          process *p_process;     /* process pointer */
        } kn_ptr;
        const struct            filterops *kn_fop;
-       void                    *kn_hook;
+       void                    *kn_hook;       /* [o] */
 #define KN_ACTIVE      0x0001                  /* event has been triggered */
 #define KN_QUEUED      0x0002                  /* event is on queue */
 #define KN_DISABLED    0x0004                  /* event is disabled */
@@ -198,12 +253,13 @@ struct knote {
 #define KN_ATTACHED    0x0040                  /* knote is attached to
                                                 * a knlist of the kqueue */
 
-#define kn_id          kn_kevent.ident
-#define kn_filter      kn_kevent.filter
-#define kn_flags       kn_kevent.flags
-#define kn_fflags      kn_kevent.fflags
-#define kn_data                kn_kevent.data
-#define kn_fp          kn_ptr.p_fp
+#define kn_id          kn_kevent.ident         /* [I] */
+#define kn_filter      kn_kevent.filter        /* [I] */
+#define kn_flags       kn_kevent.flags         /* [o] */
+#define kn_fflags      kn_kevent.fflags        /* [o] */
+#define kn_data                kn_kevent.data          /* [o] */
+#define kn_udata       kn_kevent.udata         /* [o] */
+#define kn_fp          kn_ptr.p_fp             /* [o] */
 };
 
 struct klistops {
@@ -234,6 +290,8 @@ extern void kqpoll_exit(void);
 extern void    knote(struct klist *list, long hint);
 extern void    knote_fdclose(struct proc *p, int fd);
 extern void    knote_processexit(struct proc *);
+extern void    knote_modify(const struct kevent *, struct knote *);
+extern void    knote_submit(struct knote *, struct kevent *);
 extern int     kqueue_register(struct kqueue *kq,
                    struct kevent *kev, struct proc *p);
 extern int     kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,

Reply via email to