Here is another attempt to set klist lock for sockets. This is a revised
version of a patch that I posted in January [1].

Using solock() for the klists is probably the easiest way at the time
being. However, the lock is a potential point of contention because of
the underlying big-lock design. The increase of overhead is related to
adding and removing event registrations. With persistent registrations
the overhead is unchanged.

As a result, socket and named FIFO event filters should be ready to run
without the kernel lock. The f_event, f_modify and f_process callbacks
should be MP-safe already.

[1] https://marc.info/?l=openbsd-tech&m=160986578724696

OK?

Index: kern/uipc_socket.c
===================================================================
RCS file: src/sys/kern/uipc_socket.c,v
retrieving revision 1.265
diff -u -p -r1.265 uipc_socket.c
--- kern/uipc_socket.c  14 Oct 2021 23:05:10 -0000      1.265
+++ kern/uipc_socket.c  22 Oct 2021 12:17:57 -0000
@@ -84,7 +84,7 @@ int   filt_solistenprocess(struct knote *k
 int    filt_solisten_common(struct knote *kn, struct socket *so);
 
 const struct filterops solisten_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_sordetach,
        .f_event        = filt_solisten,
@@ -93,7 +93,7 @@ const struct filterops solisten_filtops 
 };
 
 const struct filterops soread_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_sordetach,
        .f_event        = filt_soread,
@@ -102,7 +102,7 @@ const struct filterops soread_filtops = 
 };
 
 const struct filterops sowrite_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_sowdetach,
        .f_event        = filt_sowrite,
@@ -111,7 +111,7 @@ const struct filterops sowrite_filtops =
 };
 
 const struct filterops soexcept_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_sordetach,
        .f_event        = filt_soread,
@@ -169,6 +169,8 @@ socreate(int dom, struct socket **aso, i
                return (EPROTOTYPE);
        so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
        rw_init(&so->so_lock, "solock");
+       klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
+       klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
        sigio_init(&so->so_sigio);
        TAILQ_INIT(&so->so_q0);
        TAILQ_INIT(&so->so_q);
@@ -258,6 +260,8 @@ sofree(struct socket *so, int s)
                }
        }
        sigio_free(&so->so_sigio);
+       klist_free(&so->so_rcv.sb_sel.si_note);
+       klist_free(&so->so_snd.sb_sel.si_note);
 #ifdef SOCKET_SPLICE
        if (so->so_sp) {
                if (issplicedback(so)) {
@@ -2038,9 +2042,9 @@ soo_kqfilter(struct file *fp, struct kno
 {
        struct socket *so = kn->kn_fp->f_data;
        struct sockbuf *sb;
+       int s;
 
-       KERNEL_ASSERT_LOCKED();
-
+       s = solock(so);
        switch (kn->kn_filter) {
        case EVFILT_READ:
                if (so->so_options & SO_ACCEPTCONN)
@@ -2058,10 +2062,12 @@ soo_kqfilter(struct file *fp, struct kno
                sb = &so->so_rcv;
                break;
        default:
+               sounlock(so, s);
                return (EINVAL);
        }
 
        klist_insert_locked(&sb->sb_sel.si_note, kn);
+       sounlock(so, s);
 
        return (0);
 }
@@ -2071,9 +2077,7 @@ filt_sordetach(struct knote *kn)
 {
        struct socket *so = kn->kn_fp->f_data;
 
-       KERNEL_ASSERT_LOCKED();
-
-       klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
+       klist_remove(&so->so_rcv.sb_sel.si_note, kn);
 }
 
 int
@@ -2159,9 +2163,7 @@ filt_sowdetach(struct knote *kn)
 {
        struct socket *so = kn->kn_fp->f_data;
 
-       KERNEL_ASSERT_LOCKED();
-
-       klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
+       klist_remove(&so->so_snd.sb_sel.si_note, kn);
 }
 
 int
@@ -2284,6 +2286,36 @@ filt_solistenprocess(struct knote *kn, s
        return (rv);
 }
 
+void
+klist_soassertlk(void *arg)
+{
+       struct socket *so = arg;
+
+       soassertlocked(so);
+}
+
+int
+klist_solock(void *arg)
+{
+       struct socket *so = arg;
+
+       return (solock(so));
+}
+
+void
+klist_sounlock(void *arg, int ls)
+{
+       struct socket *so = arg;
+
+       sounlock(so, ls);
+}
+
+const struct klistops socket_klistops = {
+       .klo_assertlk   = klist_soassertlk,
+       .klo_lock       = klist_solock,
+       .klo_unlock     = klist_sounlock,
+};
+
 #ifdef DDB
 void
 sobuf_print(struct sockbuf *,
Index: kern/uipc_socket2.c
===================================================================
RCS file: src/sys/kern/uipc_socket2.c,v
retrieving revision 1.113
diff -u -p -r1.113 uipc_socket2.c
--- kern/uipc_socket2.c 26 Jul 2021 05:51:13 -0000      1.113
+++ kern/uipc_socket2.c 22 Oct 2021 12:17:57 -0000
@@ -189,6 +189,8 @@ sonewconn(struct socket *head, int conns
        so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
        so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs;
 
+       klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
+       klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
        sigio_init(&so->so_sigio);
        sigio_copy(&so->so_sigio, &head->so_sigio);
 
@@ -196,6 +198,8 @@ sonewconn(struct socket *head, int conns
        if ((*so->so_proto->pr_attach)(so, 0)) {
                (void) soqremque(so, soqueue);
                sigio_free(&so->so_sigio);
+               klist_free(&so->so_rcv.sb_sel.si_note);
+               klist_free(&so->so_snd.sb_sel.si_note);
                pool_put(&socket_pool, so);
                return (NULL);
        }
Index: miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.82
diff -u -p -r1.82 fifo_vnops.c
--- miscfs/fifofs/fifo_vnops.c  15 Oct 2021 06:30:06 -0000      1.82
+++ miscfs/fifofs/fifo_vnops.c  22 Oct 2021 12:17:57 -0000
@@ -114,7 +114,7 @@ int filt_fifowriteprocess(struct knote *
 int    filt_fifowrite_common(struct knote *kn, struct socket *so);
 
 const struct filterops fiforead_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_fifordetach,
        .f_event        = filt_fiforead,
@@ -123,7 +123,7 @@ const struct filterops fiforead_filtops 
 };
 
 const struct filterops fifowrite_filtops = {
-       .f_flags        = FILTEROP_ISFD,
+       .f_flags        = FILTEROP_ISFD | FILTEROP_MPSAFE,
        .f_attach       = NULL,
        .f_detach       = filt_fifowdetach,
        .f_event        = filt_fifowrite,
@@ -528,7 +528,7 @@ fifo_kqfilter(void *v)
 
        ap->a_kn->kn_hook = so;
 
-       klist_insert_locked(&sb->sb_sel.si_note, ap->a_kn);
+       klist_insert(&sb->sb_sel.si_note, ap->a_kn);
 
        return (0);
 }
@@ -538,7 +538,7 @@ filt_fifordetach(struct knote *kn)
 {
        struct socket *so = (struct socket *)kn->kn_hook;
 
-       klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
+       klist_remove(&so->so_rcv.sb_sel.si_note, kn);
 }
 
 int
@@ -609,7 +609,7 @@ filt_fifowdetach(struct knote *kn)
 {
        struct socket *so = (struct socket *)kn->kn_hook;
 
-       klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
+       klist_remove(&so->so_snd.sb_sel.si_note, kn);
 }
 
 int
Index: sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.56
diff -u -p -r1.56 event.h
--- sys/event.h 16 Jun 2021 14:26:30 -0000      1.56
+++ sys/event.h 22 Oct 2021 12:17:57 -0000
@@ -285,6 +285,7 @@ struct timespec;
 
 extern const struct filterops sig_filtops;
 extern const struct filterops dead_filtops;
+extern const struct klistops socket_klistops;
 
 extern void    kqpoll_init(void);
 extern void    kqpoll_exit(void);

Reply via email to