This patch modifies the filterops callback interface so that it becomes easier to use with fine-grained locking. The current code is an entanglement of accesses to various data. Some items are more or less controlled by kqueue, while some others are more or less under the control of event sources. The main point of the patch is to make the event source responsible for serializing access to struct knote's kn_event. This idea has been influenced by XNU's kqueue implementation.
The patch adds two callbacks to struct filterops: .f_modify() and .f_process(). .f_modify() applies parameters from user to a knote. As a feature, event sources now have the option to refuse parameter update. .f_process() retrieves a pending event from a knote, or checks if there is an event pending. After the refactoring, .f_event() is used when event source calls knote(). However, I will not change all the filterops implementations at once, so the patch uses .f_event() to emulate the new callbacks if they are missing. The emulation attempts to preserve old behaviour. One of the reasons for this change is to keep kqueue's and event sources' lockings clearly separate. The restructuring allows, for example, atomic fetching and clearing of event state (EV_CLEAR). The patch includes filterops changes for pipes and sockets, to give an idea of what the refactoring means in practice. One point of interest is that the NOTE_SUBMIT-based conditional locking becomes unnecessary. Also, there is a subtle change with EV_ONESHOT because now the pipe and socket callbacks do recheck the event during kqueue_scan() even when EV_ONESHOT is set. This could be avoided with a little extra code, but I doubt it is worth it with level-triggered events. The patch additionally makes preparations for kqueue unlocking by introducing a flag that indicates if filterops callbacks are MP-safe. However, kqueue itself still needs the kernel lock. The long section of splhigh() in kqueue_scan() is split as preparation for an upcoming mutex. This step is made possible by the refactoring. The socket filterops are not marked MP-safe yet because socket klists are not MP-safe. I have earlier posted a diff that uses solock() for the klists, but am having second thoughts about it. Inet sockets' solock() is based on NET_LOCK() and that lock is already showing signs of contention. Getting kqueue unlocked might help with that a bit since rw_lock() is allowed to spin for a moment when kernel lock is not held. 
OK? Index: kern/kern_event.c =================================================================== RCS file: src/sys/kern/kern_event.c,v retrieving revision 1.159 diff -u -p -r1.159 kern_event.c --- kern/kern_event.c 17 Jan 2021 05:56:32 -0000 1.159 +++ kern/kern_event.c 25 Jan 2021 14:54:19 -0000 @@ -95,6 +95,11 @@ void kqueue_do_check(struct kqueue *kq, void kqpoll_dequeue(struct proc *p); +static int filter_attach(struct knote *kn); +static void filter_detach(struct knote *kn); +static int filter_event(struct knote *kn, long hint); +static int filter_modify(struct kevent *kev, struct knote *kn); +static int filter_process(struct knote *kn, struct kevent *kev); static void kqueue_expand_hash(struct kqueue *kq); static void kqueue_expand_list(struct kqueue *kq, int fd); static void kqueue_task(void *); @@ -372,7 +377,7 @@ filt_proc(struct knote *kn, long hint) kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ - kev.udata = kn->kn_kevent.udata; /* preserve udata */ + kev.udata = kn->kn_udata; /* preserve udata */ error = kqueue_register(kn->kn_kq, &kev, NULL); if (error) kn->kn_fflags |= NOTE_TRACKERR; @@ -467,6 +472,20 @@ filt_seltrue(struct knote *kn, long hint return (1); } +int +filt_seltruemodify(struct kevent *kev, struct knote *kn) +{ + knote_modify(kev, kn); + return (1); +} + +int +filt_seltrueprocess(struct knote *kn, struct kevent *kev) +{ + knote_submit(kn, kev); + return (1); +} + /* * This provides full kqfilter entry for device switch tables, which * has same effect as filter using filt_seltrue() as filter method. 
@@ -478,10 +497,12 @@ filt_seltruedetach(struct knote *kn) } const struct filterops seltrue_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_seltruedetach, .f_event = filt_seltrue, + .f_modify = filt_seltruemodify, + .f_process = filt_seltrueprocess, }; int @@ -510,6 +531,20 @@ filt_dead(struct knote *kn, long hint) return (1); } +static int +filt_deadmodify(struct kevent *kev, struct knote *kn) +{ + knote_modify(kev, kn); + return (1); +} + +static int +filt_deadprocess(struct knote *kn, struct kevent *kev) +{ + knote_submit(kn, kev); + return (1); +} + static void filt_deaddetach(struct knote *kn) { @@ -517,10 +552,12 @@ filt_deaddetach(struct knote *kn) } const struct filterops dead_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_deaddetach, .f_event = filt_dead, + .f_modify = filt_deadmodify, + .f_process = filt_deadprocess, }; static int @@ -533,12 +570,104 @@ filt_badfd(struct knote *kn, long hint) /* For use with kqpoll. 
*/ const struct filterops badfd_filtops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_deaddetach, .f_event = filt_badfd, + .f_modify = filt_deadmodify, + .f_process = filt_deadprocess, }; +static int +filter_attach(struct knote *kn) +{ + int error; + + if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { + error = kn->kn_fop->f_attach(kn); + } else { + KERNEL_LOCK(); + error = kn->kn_fop->f_attach(kn); + KERNEL_UNLOCK(); + } + return (error); +} + +static void +filter_detach(struct knote *kn) +{ + if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { + kn->kn_fop->f_detach(kn); + } else { + KERNEL_LOCK(); + kn->kn_fop->f_detach(kn); + KERNEL_UNLOCK(); + } +} + +static int +filter_event(struct knote *kn, long hint) +{ + if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0) + KERNEL_ASSERT_LOCKED(); + + return (kn->kn_fop->f_event(kn, hint)); +} + +static int +filter_modify(struct kevent *kev, struct knote *kn) +{ + int active, s; + + if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { + active = kn->kn_fop->f_modify(kev, kn); + } else { + KERNEL_LOCK(); + if (kn->kn_fop->f_modify != NULL) { + active = kn->kn_fop->f_modify(kev, kn); + } else { + /* Emulate f_modify using f_event. */ + s = splhigh(); + knote_modify(kev, kn); + active = kn->kn_fop->f_event(kn, 0); + splx(s); + } + KERNEL_UNLOCK(); + } + return (active); +} + +static int +filter_process(struct knote *kn, struct kevent *kev) +{ + int active, s; + + if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) { + active = kn->kn_fop->f_process(kn, kev); + } else { + KERNEL_LOCK(); + if (kn->kn_fop->f_process != NULL) { + active = kn->kn_fop->f_process(kn, kev); + } else { + /* Emulate f_process using f_event. */ + s = splhigh(); + /* + * If called from kqueue_scan(), skip f_event + * when EV_ONESHOT is set, to preserve old behaviour. 
+ */ + if (kev != NULL && (kn->kn_flags & EV_ONESHOT)) + active = 1; + else + active = kn->kn_fop->f_event(kn, 0); + if (active) + knote_submit(kn, kev); + splx(s); + } + KERNEL_UNLOCK(); + } + return (active); +} + void kqpoll_init(void) { @@ -916,7 +1045,8 @@ again: kn->kn_kevent = *kev; knote_attach(kn); - if ((error = fops->f_attach(kn)) != 0) { + error = filter_attach(kn); + if (error != 0) { knote_drop(kn, p); goto done; } @@ -935,28 +1065,29 @@ again: * seen it. This corresponds to the insert * happening in full before the close. */ - kn->kn_fop->f_detach(kn); + filter_detach(kn); knote_drop(kn, p); goto done; } + + /* Check if there is a pending event. */ + if (filter_process(kn, NULL)) + knote_activate(kn); } else { /* * The user may change some filter values after the * initial EV_ADD, but doing so will not reset any * filters which have already been triggered. */ - kn->kn_sfflags = kev->fflags; - kn->kn_sdata = kev->data; - kn->kn_kevent.udata = kev->udata; + if (filter_modify(kev, kn)) + knote_activate(kn); + if (kev->flags & EV_ERROR) { + error = kev->data; + goto release; + } } - - s = splhigh(); - if (kn->kn_fop->f_event(kn, 0)) - knote_activate(kn); - splx(s); - } else if (kev->flags & EV_DELETE) { - kn->kn_fop->f_detach(kn); + filter_detach(kn); knote_drop(kn, p); goto done; } @@ -971,14 +1102,13 @@ again: if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { s = splhigh(); kn->kn_status &= ~KN_DISABLED; - if (kn->kn_fop->f_event(kn, 0)) - kn->kn_status |= KN_ACTIVE; - if ((kn->kn_status & KN_ACTIVE) && - ((kn->kn_status & KN_QUEUED) == 0)) - knote_enqueue(kn); splx(s); + /* Check if there is a pending event. 
*/ + if (filter_process(kn, NULL)) + knote_activate(kn); } +release: s = splhigh(); knote_release(kn); splx(s); @@ -1108,39 +1238,36 @@ retry: knote_release(kn); continue; } - if ((kn->kn_flags & EV_ONESHOT) == 0 && - kn->kn_fop->f_event(kn, 0) == 0) { + + splx(s); + + memset(kevp, 0, sizeof(*kevp)); + if (filter_process(kn, kevp) == 0) { + s = splhigh(); if ((kn->kn_status & KN_QUEUED) == 0) kn->kn_status &= ~KN_ACTIVE; knote_release(kn); kqueue_check(kq); continue; } - *kevp = kn->kn_kevent; - kevp++; - nkev++; - scan->kqs_nevent++; /* * Post-event action on the note */ - if (kn->kn_flags & EV_ONESHOT) { - splx(s); - kn->kn_fop->f_detach(kn); + if (kevp->flags & EV_ONESHOT) { + filter_detach(kn); knote_drop(kn, p); s = splhigh(); - } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { - if (kn->kn_flags & EV_CLEAR) { - kn->kn_data = 0; - kn->kn_fflags = 0; - } - if (kn->kn_flags & EV_DISPATCH) + } else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) { + s = splhigh(); + if (kevp->flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; if ((kn->kn_status & KN_QUEUED) == 0) kn->kn_status &= ~KN_ACTIVE; KASSERT(kn->kn_status & KN_ATTACHED); knote_release(kn); } else { + s = splhigh(); if ((kn->kn_status & KN_QUEUED) == 0) { kqueue_check(kq); kq->kq_count++; @@ -1151,6 +1278,10 @@ retry: knote_release(kn); } kqueue_check(kq); + + kevp++; + nkev++; + scan->kqs_nevent++; } TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); splx(s); @@ -1439,7 +1570,7 @@ knote(struct klist *list, long hint) KLIST_ASSERT_LOCKED(list); SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) - if (kn->kn_fop->f_event(kn, hint)) + if (filter_event(kn, hint)) knote_activate(kn); } @@ -1459,7 +1590,7 @@ knote_remove(struct proc *p, struct knli continue; } splx(s); - kn->kn_fop->f_detach(kn); + filter_detach(kn); /* * Notify poll(2) and select(2) when a monitored @@ -1645,6 +1776,36 @@ knote_dequeue(struct knote *kn) kqueue_check(kq); } +/* + * Modify the knote's parameters. 
+ * + * The knote's object lock must be held. + */ +void +knote_modify(const struct kevent *kev, struct knote *kn) +{ + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; + kn->kn_udata = kev->udata; +} + +/* + * Submit the knote's event for delivery. + * + * The knote's object lock must be held. + */ +void +knote_submit(struct knote *kn, struct kevent *kev) +{ + if (kev != NULL) { + *kev = kn->kn_kevent; + if (kn->kn_flags & EV_CLEAR) { + kn->kn_fflags = 0; + kn->kn_data = 0; + } + } +} + void klist_init(struct klist *klist, const struct klistops *ops, void *arg) { @@ -1727,10 +1888,10 @@ klist_invalidate(struct klist *list) } klist_unlock(list, ls); splx(s); - kn->kn_fop->f_detach(kn); + filter_detach(kn); if (kn->kn_fop->f_flags & FILTEROP_ISFD) { kn->kn_fop = &dead_filtops; - kn->kn_fop->f_event(kn, 0); + filter_event(kn, 0); knote_activate(kn); s = splhigh(); knote_release(kn); Index: kern/sys_pipe.c =================================================================== RCS file: src/sys/kern/sys_pipe.c,v retrieving revision 1.126 diff -u -p -r1.126 sys_pipe.c --- kern/sys_pipe.c 30 Dec 2020 17:02:32 -0000 1.126 +++ kern/sys_pipe.c 25 Jan 2021 14:54:19 -0000 @@ -78,20 +78,30 @@ static const struct fileops pipeops = { void filt_pipedetach(struct knote *kn); int filt_piperead(struct knote *kn, long hint); +int filt_pipereadmodify(struct kevent *kev, struct knote *kn); +int filt_pipereadprocess(struct knote *kn, struct kevent *kev); +int filt_piperead_common(struct knote *kn, struct pipe *rpipe); int filt_pipewrite(struct knote *kn, long hint); +int filt_pipewritemodify(struct kevent *kev, struct knote *kn); +int filt_pipewriteprocess(struct knote *kn, struct kevent *kev); +int filt_pipewrite_common(struct knote *kn, struct pipe *rpipe); const struct filterops pipe_rfiltops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_pipedetach, .f_event = filt_piperead, + .f_modify = filt_pipereadmodify, + 
.f_process = filt_pipereadprocess, }; const struct filterops pipe_wfiltops = { - .f_flags = FILTEROP_ISFD, + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, .f_attach = NULL, .f_detach = filt_pipedetach, .f_event = filt_pipewrite, + .f_modify = filt_pipewritemodify, + .f_process = filt_pipewriteprocess, }; /* @@ -363,7 +373,7 @@ pipeselwakeup(struct pipe *cpipe) selwakeup(&cpipe->pipe_sel); } else { KERNEL_LOCK(); - KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT); + KNOTE(&cpipe->pipe_sel.si_note, 0); KERNEL_UNLOCK(); } @@ -918,45 +928,73 @@ filt_pipedetach(struct knote *kn) } int -filt_piperead(struct knote *kn, long hint) +filt_piperead_common(struct knote *kn, struct pipe *rpipe) { - struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; - struct rwlock *lock = rpipe->pipe_lock; + struct pipe *wpipe; + + rw_assert_wrlock(rpipe->pipe_lock); - if ((hint & NOTE_SUBMIT) == 0) - rw_enter_read(lock); wpipe = pipe_peer(rpipe); kn->kn_data = rpipe->pipe_buffer.cnt; if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) { - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) kn->kn_flags |= __EV_HUP; return (1); } - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); - return (kn->kn_data > 0); } int -filt_pipewrite(struct knote *kn, long hint) +filt_piperead(struct knote *kn, long hint) { - struct pipe *rpipe = kn->kn_fp->f_data, *wpipe; - struct rwlock *lock = rpipe->pipe_lock; + struct pipe *rpipe = kn->kn_fp->f_data; + + return (filt_piperead_common(kn, rpipe)); +} + +int +filt_pipereadmodify(struct kevent *kev, struct knote *kn) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + knote_modify(kev, kn); + active = filt_piperead_common(kn, rpipe); + rw_exit_write(rpipe->pipe_lock); + + return (active); +} + +int +filt_pipereadprocess(struct knote *kn, struct kevent *kev) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + active = 
filt_piperead_common(kn, rpipe); + if (active) + knote_submit(kn, kev); + rw_exit_write(rpipe->pipe_lock); + + return (active); +} + +int +filt_pipewrite_common(struct knote *kn, struct pipe *rpipe) +{ + struct pipe *wpipe; + + rw_assert_wrlock(rpipe->pipe_lock); - if ((hint & NOTE_SUBMIT) == 0) - rw_enter_read(lock); wpipe = pipe_peer(rpipe); if (wpipe == NULL) { - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); kn->kn_data = 0; kn->kn_flags |= EV_EOF; if (kn->kn_flags & __EV_POLL) @@ -965,10 +1003,44 @@ filt_pipewrite(struct knote *kn, long hi } kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; - if ((hint & NOTE_SUBMIT) == 0) - rw_exit_read(lock); - return (kn->kn_data >= PIPE_BUF); +} + +int +filt_pipewrite(struct knote *kn, long hint) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + + return (filt_pipewrite_common(kn, rpipe)); +} + +int +filt_pipewritemodify(struct kevent *kev, struct knote *kn) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + knote_modify(kev, kn); + active = filt_pipewrite_common(kn, rpipe); + rw_exit_write(rpipe->pipe_lock); + + return (active); +} + +int +filt_pipewriteprocess(struct knote *kn, struct kevent *kev) +{ + struct pipe *rpipe = kn->kn_fp->f_data; + int active; + + rw_enter_write(rpipe->pipe_lock); + active = filt_pipewrite_common(kn, rpipe); + if (active) + knote_submit(kn, kev); + rw_exit_write(rpipe->pipe_lock); + + return (active); } void Index: kern/uipc_socket.c =================================================================== RCS file: src/sys/kern/uipc_socket.c,v retrieving revision 1.254 diff -u -p -r1.254 uipc_socket.c --- kern/uipc_socket.c 17 Jan 2021 05:23:34 -0000 1.254 +++ kern/uipc_socket.c 25 Jan 2021 14:54:20 -0000 @@ -69,15 +69,26 @@ int somove(struct socket *, int); void filt_sordetach(struct knote *kn); int filt_soread(struct knote *kn, long hint); +int filt_soreadmodify(struct kevent *kev, struct knote *kn); +int filt_soreadprocess(struct 
knote *kn, struct kevent *kev); +int filt_soread_common(struct knote *kn, struct socket *so); void filt_sowdetach(struct knote *kn); int filt_sowrite(struct knote *kn, long hint); +int filt_sowritemodify(struct kevent *kev, struct knote *kn); +int filt_sowriteprocess(struct knote *kn, struct kevent *kev); +int filt_sowrite_common(struct knote *kn, struct socket *so); int filt_solisten(struct knote *kn, long hint); +int filt_solistenmodify(struct kevent *kev, struct knote *kn); +int filt_solistenprocess(struct knote *kn, struct kevent *kev); +int filt_solisten_common(struct knote *kn, struct socket *so); const struct filterops solisten_filtops = { .f_flags = FILTEROP_ISFD, .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_solisten, + .f_modify = filt_solistenmodify, + .f_process = filt_solistenprocess, }; const struct filterops soread_filtops = { @@ -85,6 +96,8 @@ const struct filterops soread_filtops = .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, + .f_modify = filt_soreadmodify, + .f_process = filt_soreadprocess, }; const struct filterops sowrite_filtops = { @@ -92,6 +105,8 @@ const struct filterops sowrite_filtops = .f_attach = NULL, .f_detach = filt_sowdetach, .f_event = filt_sowrite, + .f_modify = filt_sowritemodify, + .f_process = filt_sowriteprocess, }; const struct filterops soexcept_filtops = { @@ -99,6 +114,8 @@ const struct filterops soexcept_filtops .f_attach = NULL, .f_detach = filt_sordetach, .f_event = filt_soread, + .f_modify = filt_soreadmodify, + .f_process = filt_soreadprocess, }; #ifndef SOMINCONN @@ -2055,13 +2072,12 @@ filt_sordetach(struct knote *kn) } int -filt_soread(struct knote *kn, long hint) +filt_soread_common(struct knote *kn, struct socket *so) { - struct socket *so = kn->kn_fp->f_data; - int s, rv = 0; + int rv = 0; + + soassertlocked(so); - if ((hint & NOTE_SUBMIT) == 0) - s = solock(so); kn->kn_data = so->so_rcv.sb_cc; #ifdef SOCKET_SPLICE if (isspliced(so)) { @@ -2089,12 +2105,47 @@ 
filt_soread(struct knote *kn, long hint) } else { rv = (kn->kn_data >= so->so_rcv.sb_lowat); } - if ((hint & NOTE_SUBMIT) == 0) - sounlock(so, s); return rv; } +int +filt_soread(struct knote *kn, long hint) +{ + struct socket *so = kn->kn_fp->f_data; + + return (filt_soread_common(kn, so)); +} + +int +filt_soreadmodify(struct kevent *kev, struct knote *kn) +{ + struct socket *so = kn->kn_fp->f_data; + int active, s; + + s = solock(so); + knote_modify(kev, kn); + active = filt_soread_common(kn, so); + sounlock(so, s); + + return (active); +} + +int +filt_soreadprocess(struct knote *kn, struct kevent *kev) +{ + struct socket *so = kn->kn_fp->f_data; + int active, s; + + s = solock(so); + active = filt_soread_common(kn, so); + if (active) + knote_submit(kn, kev); + sounlock(so, s); + + return (active); +} + void filt_sowdetach(struct knote *kn) { @@ -2106,13 +2157,12 @@ filt_sowdetach(struct knote *kn) } int -filt_sowrite(struct knote *kn, long hint) +filt_sowrite_common(struct knote *kn, struct socket *so) { - struct socket *so = kn->kn_fp->f_data; - int s, rv; + int rv; + + soassertlocked(so); - if ((hint & NOTE_SUBMIT) == 0) - s = solock(so); kn->kn_data = sbspace(so, &so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; @@ -2132,25 +2182,92 @@ filt_sowrite(struct knote *kn, long hint } else { rv = (kn->kn_data >= so->so_snd.sb_lowat); } - if ((hint & NOTE_SUBMIT) == 0) - sounlock(so, s); return (rv); } int -filt_solisten(struct knote *kn, long hint) +filt_sowrite(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; - int s; - if ((hint & NOTE_SUBMIT) == 0) - s = solock(so); + return (filt_sowrite_common(kn, so)); +} + +int +filt_sowritemodify(struct kevent *kev, struct knote *kn) +{ + struct socket *so = kn->kn_fp->f_data; + int active, s; + + s = solock(so); + knote_modify(kev, kn); + active = filt_sowrite_common(kn, so); + sounlock(so, s); + + return (active); +} + +int +filt_sowriteprocess(struct knote *kn, struct kevent 
*kev) +{ + struct socket *so = kn->kn_fp->f_data; + int active, s; + + s = solock(so); + active = filt_sowrite_common(kn, so); + if (active) + knote_submit(kn, kev); + sounlock(so, s); + + return (active); +} + +int +filt_solisten_common(struct knote *kn, struct socket *so) +{ + soassertlocked(so); + kn->kn_data = so->so_qlen; - if ((hint & NOTE_SUBMIT) == 0) - sounlock(so, s); return (kn->kn_data != 0); +} + +int +filt_solisten(struct knote *kn, long hint) +{ + struct socket *so = kn->kn_fp->f_data; + + return (filt_solisten_common(kn, so)); +} + +int +filt_solistenmodify(struct kevent *kev, struct knote *kn) +{ + struct socket *so = kn->kn_fp->f_data; + int active, s; + + s = solock(so); + knote_modify(kev, kn); + active = filt_solisten_common(kn, so); + sounlock(so, s); + + return (active); +} + +int +filt_solistenprocess(struct knote *kn, struct kevent *kev) +{ + struct socket *so = kn->kn_fp->f_data; + int active, s; + + s = solock(so); + active = filt_solisten_common(kn, so); + if (active) + knote_submit(kn, kev); + sounlock(so, s); + + return (active); } #ifdef DDB Index: sys/event.h =================================================================== RCS file: src/sys/sys/event.h,v retrieving revision 1.53 diff -u -p -r1.53 event.h --- sys/event.h 17 Jan 2021 05:56:32 -0000 1.53 +++ sys/event.h 25 Jan 2021 14:54:20 -0000 @@ -165,30 +165,85 @@ struct klist { */ #define NOTE_SIGNAL 0x08000000 +/* + * = Event filter interface + * + * == .f_flags + * + * Defines properties of the event filter: + * + * - FILTEROP_ISFD Each knote of this filter is associated + * with a file descriptor. + * + * - FILTEROP_MPSAFE The kqueue subsystem can invoke .f_attach(), + * .f_detach(), .f_modify() and .f_process() without + * the kernel lock. + * + * == .f_attach() + * + * Attaches the knote to the object. + * + * == .f_detach() + * + * Detaches the knote from the object. The object must not use this knote + * for delivering events after this callback has returned. 
+ * + * == .f_event() + * + * Notifies the filter about an event. Called through knote(). + * + * == .f_modify() + * + * Modifies the knote with new state from the user. + * + * Returns non-zero if the knote has become active. + * + * == .f_process() + * + * Checks if the event is active and returns non-zero if the event should be + * returned to the user. + * + * If kev is non-NULL and the event is active, the callback should store + * the event's state in kev for delivery to the user. + * + * == Concurrency control + * + * The kqueue subsystem serializes calls of .f_attach(), .f_detach(), + * .f_modify() and .f_process(). + */ + #define FILTEROP_ISFD 0x00000001 /* ident == filedescriptor */ +#define FILTEROP_MPSAFE 0x00000002 /* safe without kernel lock */ struct filterops { int f_flags; int (*f_attach)(struct knote *kn); void (*f_detach)(struct knote *kn); int (*f_event)(struct knote *kn, long hint); + int (*f_modify)(struct kevent *kev, struct knote *kn); + int (*f_process)(struct knote *kn, struct kevent *kev); }; +/* + * Locking: + * I immutable after creation + * o object lock + */ struct knote { SLIST_ENTRY(knote) kn_link; /* for fd */ SLIST_ENTRY(knote) kn_selnext; /* for struct selinfo */ TAILQ_ENTRY(knote) kn_tqe; - struct kqueue *kn_kq; /* which queue we are on */ + struct kqueue *kn_kq; /* [I] which queue we are on */ struct kevent kn_kevent; int kn_status; - int kn_sfflags; /* saved filter flags */ - __int64_t kn_sdata; /* saved data field */ + int kn_sfflags; /* [o] saved filter flags */ + __int64_t kn_sdata; /* [o] saved data field */ union { struct file *p_fp; /* file data pointer */ struct process *p_process; /* process pointer */ } kn_ptr; const struct filterops *kn_fop; - void *kn_hook; + void *kn_hook; /* [o] */ #define KN_ACTIVE 0x0001 /* event has been triggered */ #define KN_QUEUED 0x0002 /* event is on queue */ #define KN_DISABLED 0x0004 /* event is disabled */ @@ -198,12 +253,13 @@ struct knote { #define KN_ATTACHED 0x0040 /* knote is 
attached to * a knlist of the kqueue */ -#define kn_id kn_kevent.ident -#define kn_filter kn_kevent.filter -#define kn_flags kn_kevent.flags -#define kn_fflags kn_kevent.fflags -#define kn_data kn_kevent.data -#define kn_fp kn_ptr.p_fp +#define kn_id kn_kevent.ident /* [I] */ +#define kn_filter kn_kevent.filter /* [I] */ +#define kn_flags kn_kevent.flags /* [o] */ +#define kn_fflags kn_kevent.fflags /* [o] */ +#define kn_data kn_kevent.data /* [o] */ +#define kn_udata kn_kevent.udata /* [o] */ +#define kn_fp kn_ptr.p_fp /* [o] */ }; struct klistops { @@ -234,6 +290,8 @@ extern void kqpoll_exit(void); extern void knote(struct klist *list, long hint); extern void knote_fdclose(struct proc *p, int fd); extern void knote_processexit(struct proc *); +extern void knote_modify(const struct kevent *, struct knote *); +extern void knote_submit(struct knote *, struct kevent *); extern int kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p); extern int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,