> On Jul 13, 2022, at 7:18 PM, Jason Thorpe wrote:
>
> Ok, new version. Main differences:
And another new version. This:
==> Creates a knote_impl structure, private to kern_event.c, that holds the
new lock.  I took the opportunity to move kn_influx into knote_impl as well,
since absolutely no one outside of kern_event.c should touch it.  This change
is ABI-compatible.
==> Improves the comments about the locking rules.
There are no functional changes since the last patch.
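For anyone unfamiliar with the idiom used below: as I read the patch, the ABI
compatibility falls out of struct knote being the *first* member of the
private wrapper, so a pointer to one is a valid pointer to the other and code
outside kern_event.c never sees the difference.  A minimal stand-alone sketch
of the pattern (the "widget" names are made up for illustration, not from the
patch):

	#include <stddef.h>

	/*
	 * NetBSD provides container_of() in <sys/container_of.h>; it is
	 * spelled out here only so the sketch compiles on its own.
	 */
	#define	container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	/* Public structure, visible to every consumer. */
	struct widget {
		int	w_state;
	};

	/*
	 * Private wrapper, visible only inside one .c file.  Because
	 * wi_widget is the first member, &wip->wi_widget and wip have
	 * the same address, so consumers holding a struct widget *
	 * are unaffected -- which is what keeps the change
	 * ABI-compatible.
	 */
	struct widget_impl {
		struct widget	wi_widget;	/* MUST be first */
		int		wi_private;	/* hidden state */
	};

	#define	WIMPL_TO_WIDGET(wip)	(&(wip)->wi_widget)
	#define	WIDGET_TO_WIMPL(wp)	\
		container_of((wp), struct widget_impl, wi_widget)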
Index: kern/kern_event.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_event.c,v
retrieving revision 1.143
diff -u -p -r1.143 kern_event.c
--- kern/kern_event.c	13 Jul 2022 14:11:46 -0000	1.143
+++ kern/kern_event.c	17 Jul 2022 19:44:06 -0000
@@ -120,6 +120,61 @@ static void	filt_userdetach(struct knote
static int filt_user(struct knote *, long hint);
static int filt_usertouch(struct knote *, struct kevent *, long type);
+/*
+ * Private knote state that should never be exposed outside
+ * of kern_event.c
+ *
+ * Field locking:
+ *
+ * q kn_kq->kq_lock
+ */
+struct knote_impl {
+	struct knote	ki_knote;
+	unsigned int	ki_influx;	/* q: in-flux counter */
+	kmutex_t	ki_foplock;	/* for kn_filterops */
+};
+
+#define	KIMPL_TO_KNOTE(kip)	(&(kip)->ki_knote)
+#define	KNOTE_TO_KIMPL(knp)	container_of((knp), struct knote_impl, ki_knote)
+
+static inline struct knote *
+knote_alloc(bool sleepok)
+{
+ struct knote_impl *ki;
+
+ ki = kmem_zalloc(sizeof(*ki), sleepok ? KM_SLEEP : KM_NOSLEEP);
+ mutex_init(&ki->ki_foplock, MUTEX_DEFAULT, IPL_NONE);
+
+ return KIMPL_TO_KNOTE(ki);
+}
+
+static inline void
+knote_free(struct knote *kn)
+{
+ struct knote_impl *ki = KNOTE_TO_KIMPL(kn);
+
+ mutex_destroy(&ki->ki_foplock);
+ kmem_free(ki, sizeof(*ki));
+}
+
+static inline void
+knote_foplock_enter(struct knote *kn)
+{
+ mutex_enter(&KNOTE_TO_KIMPL(kn)->ki_foplock);
+}
+
+static inline void
+knote_foplock_exit(struct knote *kn)
+{
+ mutex_exit(&KNOTE_TO_KIMPL(kn)->ki_foplock);
+}
+
+static inline bool
+knote_foplock_owned(struct knote *kn)
+{
+ return mutex_owned(&KNOTE_TO_KIMPL(kn)->ki_foplock);
+}
+
static const struct fileops kqueueops = {
.fo_name = "kqueue",
.fo_read = (void *)enxio,
@@ -133,6 +188,31 @@ static const struct fileops kqueueops =
.fo_restart = kqueue_restart,
};
+static void
+filt_nopdetach(struct knote *kn __unused)
+{
+}
+
+static int
+filt_nopevent(struct knote *kn __unused, long hint __unused)
+{
+ return 0;
+}
+
+static const struct filterops nop_fd_filtops = {
+ .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
+ .f_attach = NULL,
+ .f_detach = filt_nopdetach,
+ .f_event = filt_nopevent,
+};
+
+static const struct filterops nop_filtops = {
+ .f_flags = FILTEROP_MPSAFE,
+ .f_attach = NULL,
+ .f_detach = filt_nopdetach,
+ .f_event = filt_nopevent,
+};
+
static const struct filterops kqread_filtops = {
.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
.f_attach = NULL,
@@ -232,15 +312,41 @@ static size_t user_kfiltersz; /* size
* -> object lock (e.g., device driver lock, &c.)
* -> kn_kq->kq_lock
*
- * Locking rules:
- * Locking rules. ==> indicates the lock is acquired by the backing
+ * Locking rules.  ==> indicates the lock is acquired by the backing
+ * object; locks listed before it are acquired before calling filter ops:
+ *
+ *	f_attach: fdp->fd_lock -> knote foplock ->
+ *	    (maybe) KERNEL_LOCK ==> backing object lock
+ *
+ *	f_detach: fdp->fd_lock -> knote foplock ->
+ *	    (maybe) KERNEL_LOCK ==> backing object lock
+ *
+ *	f_event via kevent: fdp->fd_lock -> knote foplock ->
+ *	    (maybe) KERNEL_LOCK ==> backing object lock
+ *	    N.B. NOTE_SUBMIT will never be set in the "hint" argument
+ *	    in this case.
*
- *	f_attach: fdp->fd_lock, KERNEL_LOCK
- *	f_detach: fdp->fd_lock, KERNEL_LOCK
- *	f_event(!NOTE_SUBMIT) via kevent: fdp->fd_lock, _no_ object lock
- *	f_event via knote: whatever caller guarantees
- *		Typically,	f_event(NOTE_SUBMIT) via knote: object lock
- *			f_event(!NOTE_SUBMIT) via knote: nothing,
- *				acquires/releases object lock inside.
+ *	f_event via knote (via backing object): Whatever caller guarantees.
+ *	Typically:
+ *	    f_event(NOTE_SUBMIT): caller has already acquired the backing
+ *	    object lock.
+ *	    f_event(!NOTE_SUBMIT): caller has not acquired the backing
+ *	    object lock, or has possibly acquired KERNEL_LOCK.  The backing
+ *	    object lock may or may not be acquired as-needed.
+ *	N.B. the knote foplock will **not** be acquired in this case.  The
+ *	caller guarantees that klist_fini() will not be called concurrently
+ *	with knote().
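To make the kevent-path rule above concrete, here is roughly what a call site
inside kern_event.c would look like.  This is an illustrative sketch under my
reading of the rules, not code lifted from the patch (example_filter_event is
a made-up name):

	/*
	 * Hypothetical caller on the kevent path: fdp->fd_lock is
	 * assumed to be held on entry; the knote foplock pins
	 * kn->kn_fop against a concurrent klist_fini(); KERNEL_LOCK
	 * covers filters that are not marked FILTEROP_MPSAFE.
	 */
	static int
	example_filter_event(struct knote *kn, long hint)
	{
		int rv;

		knote_foplock_enter(kn);
		if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
			rv = kn->kn_fop->f_event(kn, hint);
		} else {
			KERNEL_LOCK(1, NULL);
			rv = kn->kn_fop->f_event(kn, hint);
			KERNEL_UNLOCK_ONE(NULL);
		}
		knote_foplock_exit(kn);

		return rv;
	}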