Module Name:    src
Committed By:   martin
Date:           Thu Feb  4 16:57:25 UTC 2021

Modified Files:
        src/sys/kern [netbsd-9]: kern_event.c

Log Message:
Pullup the following (requested by jdolecek in ticket #1191):

        sys/kern/kern_event.c   r1.110-1.115 (via patch)

fix a race in kqueue_scan() - when multiple threads check the same
kqueue, another thread could see an empty kqueue while a kevent was
still being checked for re-firing and re-queued
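
For illustration, a condensed sketch of the pre-patch window, pieced
together from the lines the patch removes below (surrounding loop and
error handling omitted):

	/* thread A, old kqueue_scan(): count is dropped up front */
	kq->kq_count--;
	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_BUSY;
	mutex_spin_exit(&kq->kq_lock);
	rv = (*kn->kn_fop->f_event)(kn, 0);	/* re-check the event */
	mutex_spin_enter(&kq->kq_lock);
	/*
	 * Thread B, scanning the same kq, can run in the unlocked
	 * window, see kq_count == 0 / an empty queue, and wrongly
	 * report no pending kevents even though kn may be re-queued.
	 */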

make sure to keep retrying if there are outstanding kevents, even
if no kevent is found on the first pass through the queue, and only
decrement kq_count when actually completely done with the kevent
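
With the patch, a kevent being re-checked stays counted in kq_count and
is flagged as "in flux"; other scanners block on the kqueue's condvar
instead of returning early. A condensed view of the new logic (taken
from the patch below, not a drop-in replacement):

	#define KQ_FLUX_WAKEUP(kq)	cv_broadcast(&kq->kq_cv)

	if ((kn->kn_status & KN_MARKER) != 0 && kn != marker) {
		/* another scanner's marker: wait for it to finish */
		if (influx) {
			influx = 0;
			KQ_FLUX_WAKEUP(kq);
		}
		mutex_exit(&fdp->fd_lock);
		(void)cv_wait(&kq->kq_cv, &kq->kq_lock);
		goto relock;
	}
	/* ... kq_count is decremented only once the kevent is
	 * actually disposed of (disabled, consumed, or dequeued). */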

PR kern/50094 by Christof Meerwald

Also fixes timer latency in Go, as reported in
https://github.com/golang/go/issues/42515 by Michael Pratt


To generate a diff of this commit:
cvs rdiff -u -r1.104 -r1.104.4.1 src/sys/kern/kern_event.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_event.c
diff -u src/sys/kern/kern_event.c:1.104 src/sys/kern/kern_event.c:1.104.4.1
--- src/sys/kern/kern_event.c:1.104	Tue Nov 13 06:58:14 2018
+++ src/sys/kern/kern_event.c	Thu Feb  4 16:57:25 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_event.c,v 1.104 2018/11/13 06:58:14 maxv Exp $	*/
+/*	$NetBSD: kern_event.c,v 1.104.4.1 2021/02/04 16:57:25 martin Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.104 2018/11/13 06:58:14 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.104.4.1 2021/02/04 16:57:25 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -166,6 +166,8 @@ static int	kq_calloutmax = (4 * 1024);
 
 extern const struct filterops sig_filtops;
 
+#define KQ_FLUX_WAKEUP(kq)	cv_broadcast(&kq->kq_cv)
+
 /*
  * Table for all system-defined filters.
  * These should be listed in the numeric order of the EVFILT_* defines.
@@ -1226,7 +1228,10 @@ kqueue_check(const char *func, size_t li
 			}
 			count++;
 			if (count > kq->kq_count) {
-				goto bad;
+				panic("%s,%zu: kq=%p kq->kq_count(%d) != "
+				    "count(%d), nmarker=%d",
+		    		    func, line, kq, kq->kq_count, count,
+				    nmarker);
 			}
 		} else {
 			nmarker++;
@@ -1240,11 +1245,6 @@ kqueue_check(const char *func, size_t li
 #endif
 		}
 	}
-	if (kq->kq_count != count) {
-bad:
-		panic("%s,%zu: kq=%p kq->kq_count(%d) != count(%d), nmarker=%d",
-		    func, line, kq, kq->kq_count, count, nmarker);
-	}
 }
 #define kq_check(a) kqueue_check(__func__, __LINE__, (a))
 #else /* defined(DEBUG) */
@@ -1268,7 +1268,7 @@ kqueue_scan(file_t *fp, size_t maxevents
 	struct timespec	ats, sleepts;
 	struct knote	*kn, *marker, morker;
 	size_t		count, nkev, nevents;
-	int		timeout, error, rv;
+	int		timeout, error, rv, influx;
 	filedesc_t	*fdp;
 
 	fdp = curlwp->l_fd;
@@ -1317,119 +1317,140 @@ kqueue_scan(file_t *fp, size_t maxevents
 			}
 		}
 		mutex_spin_exit(&kq->kq_lock);
-	} else {
-		/* mark end of knote list */
-		TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
+		goto done;
+	}
 
-		/*
-		 * Acquire the fdp->fd_lock interlock to avoid races with
-		 * file creation/destruction from other threads.
-		 */
-		mutex_spin_exit(&kq->kq_lock);
-		mutex_enter(&fdp->fd_lock);
-		mutex_spin_enter(&kq->kq_lock);
+	/* mark end of knote list */
+	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
+	influx = 0;
 
-		while (count != 0) {
-			kn = TAILQ_FIRST(&kq->kq_head);	/* get next knote */
-			while ((kn->kn_status & KN_MARKER) != 0) {
-				if (kn == marker) {
-					/* it's our marker, stop */
-					TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
-					if (count < maxevents || (tsp != NULL &&
-					    (timeout = gettimeleft(&ats,
-					    &sleepts)) <= 0))
-						goto done;
-					mutex_exit(&fdp->fd_lock);
-					goto retry;
-				}
-				/* someone else's marker. */
-				kn = TAILQ_NEXT(kn, kn_tqe);
+	/*
+	 * Acquire the fdp->fd_lock interlock to avoid races with
+	 * file creation/destruction from other threads.
+	 */
+relock:
+	mutex_spin_exit(&kq->kq_lock);
+	mutex_enter(&fdp->fd_lock);
+	mutex_spin_enter(&kq->kq_lock);
+
+	while (count != 0) {
+		kn = TAILQ_FIRST(&kq->kq_head);	/* get next knote */
+
+		if ((kn->kn_status & KN_MARKER) != 0 && kn != marker) {
+			if (influx) {
+				influx = 0;
+				KQ_FLUX_WAKEUP(kq);
 			}
-			kq_check(kq);
+			mutex_exit(&fdp->fd_lock);
+			(void)cv_wait(&kq->kq_cv, &kq->kq_lock);
+			goto relock;
+		}
+
+		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+		if (kn == marker) {
+			/* it's our marker, stop */
+			KQ_FLUX_WAKEUP(kq);
+			if (count == maxevents) {
+				mutex_exit(&fdp->fd_lock);
+				goto retry;
+			}
+			break;
+		}
+		KASSERT((kn->kn_status & KN_BUSY) == 0);
+
+		kq_check(kq);
+		kn->kn_status &= ~KN_QUEUED;
+		kn->kn_status |= KN_BUSY;
+		kq_check(kq);
+		if (kn->kn_status & KN_DISABLED) {
+			kn->kn_status &= ~KN_BUSY;
 			kq->kq_count--;
-			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
-			kn->kn_status &= ~KN_QUEUED;
-			kn->kn_status |= KN_BUSY;
-			kq_check(kq);
-			if (kn->kn_status & KN_DISABLED) {
+			/* don't want disabled events */
+			continue;
+		}
+		if ((kn->kn_flags & EV_ONESHOT) == 0) {
+			mutex_spin_exit(&kq->kq_lock);
+			KASSERT(kn->kn_fop != NULL);
+			KASSERT(kn->kn_fop->f_event != NULL);
+			KERNEL_LOCK(1, NULL);		/* XXXSMP */
+			KASSERT(mutex_owned(&fdp->fd_lock));
+			rv = (*kn->kn_fop->f_event)(kn, 0);
+			KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
+			mutex_spin_enter(&kq->kq_lock);
+			/* Re-poll if note was re-enqueued. */
+			if ((kn->kn_status & KN_QUEUED) != 0) {
 				kn->kn_status &= ~KN_BUSY;
-				/* don't want disabled events */
+				/* Re-enqueue raised kq_count, lower it again */
+				kq->kq_count--;
+				influx = 1;
 				continue;
 			}
-			if ((kn->kn_flags & EV_ONESHOT) == 0) {
-				mutex_spin_exit(&kq->kq_lock);
-				KASSERT(kn->kn_fop != NULL);
-				KASSERT(kn->kn_fop->f_event != NULL);
-				KERNEL_LOCK(1, NULL);		/* XXXSMP */
-				KASSERT(mutex_owned(&fdp->fd_lock));
-				rv = (*kn->kn_fop->f_event)(kn, 0);
-				KERNEL_UNLOCK_ONE(NULL);	/* XXXSMP */
-				mutex_spin_enter(&kq->kq_lock);
-				/* Re-poll if note was re-enqueued. */
-				if ((kn->kn_status & KN_QUEUED) != 0) {
-					kn->kn_status &= ~KN_BUSY;
-					continue;
-				}
-				if (rv == 0) {
-					/*
-					 * non-ONESHOT event that hasn't
-					 * triggered again, so de-queue.
-					 */
-					kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
-					continue;
-				}
-			}
-			/* XXXAD should be got from f_event if !oneshot. */
-			*kevp++ = kn->kn_kevent;
-			nkev++;
-			if (kn->kn_flags & EV_ONESHOT) {
-				/* delete ONESHOT events after retrieval */
-				kn->kn_status &= ~KN_BUSY;
-				mutex_spin_exit(&kq->kq_lock);
-				knote_detach(kn, fdp, true);
-				mutex_enter(&fdp->fd_lock);
-				mutex_spin_enter(&kq->kq_lock);
-			} else if (kn->kn_flags & EV_CLEAR) {
-				/* clear state after retrieval */
-				kn->kn_data = 0;
-				kn->kn_fflags = 0;
-				kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY);
-			} else if (kn->kn_flags & EV_DISPATCH) {
-				kn->kn_status |= KN_DISABLED;
-				kn->kn_status &= ~(KN_QUEUED|KN_ACTIVE|KN_BUSY);
-			} else {
-				/* add event back on list */
-				kq_check(kq);
-				kn->kn_status |= KN_QUEUED;
-				kn->kn_status &= ~KN_BUSY;
-				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
-				kq->kq_count++;
-				kq_check(kq);
-			}
-			if (nkev == kevcnt) {
-				/* do copyouts in kevcnt chunks */
-				mutex_spin_exit(&kq->kq_lock);
-				mutex_exit(&fdp->fd_lock);
-				error = (*keops->keo_put_events)
-				    (keops->keo_private,
-				    kevbuf, ulistp, nevents, nkev);
-				mutex_enter(&fdp->fd_lock);
-				mutex_spin_enter(&kq->kq_lock);
-				nevents += nkev;
-				nkev = 0;
-				kevp = kevbuf;
-			}
-			count--;
-			if (error != 0 || count == 0) {
-				/* remove marker */
-				TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
-				break;
+			if (rv == 0) {
+				/*
+				 * non-ONESHOT event that hasn't
+				 * triggered again, so de-queue.
+				 */
+				kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
+				kq->kq_count--;
+				influx = 1;
+				continue;
 			}
 		}
- done:
-		mutex_spin_exit(&kq->kq_lock);
-		mutex_exit(&fdp->fd_lock);
+		/* XXXAD should be got from f_event if !oneshot. */
+		*kevp++ = kn->kn_kevent;
+		nkev++;
+		if (kn->kn_flags & EV_ONESHOT) {
+			/* delete ONESHOT events after retrieval */
+			kn->kn_status &= ~KN_BUSY;
+			mutex_spin_exit(&kq->kq_lock);
+			knote_detach(kn, fdp, true);
+			mutex_enter(&fdp->fd_lock);
+			mutex_spin_enter(&kq->kq_lock);
+		} else if (kn->kn_flags & EV_CLEAR) {
+			/* clear state after retrieval */
+			kn->kn_data = 0;
+			kn->kn_fflags = 0;
+			kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
+			kq->kq_count--;
+		} else if (kn->kn_flags & EV_DISPATCH) {
+			kn->kn_status |= KN_DISABLED;
+			kn->kn_status &= ~(KN_ACTIVE|KN_BUSY);
+			kq->kq_count--;
+		} else {
+			/* add event back on list */
+			kq_check(kq);
+			kn->kn_status |= KN_QUEUED;
+			kn->kn_status &= ~KN_BUSY;
+			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
+			kq_check(kq);
+		}
+		if (nkev == kevcnt) {
+			/* do copyouts in kevcnt chunks */
+			influx = 0;
+			KQ_FLUX_WAKEUP(kq);
+			mutex_spin_exit(&kq->kq_lock);
+			mutex_exit(&fdp->fd_lock);
+			error = (*keops->keo_put_events)
+			    (keops->keo_private,
+			    kevbuf, ulistp, nevents, nkev);
+			mutex_enter(&fdp->fd_lock);
+			mutex_spin_enter(&kq->kq_lock);
+			nevents += nkev;
+			nkev = 0;
+			kevp = kevbuf;
+		}
+		count--;
+		if (error != 0 || count == 0) {
+			/* remove marker */
+			TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
+			break;
+		}
 	}
+	KQ_FLUX_WAKEUP(kq);
+	mutex_spin_exit(&kq->kq_lock);
+	mutex_exit(&fdp->fd_lock);
+
+done:
 	if (nkev != 0) {
 		/* copyout remaining events */
 		error = (*keops->keo_put_events)(keops->keo_private,
