Module Name:    src
Committed By:   ad
Date:           Thu Nov 21 21:42:31 UTC 2019

Modified Files:
        src/sys/kern: sys_select.c
        src/sys/sys: selinfo.h

Log Message:
Minor improvements to select/poll:

- Increase the maximum number of clusters from 32 to 64 for large systems.
  kcpuset_t could potentially be used here but that's an excursion I don't
  want to go on right now.  uint32_t -> uint64_t is very simple.

- In the case of a non-blocking select/poll, or where we won't block
  because there are events ready to report, stop registering interest in
  the back-end objects early.

- Change the wmesg for poll back to "poll".


To generate a diff of this commit:
cvs rdiff -u -r1.48 -r1.49 src/sys/kern/sys_select.c
cvs rdiff -u -r1.8 -r1.9 src/sys/sys/selinfo.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/sys_select.c
diff -u src/sys/kern/sys_select.c:1.48 src/sys/kern/sys_select.c:1.49
--- src/sys/kern/sys_select.c:1.48	Fri Sep 20 15:00:47 2019
+++ src/sys/kern/sys_select.c	Thu Nov 21 21:42:30 2019
@@ -1,7 +1,7 @@
-/*	$NetBSD: sys_select.c,v 1.48 2019/09/20 15:00:47 kamil Exp $	*/
+/*	$NetBSD: sys_select.c,v 1.49 2019/11/21 21:42:30 ad Exp $	*/
 
 /*-
- * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2007, 2008, 2009, 2010, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -84,7 +84,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.48 2019/09/20 15:00:47 kamil Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.49 2019/11/21 21:42:30 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -104,6 +104,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_select.c
 #include <sys/socketvar.h>
 #include <sys/sleepq.h>
 #include <sys/sysctl.h>
+#include <sys/bitops.h>
 
 /* Flags for lwp::l_selflag. */
 #define	SEL_RESET	0	/* awoken, interrupted, or not yet polling */
@@ -111,10 +112,6 @@ __KERNEL_RCSID(0, "$NetBSD: sys_select.c
 #define	SEL_BLOCKING	2	/* blocking and waiting for event */
 #define	SEL_EVENT	3	/* interrupted, events set directly */
 
-/* Operations: either select() or poll(). */
-#define	SELOP_SELECT	1
-#define	SELOP_POLL	2
-
 /*
  * Per-cluster state for select()/poll().  For a system with fewer
  * than 32 CPUs, this gives us per-CPU clusters.
@@ -125,8 +122,8 @@ __KERNEL_RCSID(0, "$NetBSD: sys_select.c
 typedef struct selcluster {
 	kmutex_t	*sc_lock;
 	sleepq_t	sc_sleepq;
+	uint64_t	sc_mask;
 	int		sc_ncoll;
-	uint32_t	sc_mask;
 } selcluster_t;
 
 static inline int	selscan(char *, const int, const size_t, register_t *);
@@ -150,6 +147,10 @@ static syncobj_t select_sobj = {
 static selcluster_t	*selcluster[SELCLUSTERS] __read_mostly;
 static int		direct_select __read_mostly = 0;
 
+/* Operations: either select() or poll(). */
+const char		selop_select[] = "select";
+const char		selop_poll[] = "poll";
+
 /*
  * Select system call.
  */
@@ -221,7 +222,7 @@ sys___select50(struct lwp *l, const stru
  * sel_do_scan: common code to perform the scan on descriptors.
  */
 static int
-sel_do_scan(const int op, void *fds, const int nf, const size_t ni,
+sel_do_scan(const char *opname, void *fds, const int nf, const size_t ni,
     struct timespec *ts, sigset_t *mask, register_t *retval)
 {
 	lwp_t		* const l = curlwp;
@@ -238,10 +239,17 @@ sel_do_scan(const int op, void *fds, con
 	if (__predict_false(mask))
 		sigsuspendsetup(l, mask);
 
+	/*
+	 * We may context switch during or at any time after picking a CPU
+	 * and cluster to associate with, but it doesn't matter.  In the
+	 * unlikely event we migrate elsewhere all we risk is a little lock
+	 * contention; correctness is not sacrificed.
+	 */
 	sc = curcpu()->ci_data.cpu_selcluster;
 	lock = sc->sc_lock;
 	l->l_selcluster = sc;
-	if (op == SELOP_SELECT) {
+
+	if (opname == selop_select) {
 		l->l_selbits = fds;
 		l->l_selni = ni;
 	} else {
@@ -261,10 +269,16 @@ sel_do_scan(const int op, void *fds, con
 		 * and lock activity resulting from fo_poll is enough to
 		 * provide an up to date value for new polling activity.
 		 */
-		l->l_selflag = SEL_SCANNING;
+		if (ts && (ts->tv_sec | ts->tv_nsec | direct_select) == 0) {
+			/* Non-blocking: no need for selrecord()/selclear() */
+			l->l_selflag = SEL_RESET;
+		} else {
+			l->l_selflag = SEL_SCANNING;
+		}
 		ncoll = sc->sc_ncoll;
+		membar_exit();
 
-		if (op == SELOP_SELECT) {
+		if (opname == selop_select) {
 			error = selscan((char *)fds, nf, ni, retval);
 		} else {
 			error = pollscan((struct pollfd *)fds, nf, retval);
@@ -301,7 +315,7 @@ state_check:
 		l->l_selflag = SEL_BLOCKING;
 		l->l_kpriority = true;
 		sleepq_enter(&sc->sc_sleepq, l, lock);
-		sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj);
+		sleepq_enqueue(&sc->sc_sleepq, sc, opname, &select_sobj);
 		error = sleepq_block(timo, true);
 		if (error != 0) {
 			break;
@@ -363,7 +377,7 @@ selcommon(register_t *retval, int nd, fd
 	getbits(ex, 2);
 #undef	getbits
 
-	error = sel_do_scan(SELOP_SELECT, bits, nd, ni, ts, mask, retval);
+	error = sel_do_scan(selop_select, bits, nd, ni, ts, mask, retval);
 	if (error == 0 && u_in != NULL)
 		error = copyout(bits + ni * 3, u_in, ni);
 	if (error == 0 && u_ou != NULL)
@@ -382,10 +396,12 @@ selscan(char *bits, const int nfd, const
 	fd_mask *ibitp, *obitp;
 	int msk, i, j, fd, n;
 	file_t *fp;
+	lwp_t *l;
 
 	ibitp = (fd_mask *)(bits + ni * 0);
 	obitp = (fd_mask *)(bits + ni * 3);
 	n = 0;
+	l = curlwp;
 
 	memset(obitp, 0, ni * 3);
 	for (msk = 0; msk < 3; msk++) {
@@ -402,8 +418,15 @@ selscan(char *bits, const int nfd, const
 				 * Setup an argument to selrecord(), which is
 				 * a file descriptor number.
 				 */
-				curlwp->l_selrec = fd;
+				l->l_selrec = fd;
 				if ((*fp->f_ops->fo_poll)(fp, sel_flag[msk])) {
+					if (!direct_select) {
+						/*
+						 * Have events: do nothing in
+						 * selrecord().
+						 */
+						l->l_selflag = SEL_RESET;
+					}
 					obits |= (1U << j);
 					n++;
 				}
@@ -412,7 +435,7 @@ selscan(char *bits, const int nfd, const
 			if (obits != 0) {
 				if (direct_select) {
 					kmutex_t *lock;
-					lock = curlwp->l_selcluster->sc_lock;
+					lock = l->l_selcluster->sc_lock;
 					mutex_spin_enter(lock);
 					*obitp |= obits;
 					mutex_spin_exit(lock);
@@ -527,7 +550,7 @@ pollcommon(register_t *retval, struct po
 	if (error)
 		goto fail;
 
-	error = sel_do_scan(SELOP_POLL, fds, nfds, ni, ts, mask, retval);
+	error = sel_do_scan(selop_poll, fds, nfds, ni, ts, mask, retval);
 	if (error == 0)
 		error = copyout(fds, u_fds, ni);
  fail:
@@ -560,6 +583,10 @@ pollscan(struct pollfd *fds, const int n
 			fd_putfile(fds->fd);
 		}
 		if (revents) {
+			if (!direct_select)  {
+				/* Have events: do nothing in selrecord(). */
+				curlwp->l_selflag = SEL_RESET;
+			}
 			fds->revents = revents;
 			n++;
 		}
@@ -607,7 +634,9 @@ selrecord(lwp_t *selector, struct selinf
 	sc = selector->l_selcluster;
 	other = sip->sel_lwp;
 
-	if (other == selector) {
+	if (selector->l_selflag == SEL_RESET) {
+		/* 0. We're not going to block - will poll again if needed. */
+	} else if (other == selector) {
 		/* 1. We (selector) already claimed to be the first LWP. */
 		KASSERT(sip->sel_cluster == sc);
 	} else if (other == NULL) {
@@ -708,7 +737,7 @@ void
 selnotify(struct selinfo *sip, int events, long knhint)
 {
 	selcluster_t *sc;
-	uint32_t mask;
+	uint64_t mask;
 	int index, oflag;
 	lwp_t *l;
 	kmutex_t *lock;
@@ -757,8 +786,8 @@ selnotify(struct selinfo *sip, int event
 		 */
 		sip->sel_collision = 0;
 		do {
-			index = ffs(mask) - 1;
-			mask &= ~(1U << index);
+			index = ffs64(mask) - 1;
+			mask ^= __BIT(index);
 			sc = selcluster[index];
 			lock = sc->sc_lock;
 			mutex_spin_enter(lock);
@@ -790,6 +819,15 @@ selclear(void)
 	sc = l->l_selcluster;
 	lock = sc->sc_lock;
 
+	/*
+	 * If the request was non-blocking, or we found events on the first
+	 * descriptor, there will be no need to clear anything - avoid
+	 * taking the lock.
+	 */
+	if (SLIST_EMPTY(&l->l_selwait)) {
+		return;
+	}
+
 	mutex_spin_enter(lock);
 	for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) {
 		KASSERT(sip->sel_lwp == l);

Index: src/sys/sys/selinfo.h
diff -u src/sys/sys/selinfo.h:1.8 src/sys/sys/selinfo.h:1.9
--- src/sys/sys/selinfo.h:1.8	Thu Jul  8 12:23:31 2010
+++ src/sys/sys/selinfo.h	Thu Nov 21 21:42:30 2019
@@ -1,7 +1,7 @@
-/*	$NetBSD: selinfo.h,v 1.8 2010/07/08 12:23:31 rmind Exp $	*/
+/*	$NetBSD: selinfo.h,v 1.9 2019/11/21 21:42:30 ad Exp $	*/
 
 /*-
- * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2010, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -71,13 +71,13 @@
  * notified when I/O becomes possible.
  */
 struct selinfo {
+	uint64_t	sel_collision;	/* mask of colliding cpus */
 	struct klist	sel_klist;	/* knotes attached to this selinfo */
 	void		*sel_cluster;	/* current cluster association */
 	struct lwp	*sel_lwp;	/* first LWP to be notified */
 	uintptr_t	sel_fdinfo;	/* selected descriptor by first LWP */
 	SLIST_ENTRY(selinfo) sel_chain;	/* entry on LWP's list of selinfo */
-	uint32_t	sel_collision;	/* mask of colliding cpus */
-	uint32_t	sel_reserved[3];/* reserved for future expansion */
+	uintptr_t	sel_reserved[2];/* reserved for future expansion */
 };
 
 #endif /* !_SYS_SELINFO_H_ */

Reply via email to