Module Name: src Committed By: ad Date: Thu Nov 21 21:42:31 UTC 2019
Modified Files: src/sys/kern: sys_select.c src/sys/sys: selinfo.h Log Message: Minor improvements to select/poll: - Increase the maximum number of clusters from 32 to 64 for large systems. kcpuset_t could potentially be used here but that's an excursion I don't want to go on right now. uint32_t -> uint64_t is very simple. - In the case of a non-blocking select/poll, or where we won't block because there are events ready to report, stop registering interest in the back-end objects early. - Change the wmesg for poll back to "poll". To generate a diff of this commit: cvs rdiff -u -r1.48 -r1.49 src/sys/kern/sys_select.c cvs rdiff -u -r1.8 -r1.9 src/sys/sys/selinfo.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/sys_select.c diff -u src/sys/kern/sys_select.c:1.48 src/sys/kern/sys_select.c:1.49 --- src/sys/kern/sys_select.c:1.48 Fri Sep 20 15:00:47 2019 +++ src/sys/kern/sys_select.c Thu Nov 21 21:42:30 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: sys_select.c,v 1.48 2019/09/20 15:00:47 kamil Exp $ */ +/* $NetBSD: sys_select.c,v 1.49 2019/11/21 21:42:30 ad Exp $ */ /*- - * Copyright (c) 2007, 2008, 2009, 2010 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2008, 2009, 2010, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -84,7 +84,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.48 2019/09/20 15:00:47 kamil Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.49 2019/11/21 21:42:30 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -104,6 +104,7 @@ __KERNEL_RCSID(0, "$NetBSD: sys_select.c #include <sys/socketvar.h> #include <sys/sleepq.h> #include <sys/sysctl.h> +#include <sys/bitops.h> /* Flags for lwp::l_selflag. */ #define SEL_RESET 0 /* awoken, interrupted, or not yet polling */ @@ -111,10 +112,6 @@ __KERNEL_RCSID(0, "$NetBSD: sys_select.c #define SEL_BLOCKING 2 /* blocking and waiting for event */ #define SEL_EVENT 3 /* interrupted, events set directly */ -/* Operations: either select() or poll(). */ -#define SELOP_SELECT 1 -#define SELOP_POLL 2 - /* * Per-cluster state for select()/poll(). For a system with fewer * than 32 CPUs, this gives us per-CPU clusters. @@ -125,8 +122,8 @@ __KERNEL_RCSID(0, "$NetBSD: sys_select.c typedef struct selcluster { kmutex_t *sc_lock; sleepq_t sc_sleepq; + uint64_t sc_mask; int sc_ncoll; - uint32_t sc_mask; } selcluster_t; static inline int selscan(char *, const int, const size_t, register_t *); @@ -150,6 +147,10 @@ static syncobj_t select_sobj = { static selcluster_t *selcluster[SELCLUSTERS] __read_mostly; static int direct_select __read_mostly = 0; +/* Operations: either select() or poll(). */ +const char selop_select[] = "select"; +const char selop_poll[] = "poll"; + /* * Select system call. */ @@ -221,7 +222,7 @@ sys___select50(struct lwp *l, const stru * sel_do_scan: common code to perform the scan on descriptors. */ static int -sel_do_scan(const int op, void *fds, const int nf, const size_t ni, +sel_do_scan(const char *opname, void *fds, const int nf, const size_t ni, struct timespec *ts, sigset_t *mask, register_t *retval) { lwp_t * const l = curlwp; @@ -238,10 +239,17 @@ sel_do_scan(const int op, void *fds, con if (__predict_false(mask)) sigsuspendsetup(l, mask); + /* + * We may context switch during or at any time after picking a CPU + * and cluster to associate with, but it doesn't matter. In the + * unlikely event we migrate elsewhere all we risk is a little lock + * contention; correctness is not sacrificed. + */ sc = curcpu()->ci_data.cpu_selcluster; lock = sc->sc_lock; l->l_selcluster = sc; - if (op == SELOP_SELECT) { + + if (opname == selop_select) { l->l_selbits = fds; l->l_selni = ni; } else { @@ -261,10 +269,16 @@ sel_do_scan(const int op, void *fds, con * and lock activity resulting from fo_poll is enough to * provide an up to date value for new polling activity. */ - l->l_selflag = SEL_SCANNING; + if (ts && (ts->tv_sec | ts->tv_nsec | direct_select) == 0) { + /* Non-blocking: no need for selrecord()/selclear() */ + l->l_selflag = SEL_RESET; + } else { + l->l_selflag = SEL_SCANNING; + } ncoll = sc->sc_ncoll; + membar_exit(); - if (op == SELOP_SELECT) { + if (opname == selop_select) { error = selscan((char *)fds, nf, ni, retval); } else { error = pollscan((struct pollfd *)fds, nf, retval); @@ -301,7 +315,7 @@ state_check: l->l_selflag = SEL_BLOCKING; l->l_kpriority = true; sleepq_enter(&sc->sc_sleepq, l, lock); - sleepq_enqueue(&sc->sc_sleepq, sc, "select", &select_sobj); + sleepq_enqueue(&sc->sc_sleepq, sc, opname, &select_sobj); error = sleepq_block(timo, true); if (error != 0) { break; @@ -363,7 +377,7 @@ selcommon(register_t *retval, int nd, fd getbits(ex, 2); #undef getbits - error = sel_do_scan(SELOP_SELECT, bits, nd, ni, ts, mask, retval); + error = sel_do_scan(selop_select, bits, nd, ni, ts, mask, retval); if (error == 0 && u_in != NULL) error = copyout(bits + ni * 3, u_in, ni); if (error == 0 && u_ou != NULL) @@ -382,10 +396,12 @@ selscan(char *bits, const int nfd, const fd_mask *ibitp, *obitp; int msk, i, j, fd, n; file_t *fp; + lwp_t *l; ibitp = (fd_mask *)(bits + ni * 0); obitp = (fd_mask *)(bits + ni * 3); n = 0; + l = curlwp; memset(obitp, 0, ni * 3); for (msk = 0; msk < 3; msk++) { @@ -402,8 +418,15 @@ selscan(char *bits, const int nfd, const * Setup an argument to selrecord(), which is * a file descriptor number. */ - curlwp->l_selrec = fd; + l->l_selrec = fd; if ((*fp->f_ops->fo_poll)(fp, sel_flag[msk])) { + if (!direct_select) { + /* + * Have events: do nothing in + * selrecord(). + */ + l->l_selflag = SEL_RESET; + } obits |= (1U << j); n++; } @@ -412,7 +435,7 @@ selscan(char *bits, const int nfd, const if (obits != 0) { if (direct_select) { kmutex_t *lock; - lock = curlwp->l_selcluster->sc_lock; + lock = l->l_selcluster->sc_lock; mutex_spin_enter(lock); *obitp |= obits; mutex_spin_exit(lock); @@ -527,7 +550,7 @@ pollcommon(register_t *retval, struct po if (error) goto fail; - error = sel_do_scan(SELOP_POLL, fds, nfds, ni, ts, mask, retval); + error = sel_do_scan(selop_poll, fds, nfds, ni, ts, mask, retval); if (error == 0) error = copyout(fds, u_fds, ni); fail: @@ -560,6 +583,10 @@ pollscan(struct pollfd *fds, const int n fd_putfile(fds->fd); } if (revents) { + if (!direct_select) { + /* Have events: do nothing in selrecord(). */ + curlwp->l_selflag = SEL_RESET; + } fds->revents = revents; n++; } @@ -607,7 +634,9 @@ selrecord(lwp_t *selector, struct selinf sc = selector->l_selcluster; other = sip->sel_lwp; - if (other == selector) { + if (selector->l_selflag == SEL_RESET) { + /* 0. We're not going to block - will poll again if needed. */ + } else if (other == selector) { /* 1. We (selector) already claimed to be the first LWP. */ KASSERT(sip->sel_cluster == sc); } else if (other == NULL) { @@ -708,7 +737,7 @@ void selnotify(struct selinfo *sip, int events, long knhint) { selcluster_t *sc; - uint32_t mask; + uint64_t mask; int index, oflag; lwp_t *l; kmutex_t *lock; @@ -757,8 +786,8 @@ selnotify(struct selinfo *sip, int event */ sip->sel_collision = 0; do { - index = ffs(mask) - 1; - mask &= ~(1U << index); + index = ffs64(mask) - 1; + mask ^= __BIT(index); sc = selcluster[index]; lock = sc->sc_lock; mutex_spin_enter(lock); @@ -790,6 +819,15 @@ selclear(void) sc = l->l_selcluster; lock = sc->sc_lock; + /* + * If the request was non-blocking, or we found events on the first + * descriptor, there will be no need to clear anything - avoid + * taking the lock. + */ + if (SLIST_EMPTY(&l->l_selwait)) { + return; + } + mutex_spin_enter(lock); for (sip = SLIST_FIRST(&l->l_selwait); sip != NULL; sip = next) { KASSERT(sip->sel_lwp == l); Index: src/sys/sys/selinfo.h diff -u src/sys/sys/selinfo.h:1.8 src/sys/sys/selinfo.h:1.9 --- src/sys/sys/selinfo.h:1.8 Thu Jul 8 12:23:31 2010 +++ src/sys/sys/selinfo.h Thu Nov 21 21:42:30 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: selinfo.h,v 1.8 2010/07/08 12:23:31 rmind Exp $ */ +/* $NetBSD: selinfo.h,v 1.9 2019/11/21 21:42:30 ad Exp $ */ /*- - * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2010, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -71,13 +71,13 @@ * notified when I/O becomes possible. */ struct selinfo { + uint64_t sel_collision; /* mask of colliding cpus */ struct klist sel_klist; /* knotes attached to this selinfo */ void *sel_cluster; /* current cluster association */ struct lwp *sel_lwp; /* first LWP to be notified */ uintptr_t sel_fdinfo; /* selected descriptor by first LWP */ SLIST_ENTRY(selinfo) sel_chain; /* entry on LWP's list of selinfo */ - uint32_t sel_collision; /* mask of colliding cpus */ - uint32_t sel_reserved[3];/* reserved for future expansion */ + uintptr_t sel_reserved[2];/* reserved for future expansion */ }; #endif /* !_SYS_SELINFO_H_ */