Hello, I think the recent diff should go in so I can pick it up into my tree. It's the same as what I have in my net/systm.h.
What I also found is that in some cases, we are going to grab KERNEL_LOCK(), while running in in_input_process(). This typically happens when we deal with multicast forwarding in ip_input_if(): diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index c02e95de474..a35b3d6e533 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -432,9 +432,12 @@ ip_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp) * as expected when ip_mforward() is called from * ip_output().) */ + NET_RUNLOCK(); + NET_LOCK(); KERNEL_LOCK(); error = ip_mforward(m, ifp); KERNEL_UNLOCK(); + NET_DOWNGRADE(); if (error) { ipstat_inc(ips_cantforward); goto bad; I deliberately keep NET_RUNLOCK()/NET_LOCK() as separate operations, while the reverse one is joined to NET_DOWNGRADE(). The piece above comes from a larger diff, which I hope to send later today. I'll be offline then and will be back on Monday 13th. O.K. sashan@ 2017-11-08 11:50 GMT+01:00 Martin Pieuchot <m...@openbsd.org>: > On 08/11/17(Wed) 10:37, Martin Pieuchot wrote: >> We're at the stage where we want to run multiple parts of the Network >> Stack in parallel. sashan@ is trying to get multiple network threads >> running to exercise parallelism in pf_test() and tb@ is trying to push >> the NET_LOCK() further down in ioctl(2) path. >> >> However we want to be able to gradually select which code paths can be >> executed in parallel as the rest of the stack. So the way to move >> forward is to introduce a read version of the NET_LOCK(), NET_RLOCK() >> and change code paths on a case-by-case basis. >> >> Diff below does that for the input processing path. Note that the 3 >> tasks below cannot run in parallel because they are scheduled on the >> same taskq. >> >> Regarding assert macros, NET_ASSERT_LOCK() now required either a read >> or write lock. I also introduced a NET_ASSERT_WLOCK() to put in paths >> that are not ready to be run in parallel of the rest of the stack. 
>> >> Once that's in we can slowly convert existing NET_LOCK() to NET_WLOCK() >> to be explicit. > > I got bitten by rw_status() once again. There's no way to tell that the > current lock is not holding a read lock. But that's ok since what we > really need is to assert that we're not holding a write lock. > > Fixed diff below, ok? > > Index: net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.523 > diff -u -p -r1.523 if.c > --- net/if.c 4 Nov 2017 16:58:46 -0000 1.523 > +++ net/if.c 8 Nov 2017 10:11:13 -0000 > @@ -905,7 +905,7 @@ if_input_process(void *xifidx) > * to PF globals, pipex globals, unicast and multicast addresses > * lists. > */ > - NET_LOCK(); > + NET_RLOCK(); > s = splnet(); > while ((m = ml_dequeue(&ml)) != NULL) { > /* > @@ -922,7 +922,7 @@ if_input_process(void *xifidx) > m_freem(m); > } > splx(s); > - NET_UNLOCK(); > + NET_RUNLOCK(); > out: > if_put(ifp); > } > Index: netinet/ip_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_input.c,v > retrieving revision 1.329 > diff -u -p -r1.329 ip_input.c > --- netinet/ip_input.c 5 Nov 2017 13:19:59 -0000 1.329 > +++ netinet/ip_input.c 8 Nov 2017 10:03:54 -0000 > @@ -1814,11 +1814,11 @@ ip_send_dispatch(void *xmq) > if (ml_empty(&ml)) > return; > > - NET_LOCK(); > + NET_RLOCK(); > while ((m = ml_dequeue(&ml)) != NULL) { > ip_output(m, NULL, NULL, 0, NULL, NULL, 0); > } > - NET_UNLOCK(); > + NET_RUNLOCK(); > } > > void > Index: netinet6/ip6_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v > retrieving revision 1.207 > diff -u -p -r1.207 ip6_input.c > --- netinet6/ip6_input.c 1 Nov 2017 06:35:38 -0000 1.207 > +++ netinet6/ip6_input.c 8 Nov 2017 10:03:54 -0000 > @@ -1459,7 +1459,7 @@ ip6_send_dispatch(void *xmq) > if (ml_empty(&ml)) > return; > > - NET_LOCK(); > + NET_RLOCK(); > while 
((m = ml_dequeue(&ml)) != NULL) { > /* > * To avoid a "too big" situation at an intermediate router > and > @@ -1470,7 +1470,7 @@ ip6_send_dispatch(void *xmq) > */ > ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL); > } > - NET_UNLOCK(); > + NET_RUNLOCK(); > } > > void > Index: sys/systm.h > =================================================================== > RCS file: /cvs/src/sys/sys/systm.h,v > retrieving revision 1.133 > diff -u -p -r1.133 systm.h > --- sys/systm.h 11 Aug 2017 21:24:20 -0000 1.133 > +++ sys/systm.h 8 Nov 2017 10:13:42 -0000 > @@ -296,26 +296,36 @@ int uiomove(void *, size_t, struct uio * > > extern struct rwlock netlock; > > -#define NET_LOCK() > \ > -do { \ > - rw_enter_write(&netlock); \ > -} while (0) > +#define NET_LOCK() NET_WLOCK() > +#define NET_UNLOCK() NET_WUNLOCK() > +#define NET_ASSERT_UNLOCKED() NET_ASSERT_WUNLOCKED() > + > + > +#define NET_WLOCK() do { rw_enter_write(&netlock); } while (0) > +#define NET_WUNLOCK() do { rw_exit_write(&netlock); } while (0) > > -#define NET_UNLOCK() > \ > +#define NET_ASSERT_WLOCKED() > \ > do { \ > - rw_exit_write(&netlock); \ > + int _s = rw_status(&netlock); \ > + if (_s != RW_WRITE) \ > + splassert_fail(RW_WRITE, _s, __func__); \ > } while (0) > > -#define NET_ASSERT_LOCKED() > \ > +#define NET_ASSERT_WUNLOCKED() > \ > do { \ > - if (rw_status(&netlock) != RW_WRITE) \ > - splassert_fail(RW_WRITE, rw_status(&netlock), __func__);\ > + int _s = rw_status(&netlock); \ > + if (_s == RW_WRITE) \ > + splassert_fail(0, RW_WRITE, __func__); \ > } while (0) > > -#define NET_ASSERT_UNLOCKED() > \ > +#define NET_RLOCK() do { rw_enter_read(&netlock); } while (0) > +#define NET_RUNLOCK() do { rw_exit_read(&netlock); } while (0) > + > +#define NET_ASSERT_LOCKED() > \ > do { \ > - if (rw_status(&netlock) == RW_WRITE) \ > - splassert_fail(0, rw_status(&netlock), __func__); \ > + int _s = rw_status(&netlock); \ > + if (_s != RW_WRITE && _s != RW_READ) \ > + splassert_fail(RW_READ, _s, __func__); \ > } while (0) 
> > __returns_twice int setjmp(label_t *);