Hello,

I think the recent diff should go in so I can pick it up to my tree.
It's the same what I have in my net/systm.h

What I also found that in some cases, we are going to
grab KERNEL_LOCK(), while running in in_input_process().
this typically happens when we deal with multicast forwarding
in ip_input_if():
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index c02e95de474..a35b3d6e533 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -432,9 +432,12 @@ ip_input_if(struct mbuf **mp, int *offp, int nxt,
int af, struct ifnet *ifp)
                         * as expected when ip_mforward() is called from
                         * ip_output().)
                         */
+                       NET_RUNLOCK();
+                       NET_LOCK();
                        KERNEL_LOCK();
                        error = ip_mforward(m, ifp);
                        KERNEL_UNLOCK();
+                       NET_DOWNGRADE();
                        if (error) {
                                ipstat_inc(ips_cantforward);
                                goto bad;


I deliberately keep NET_RUNLOCK()/NET_LOCK() as separate operations,
while the reverse one is joined to NET_DOWNGRADE(). The piece above
comes from larger diff. Which I hope to send later today. I'll become off-line
then and will be back on Monday 13th.

O.K. sashan@



2017-11-08 11:50 GMT+01:00 Martin Pieuchot <m...@openbsd.org>:
> On 08/11/17(Wed) 10:37, Martin Pieuchot wrote:
>> We're at the stage where we want to run multiple parts of the Network
>> Stack in parallel.  sashan@ is trying to get multiple network threads
>> running to exercise parallelism in pf_test() and tb@ is trying to push
>> the NET_LOCK() further down in ioctl(2) path.
>>
>> However we want to be able to gradually select which code paths can be
>> executed in parallel as the rest of the stack.  So the way to move
>> forward is to introduce a read version of the NET_LOCK(), NET_RLOCK()
>> and change code paths on a case-by-case basis.
>>
>> Diff below does that for the input processing path.  Note that the 3
>> tasks below cannot run in parallel because they are scheduled on the
>> same taskq.
>>
>> Regarding assert macros, NET_ASSERT_LOCK() now required either a read
>> or write lock.  I also introduced a NET_ASSERT_WLOCK() to put in paths
>> that are not ready to be run in parallel of the rest of the stack.
>>
>> Once that's in we can slowly convert existing NET_LOCK() to NET_WLOCK()
>> to be explicit.
>
> I got bitten by rw_status() once again.  There's no way to tell that the
> current lock is not holding a read lock.  But that's ok since what we
> really need is to assert that we're not holding a write lock.
>
> Fixed diff below, ok?
>
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.523
> diff -u -p -r1.523 if.c
> --- net/if.c    4 Nov 2017 16:58:46 -0000       1.523
> +++ net/if.c    8 Nov 2017 10:11:13 -0000
> @@ -905,7 +905,7 @@ if_input_process(void *xifidx)
>          * to PF globals, pipex globals, unicast and multicast addresses
>          * lists.
>          */
> -       NET_LOCK();
> +       NET_RLOCK();
>         s = splnet();
>         while ((m = ml_dequeue(&ml)) != NULL) {
>                 /*
> @@ -922,7 +922,7 @@ if_input_process(void *xifidx)
>                         m_freem(m);
>         }
>         splx(s);
> -       NET_UNLOCK();
> +       NET_RUNLOCK();
>  out:
>         if_put(ifp);
>  }
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.329
> diff -u -p -r1.329 ip_input.c
> --- netinet/ip_input.c  5 Nov 2017 13:19:59 -0000       1.329
> +++ netinet/ip_input.c  8 Nov 2017 10:03:54 -0000
> @@ -1814,11 +1814,11 @@ ip_send_dispatch(void *xmq)
>         if (ml_empty(&ml))
>                 return;
>
> -       NET_LOCK();
> +       NET_RLOCK();
>         while ((m = ml_dequeue(&ml)) != NULL) {
>                 ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
>         }
> -       NET_UNLOCK();
> +       NET_RUNLOCK();
>  }
>
>  void
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.207
> diff -u -p -r1.207 ip6_input.c
> --- netinet6/ip6_input.c        1 Nov 2017 06:35:38 -0000       1.207
> +++ netinet6/ip6_input.c        8 Nov 2017 10:03:54 -0000
> @@ -1459,7 +1459,7 @@ ip6_send_dispatch(void *xmq)
>         if (ml_empty(&ml))
>                 return;
>
> -       NET_LOCK();
> +       NET_RLOCK();
>         while ((m = ml_dequeue(&ml)) != NULL) {
>                 /*
>                  * To avoid a "too big" situation at an intermediate router 
> and
> @@ -1470,7 +1470,7 @@ ip6_send_dispatch(void *xmq)
>                  */
>                 ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
>         }
> -       NET_UNLOCK();
> +       NET_RUNLOCK();
>  }
>
>  void
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.133
> diff -u -p -r1.133 systm.h
> --- sys/systm.h 11 Aug 2017 21:24:20 -0000      1.133
> +++ sys/systm.h 8 Nov 2017 10:13:42 -0000
> @@ -296,26 +296,36 @@ int       uiomove(void *, size_t, struct uio *
>
>  extern struct rwlock netlock;
>
> -#define        NET_LOCK()                                                    
>   \
> -do {                                                                   \
> -       rw_enter_write(&netlock);                                       \
> -} while (0)
> +#define        NET_LOCK()              NET_WLOCK()
> +#define        NET_UNLOCK()            NET_WUNLOCK()
> +#define        NET_ASSERT_UNLOCKED()   NET_ASSERT_WUNLOCKED()
> +
> +
> +#define        NET_WLOCK()     do { rw_enter_write(&netlock); } while (0)
> +#define        NET_WUNLOCK()   do { rw_exit_write(&netlock); } while (0)
>
> -#define        NET_UNLOCK()                                                  
>   \
> +#define        NET_ASSERT_WLOCKED()                                          
>   \
>  do {                                                                   \
> -       rw_exit_write(&netlock);                                        \
> +       int _s = rw_status(&netlock);                                   \
> +       if (_s != RW_WRITE)                                             \
> +               splassert_fail(RW_WRITE, _s, __func__);                 \
>  } while (0)
>
> -#define        NET_ASSERT_LOCKED()                                           
>   \
> +#define        NET_ASSERT_WUNLOCKED()                                        
>   \
>  do {                                                                   \
> -       if (rw_status(&netlock) != RW_WRITE)                            \
> -               splassert_fail(RW_WRITE, rw_status(&netlock), __func__);\
> +       int _s = rw_status(&netlock);                                   \
> +       if (_s == RW_WRITE)                                             \
> +               splassert_fail(0, RW_WRITE, __func__);                  \
>  } while (0)
>
> -#define        NET_ASSERT_UNLOCKED()                                         
>   \
> +#define        NET_RLOCK()     do { rw_enter_read(&netlock); } while (0)
> +#define        NET_RUNLOCK()   do { rw_exit_read(&netlock); } while (0)
> +
> +#define        NET_ASSERT_LOCKED()                                           
>   \
>  do {                                                                   \
> -       if (rw_status(&netlock) == RW_WRITE)                            \
> -               splassert_fail(0, rw_status(&netlock), __func__);       \
> +       int _s = rw_status(&netlock);                                   \
> +       if (_s != RW_WRITE && _s != RW_READ)                            \
> +               splassert_fail(RW_READ, _s, __func__);                  \
>  } while (0)
>
>  __returns_twice int    setjmp(label_t *);

Reply via email to