Re: un-KERNEL_LOCK() TCP/UDP input & co

2017-11-14 Thread Visa Hankala
On Tue, Nov 14, 2017 at 10:37:16AM +0100, Martin Pieuchot wrote:
> Note that the diff below still includes a false positive.  If the
> current thread doesn't have the read-lock but another one has it,
> then RW_READ will still be returned.

That's true if witness(4) has not been enabled.



Re: un-KERNEL_LOCK() TCP/UDP input & co

2017-11-14 Thread Martin Pieuchot
On 13/11/17(Mon) 17:29, Visa Hankala wrote:
> On Mon, Nov 13, 2017 at 01:25:42PM +0100, Martin Pieuchot wrote:
> > I'd love is somebody could do the changes in the rwlock API to let us
> > check if we're holding a lock.  Maybe this is already present in
> > witness(4), visa?
> 
> witness(4) does maintain data about lock ownership. The patch below
> might do the trick for rwlocks without adding extra state tracking.

I wouldn't mind a solution that's always on.  I'm afraid it will take
some time before we can enable WITNESS regularly.

> The patch also fixes the witness-side initialization of statically
> initialized locks, in functions witness_checkorder() and
> witness_lock(). The fix belongs to a separate commit though.

You should commit that.

Note that the diff below still includes a false positive.  If the
current thread doesn't have the read-lock but another one has it,
then RW_READ will still be returned.

> Index: share/man/man9/rwlock.9
> ===
> RCS file: src/share/man/man9/rwlock.9,v
> retrieving revision 1.20
> diff -u -p -r1.20 rwlock.9
> --- share/man/man9/rwlock.9   30 Oct 2017 13:33:36 -  1.20
> +++ share/man/man9/rwlock.9   13 Nov 2017 17:05:51 -
> @@ -33,6 +33,7 @@
>  .Nm rw_assert_anylock ,
>  .Nm rw_assert_unlocked ,
>  .Nm rw_status ,
> +.Nm rw_status_curproc ,
>  .Nm RWLOCK_INITIALIZER ,
>  .Nm rrw_init ,
>  .Nm rrw_init_flags ,
> @@ -68,6 +69,8 @@
>  .Fn rw_assert_unlocked "struct rwlock *rwl"
>  .Ft int
>  .Fn rw_status "struct rwlock *rwl"
> +.Ft int
> +.Fn rw_status_curproc "struct rwlock *rwl"
>  .Fn RWLOCK_INITIALIZER "const char *name"
>  .Ft void
>  .Fn rrw_init "struct rrwlock *rrwl" "const char *name"
> @@ -181,7 +184,7 @@ functions check the status
>  .Fa rwl ,
>  panicking if it is not write-, read-, any-, or unlocked, respectively.
>  .Pp
> -.Nm rw_status
> +.Fn rw_status
>  returns the current state of the lock:
>  .Pp
>  .Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
> @@ -194,6 +197,22 @@ Lock is read locked.
>  The current thread may be one of the threads that has it locked.
>  .It 0
>  Lock is not locked.
> +.El
> +.Pp
> +.Fn rw_status_curproc
> +returns the current state of the lock relative to the calling thread:
> +.Pp
> +.Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
> +.It Dv RW_WRITE
> +Lock is write locked by the calling thread.
> +.It Dv RW_READ
> +Lock is read locked.
> +The current thread may be one of the threads that has it locked.
> +If the kernel has been compiled with
> +.Xr witness 4 ,
> +the status is exact, and the lock is read locked by the current thread.
> +.It 0
> +Lock is not locked by the calling thread.
>  .El
>  .Pp
>  A lock declaration may be initialised with the
> Index: sys/kern/kern_rwlock.c
> ===
> RCS file: src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.32
> diff -u -p -r1.32 kern_rwlock.c
> --- sys/kern/kern_rwlock.c24 Oct 2017 08:53:15 -  1.32
> +++ sys/kern/kern_rwlock.c13 Nov 2017 17:05:51 -
> @@ -325,6 +325,34 @@ rw_status(struct rwlock *rwl)
>   return (0);
>  }
>  
> +int
> +rw_status_curproc(struct rwlock *rwl)
> +{
> + unsigned long owner;
> +#ifdef WITNESS
> + int status = witness_status(>rwl_lock_obj);
> +
> + if (status != -1) {
> + if (status & LA_XLOCKED)
> + return (RW_WRITE);
> + if (status & LA_SLOCKED)
> + return (RW_READ);
> + return (0);
> + }
> +#endif
> +
> + owner = rwl->rwl_owner;
> + if (owner & RWLOCK_WRLOCK) {
> + if (RW_PROC(curproc) == RW_PROC(owner))
> + return (RW_WRITE);
> + else
> + return (0);
> + }
> + if (owner)
> + return RW_READ;
> + return (0);
> +}
> +
>  #ifdef DIAGNOSTIC
>  void
>  rw_assert_wrlock(struct rwlock *rwl)
> Index: sys/kern/subr_witness.c
> ===
> RCS file: src/sys/kern/subr_witness.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 subr_witness.c
> --- sys/kern/subr_witness.c   12 Aug 2017 03:13:23 -  1.4
> +++ sys/kern/subr_witness.c   13 Nov 2017 17:05:51 -
> @@ -744,8 +744,8 @@ witness_checkorder(struct lock_object *l
>   struct witness *w, *w1;
>   int i, j, s;
>  
> - if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL ||
> - panicstr != NULL)
> + if (witness_cold || witness_watch < 1 || panicstr != NULL ||
> + (lock->lo_flags & LO_WITNESS) == 0)
>   return;
>  
>   w = lock->lo_witness;
> @@ -1078,8 +1078,8 @@ witness_lock(struct lock_object *lock, i
>   struct witness *w;
>   int s;
>  
> - if (witness_cold || witness_watch == -1 || lock->lo_witness == NULL ||
> - panicstr != NULL)
> + if (witness_cold || witness_watch == -1 || panicstr != NULL ||

Re: un-KERNEL_LOCK() TCP/UDP input & co

2017-11-13 Thread Visa Hankala
On Mon, Nov 13, 2017 at 01:25:42PM +0100, Martin Pieuchot wrote:
> I'd love is somebody could do the changes in the rwlock API to let us
> check if we're holding a lock.  Maybe this is already present in
> witness(4), visa?

witness(4) does maintain data about lock ownership. The patch below
might do the trick for rwlocks without adding extra state tracking.

The patch also fixes the witness-side initialization of statically
initialized locks, in functions witness_checkorder() and
witness_lock(). The fix belongs to a separate commit though.

Index: share/man/man9/rwlock.9
===
RCS file: src/share/man/man9/rwlock.9,v
retrieving revision 1.20
diff -u -p -r1.20 rwlock.9
--- share/man/man9/rwlock.9 30 Oct 2017 13:33:36 -  1.20
+++ share/man/man9/rwlock.9 13 Nov 2017 17:05:51 -
@@ -33,6 +33,7 @@
 .Nm rw_assert_anylock ,
 .Nm rw_assert_unlocked ,
 .Nm rw_status ,
+.Nm rw_status_curproc ,
 .Nm RWLOCK_INITIALIZER ,
 .Nm rrw_init ,
 .Nm rrw_init_flags ,
@@ -68,6 +69,8 @@
 .Fn rw_assert_unlocked "struct rwlock *rwl"
 .Ft int
 .Fn rw_status "struct rwlock *rwl"
+.Ft int
+.Fn rw_status_curproc "struct rwlock *rwl"
 .Fn RWLOCK_INITIALIZER "const char *name"
 .Ft void
 .Fn rrw_init "struct rrwlock *rrwl" "const char *name"
@@ -181,7 +184,7 @@ functions check the status
 .Fa rwl ,
 panicking if it is not write-, read-, any-, or unlocked, respectively.
 .Pp
-.Nm rw_status
+.Fn rw_status
 returns the current state of the lock:
 .Pp
 .Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
@@ -194,6 +197,22 @@ Lock is read locked.
 The current thread may be one of the threads that has it locked.
 .It 0
 Lock is not locked.
+.El
+.Pp
+.Fn rw_status_curproc
+returns the current state of the lock relative to the calling thread:
+.Pp
+.Bl -tag -width "RW_WRITE_OTHER" -offset indent -compact
+.It Dv RW_WRITE
+Lock is write locked by the calling thread.
+.It Dv RW_READ
+Lock is read locked.
+The current thread may be one of the threads that has it locked.
+If the kernel has been compiled with
+.Xr witness 4 ,
+the status is exact, and the lock is read locked by the current thread.
+.It 0
+Lock is not locked by the calling thread.
 .El
 .Pp
 A lock declaration may be initialised with the
Index: sys/kern/kern_rwlock.c
===
RCS file: src/sys/kern/kern_rwlock.c,v
retrieving revision 1.32
diff -u -p -r1.32 kern_rwlock.c
--- sys/kern/kern_rwlock.c  24 Oct 2017 08:53:15 -  1.32
+++ sys/kern/kern_rwlock.c  13 Nov 2017 17:05:51 -
@@ -325,6 +325,34 @@ rw_status(struct rwlock *rwl)
return (0);
 }
 
+int
+rw_status_curproc(struct rwlock *rwl)
+{
+   unsigned long owner;
+#ifdef WITNESS
+   int status = witness_status(>rwl_lock_obj);
+
+   if (status != -1) {
+   if (status & LA_XLOCKED)
+   return (RW_WRITE);
+   if (status & LA_SLOCKED)
+   return (RW_READ);
+   return (0);
+   }
+#endif
+
+   owner = rwl->rwl_owner;
+   if (owner & RWLOCK_WRLOCK) {
+   if (RW_PROC(curproc) == RW_PROC(owner))
+   return (RW_WRITE);
+   else
+   return (0);
+   }
+   if (owner)
+   return RW_READ;
+   return (0);
+}
+
 #ifdef DIAGNOSTIC
 void
 rw_assert_wrlock(struct rwlock *rwl)
Index: sys/kern/subr_witness.c
===
RCS file: src/sys/kern/subr_witness.c,v
retrieving revision 1.4
diff -u -p -r1.4 subr_witness.c
--- sys/kern/subr_witness.c 12 Aug 2017 03:13:23 -  1.4
+++ sys/kern/subr_witness.c 13 Nov 2017 17:05:51 -
@@ -744,8 +744,8 @@ witness_checkorder(struct lock_object *l
struct witness *w, *w1;
int i, j, s;
 
-   if (witness_cold || witness_watch < 1 || lock->lo_witness == NULL ||
-   panicstr != NULL)
+   if (witness_cold || witness_watch < 1 || panicstr != NULL ||
+   (lock->lo_flags & LO_WITNESS) == 0)
return;
 
w = lock->lo_witness;
@@ -1078,8 +1078,8 @@ witness_lock(struct lock_object *lock, i
struct witness *w;
int s;
 
-   if (witness_cold || witness_watch == -1 || lock->lo_witness == NULL ||
-   panicstr != NULL)
+   if (witness_cold || witness_watch == -1 || panicstr != NULL ||
+   (lock->lo_flags & LO_WITNESS) == 0)
return;
 
w = lock->lo_witness;
@@ -2004,6 +2004,42 @@ witness_assert(const struct lock_object 
 
}
 #endif /* INVARIANT_SUPPORT */
+}
+
+int
+witness_status(struct lock_object *lock)
+{
+   struct lock_instance *instance;
+   struct lock_class *class;
+   int status = 0;
+
+   if (witness_cold || witness_watch < 1 || panicstr != NULL ||
+   (lock->lo_flags & LO_WITNESS) == 0)
+   return -1;
+
+   class = LOCK_CLASS(lock);

Re: un-KERNEL_LOCK() TCP/UDP input & co

2017-11-13 Thread Alexandr Nedvedicky
Hello,

> > So I'd like to know, what's the plan for NET_ASSERT_LOCKED() macro?
> > 
> > a) are we going to relax it, so it will test if the net_lock is
> > locked?
> 
> Yep, that's already done.

cool, thanks a lot. I've just noticed the change is there while ago, while
reading another thread here [1] (assertion "_kernel_lock_held()" failed,
uipc_socket2.c: ipsec)


thanks and
regards
sasha

[1] https://marc.info/?l=openbsd-tech=151052226224227=2 



Re: un-KERNEL_LOCK() TCP/UDP input & co

2017-11-13 Thread Martin Pieuchot
On 13/11/17(Mon) 12:14, Alexandr Nedvedicky wrote:
> your change looks good to me. Though I have a comment/question to your diff.
> 
> > Index: net/if_vxlan.c
> > ===
> > RCS file: /cvs/src/sys/net/if_vxlan.c,v
> > retrieving revision 1.63
> > diff -u -p -r1.63 if_vxlan.c
> > --- net/if_vxlan.c  25 Oct 2017 09:24:09 -  1.63
> > +++ net/if_vxlan.c  8 Nov 2017 14:49:58 -
> > @@ -611,6 +611,7 @@ vxlan_lookup(struct mbuf *m, struct udph
> > vni = VXLAN_VNI_UNSET;
> > }
> >  
> > +   NET_ASSERT_LOCKED();
> > /* First search for a vxlan(4) interface with the packet's VNI */
> > LIST_FOREACH(sc, _tagh[VXLAN_TAGHASH(vni)], sc_entry) {
> > if ((uh->uh_dport == sc->sc_dstport) &&
> > Index: net/pf.c
> 
> I think we are approaching point, which requires us to revisit
> NET_ASSERT_LOCKED(). The assert currently tests caller is writer
> on net_lock.

Since last week NET_ASSERT_LOCKED() is checking if the calling thread
owns the (write) lock or if it is hold by at least one reader.

Without changing our rwlock implementation there's no way to check if
the calling thread is holding a read lock.  That would be definitively
useful.

> Since we are going to 'soften' the NET_LOCK() to R-lock for
> some tasks/threads the NET_ASSERT_LOCKED() will become invalid
> (false positive) assertion for functions, which are being grabbed
> occasionally as readers and other times as writers (?typically in
> local outbound path?). I've seen such smoke already in my diffs
> I'm working on currently.
> 
> So I'd like to know, what's the plan for NET_ASSERT_LOCKED() macro?
> 
>   a) are we going to relax it, so it will test if the net_lock is
>   locked?

Yep, that's already done.

>   b) are we going to keep it, but a new 'soft' assert will be introduced
>   e.g. NET_ASSERT_ALOCKED() (A for any lock)?

We should turn NET_ASSERT_LOCKED() macros that require a write lock to
NET_ASSERT_WLOCKED().  But that's mostly in the ioctl(2) path.

>   c) add parameter to current NET_ASSERT_LOCKED() stating desired
>   lock level: 0 - unlocked, 1 - reader, 2 - writer

I don't care how the API looks like, I'd just argue for coherency
between all or locks.

> As I've said the patch looks good to me as-is and should go in. I just
> would like to hear some greater plan for NET_ASSERT_LOCKED(). Perhaps
> we should go for it before we will be further playing with parallel
> softnet.

I'd love is somebody could do the changes in the rwlock API to let us
check if we're holding a lock.  Maybe this is already present in
witness(4), visa?



Re: un-KERNEL_LOCK() TCP/UDP input & co

2017-11-13 Thread Alexandr Nedvedicky
Hello,

your change looks good to me. Though I have a comment/question to your diff.


> Index: net/if_vxlan.c
> ===
> RCS file: /cvs/src/sys/net/if_vxlan.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 if_vxlan.c
> --- net/if_vxlan.c25 Oct 2017 09:24:09 -  1.63
> +++ net/if_vxlan.c8 Nov 2017 14:49:58 -
> @@ -611,6 +611,7 @@ vxlan_lookup(struct mbuf *m, struct udph
>   vni = VXLAN_VNI_UNSET;
>   }
>  
> + NET_ASSERT_LOCKED();
>   /* First search for a vxlan(4) interface with the packet's VNI */
>   LIST_FOREACH(sc, _tagh[VXLAN_TAGHASH(vni)], sc_entry) {
>   if ((uh->uh_dport == sc->sc_dstport) &&
> Index: net/pf.c

I think we are approaching point, which requires us to revisit
NET_ASSERT_LOCKED(). The assert currently tests caller is writer
on net_lock.

Since we are going to 'soften' the NET_LOCK() to R-lock for
some tasks/threads the NET_ASSERT_LOCKED() will become invalid
(false positive) assertion for functions, which are being grabbed
occasionally as readers and other times as writers (?typically in
local outbound path?). I've seen such smoke already in my diffs
I'm working on currently.

So I'd like to know, what's the plan for NET_ASSERT_LOCKED() macro?

a) are we going to relax it, so it will test if the net_lock is
locked?

b) are we going to keep it, but a new 'soft' assert will be introduced
e.g. NET_ASSERT_ALOCKED() (A for any lock)?

c) add parameter to current NET_ASSERT_LOCKED() stating desired
lock level: 0 - unlocked, 1 - reader, 2 - writer

I'm leaning towards option b) introduce a new assertion for cases
where we require lock, but we don't care if it is reader/writer.

As I've said the patch looks good to me as-is and should go in. I just
would like to hear some greater plan for NET_ASSERT_LOCKED(). Perhaps
we should go for it before we will be further playing with parallel
softnet.

thanks a lot
regards
sasha



un-KERNEL_LOCK() TCP/UDP input & co

2017-11-08 Thread Martin Pieuchot
After auditing all the pr_input() functions, the only missing bits
for taking them out of the KERNEL_LOCK() are: ``etheripstat''.  I
leave such counter conversions for somebody else (8

In the meantime I annotated globals used in these functions.  Most
of the pseudo-interfaces have a global list that is currently protected
by the NET_LOCK().

PCB tables are also currently protected by the NET_LOCK().

carp(4) and IGMP inputs need some love, so they grab the KERNEL_LOCK()
for now.

inet{,6}ctlerrmap are read-only so they get turned into 'const'.

With this diff in we should be able to have a mostly KERNEL_LOCK()-free
softnet taskq.

ok?

Index: net/if_etherip.c
===
RCS file: /cvs/src/sys/net/if_etherip.c,v
retrieving revision 1.21
diff -u -p -r1.21 if_etherip.c
--- net/if_etherip.c25 Oct 2017 09:24:09 -  1.21
+++ net/if_etherip.c8 Nov 2017 14:41:29 -
@@ -434,6 +434,7 @@ ip_etherip_input(struct mbuf **mp, int *
return IPPROTO_DONE;
}
 
+   NET_ASSERT_LOCKED();
LIST_FOREACH(sc, _softc_list, sc_entry) {
if (sc->sc_src.ss_family != AF_INET ||
sc->sc_dst.ss_family != AF_INET)
@@ -598,6 +599,7 @@ ip6_etherip_input(struct mbuf **mp, int 
in6_recoverscope(, >ip6_src);
in6_recoverscope(, >ip6_dst);
 
+   NET_ASSERT_LOCKED();
LIST_FOREACH(sc, _softc_list, sc_entry) {
if (sc->sc_src.ss_family != AF_INET6 ||
sc->sc_dst.ss_family != AF_INET6)
Index: net/if_gif.c
===
RCS file: /cvs/src/sys/net/if_gif.c,v
retrieving revision 1.101
diff -u -p -r1.101 if_gif.c
--- net/if_gif.c25 Oct 2017 09:24:09 -  1.101
+++ net/if_gif.c8 Nov 2017 14:40:26 -
@@ -622,6 +622,7 @@ in_gif_input(struct mbuf **mp, int *offp
 
ip = mtod(m, struct ip *);
 
+   NET_ASSERT_LOCKED();
/* this code will be soon improved. */
LIST_FOREACH(sc, _softc_list, gif_list) {
if (sc->gif_psrc == NULL || sc->gif_pdst == NULL ||
@@ -730,7 +731,8 @@ in6_gif_output(struct ifnet *ifp, int fa
return 0;
 }
 
-int in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
+int
+in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
 {
struct mbuf *m = *mp;
struct gif_softc *sc;
@@ -747,6 +749,7 @@ int in6_gif_input(struct mbuf **mp, int 
in6_recoverscope(, >ip6_src);
in6_recoverscope(, >ip6_dst);
 
+   NET_ASSERT_LOCKED();
LIST_FOREACH(sc, _softc_list, gif_list) {
if (sc->gif_psrc == NULL || sc->gif_pdst == NULL ||
sc->gif_psrc->sa_family != AF_INET6 ||
Index: net/if_pfsync.c
===
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.254
diff -u -p -r1.254 if_pfsync.c
--- net/if_pfsync.c 11 Aug 2017 21:24:19 -  1.254
+++ net/if_pfsync.c 8 Nov 2017 14:42:49 -
@@ -659,6 +659,8 @@ pfsync_input(struct mbuf **mp, int *offp
int offset, noff, len, count, mlen, flags = 0;
int e;
 
+   NET_ASSERT_LOCKED();
+
pfsyncstat_inc(pfsyncs_ipackets);
 
/* verify that we have a sync interface configured */
Index: net/if_vxlan.c
===
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.63
diff -u -p -r1.63 if_vxlan.c
--- net/if_vxlan.c  25 Oct 2017 09:24:09 -  1.63
+++ net/if_vxlan.c  8 Nov 2017 14:49:58 -
@@ -611,6 +611,7 @@ vxlan_lookup(struct mbuf *m, struct udph
vni = VXLAN_VNI_UNSET;
}
 
+   NET_ASSERT_LOCKED();
/* First search for a vxlan(4) interface with the packet's VNI */
LIST_FOREACH(sc, _tagh[VXLAN_TAGHASH(vni)], sc_entry) {
if ((uh->uh_dport == sc->sc_dstport) &&
Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1043
diff -u -p -r1.1043 pf.c
--- net/pf.c31 Oct 2017 22:05:12 -  1.1043
+++ net/pf.c8 Nov 2017 15:03:35 -
@@ -3184,12 +3184,14 @@ pf_socket_lookup(struct pf_pdesc *pd)
sport = pd->hdr.tcp.th_sport;
dport = pd->hdr.tcp.th_dport;
PF_ASSERT_LOCKED();
+   NET_ASSERT_LOCKED();
tb = 
break;
case IPPROTO_UDP:
sport = pd->hdr.udp.uh_sport;
dport = pd->hdr.udp.uh_dport;
PF_ASSERT_LOCKED();
+   NET_ASSERT_LOCKED();
tb = 
break;
default:
Index: net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.105
diff -u -p -r1.105 pipex.c
--- net/pipex.c 11 Aug 2017 21:24:19 -