Re: soreceive() with shared netlock for raw sockets

2022-09-12 Thread Alexander Bluhm
On Sat, Sep 10, 2022 at 10:49:40PM +0300, Vitaliy Makkoveev wrote:
> As was done for UDP and divert sockets.

I have no stress test for parallel receive yet.
Diff looks reasonable.

OK bluhm@
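
Since there is no stress test yet, a minimal userland receiver like the
following can be used by hand: run several copies (or several threads on
one socket) and ping the machine to push traffic through rip_input() and
soreceive() concurrently.  This is only a sketch, not an existing regress
test; the ICMP protocol choice and the error handling are illustrative.

/* sketch: raw ICMP receiver, needs root; run multiple instances */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct sockaddr_in from;
	socklen_t fromlen;
	char buf[65535];
	ssize_t n;
	int s;

	if ((s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP)) == -1)
		err(1, "socket");
	for (;;) {
		fromlen = sizeof(from);
		/* each call goes through soreceive() on the raw socket */
		n = recvfrom(s, buf, sizeof(buf), 0,
		    (struct sockaddr *)&from, &fromlen);
		if (n == -1)
			err(1, "recvfrom");
		printf("received %zd bytes\n", n);
	}
}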

> Index: sys/netinet/ip_var.h
> ===
> RCS file: /cvs/src/sys/netinet/ip_var.h,v
> retrieving revision 1.104
> diff -u -p -r1.104 ip_var.h
> --- sys/netinet/ip_var.h  3 Sep 2022 22:43:38 -   1.104
> +++ sys/netinet/ip_var.h  10 Sep 2022 19:41:56 -
> @@ -258,6 +258,8 @@ int rip_output(struct mbuf *, struct so
>   struct mbuf *);
>  int   rip_attach(struct socket *, int);
>  int   rip_detach(struct socket *);
> +void  rip_lock(struct socket *);
> +void  rip_unlock(struct socket *);
>  int   rip_bind(struct socket *so, struct mbuf *, struct proc *);
>  int   rip_connect(struct socket *, struct mbuf *);
>  int   rip_disconnect(struct socket *);
> Index: sys/netinet/raw_ip.c
> ===
> RCS file: /cvs/src/sys/netinet/raw_ip.c,v
> retrieving revision 1.147
> diff -u -p -r1.147 raw_ip.c
> --- sys/netinet/raw_ip.c  3 Sep 2022 22:43:38 -   1.147
> +++ sys/netinet/raw_ip.c  10 Sep 2022 19:41:56 -
> @@ -106,6 +106,8 @@ struct inpcbtable rawcbtable;
>  const struct pr_usrreqs rip_usrreqs = {
>   .pru_attach = rip_attach,
>   .pru_detach = rip_detach,
> + .pru_lock   = rip_lock,
> + .pru_unlock = rip_unlock,
>   .pru_bind   = rip_bind,
>   .pru_connect= rip_connect,
>   .pru_disconnect = rip_disconnect,
> @@ -220,12 +222,19 @@ rip_input(struct mbuf **mp, int *offp, i
>   else
>   n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
>   if (n != NULL) {
> + int ret;
> +
>   if (inp->inp_flags & INP_CONTROLOPTS ||
>   inp->inp_socket->so_options & SO_TIMESTAMP)
>   ip_savecontrol(inp, &opts, ip, n);
> - if (sbappendaddr(inp->inp_socket,
> +
> + mtx_enter(&inp->inp_mtx);
> + ret = sbappendaddr(inp->inp_socket,
>   &inp->inp_socket->so_rcv,
> - sintosa(&ripsrc), n, opts) == 0) {
> + sintosa(&ripsrc), n, opts);
> + mtx_leave(&inp->inp_mtx);
> +
> + if (ret == 0) {
>   /* should notify about lost packet */
>   m_freem(n);
>   m_freem(opts);
> @@ -498,6 +507,24 @@ rip_detach(struct socket *so)
>   in_pcbdetach(inp);
>  
>   return (0);
> +}
> +
> +void
> +rip_lock(struct socket *so)
> +{
> + struct inpcb *inp = sotoinpcb(so);
> +
> + NET_ASSERT_LOCKED();
> + mtx_enter(&inp->inp_mtx);
> +}
> +
> +void
> +rip_unlock(struct socket *so)
> +{
> + struct inpcb *inp = sotoinpcb(so);
> +
> + NET_ASSERT_LOCKED();
> + mtx_leave(&inp->inp_mtx);
>  }
>  
>  int
> Index: sys/netinet6/ip6_var.h
> ===
> RCS file: /cvs/src/sys/netinet6/ip6_var.h,v
> retrieving revision 1.102
> diff -u -p -r1.102 ip6_var.h
> --- sys/netinet6/ip6_var.h3 Sep 2022 22:43:38 -   1.102
> +++ sys/netinet6/ip6_var.h10 Sep 2022 19:41:56 -
> @@ -353,6 +353,8 @@ int   rip6_output(struct mbuf *, struct so
>   struct mbuf *);
>  int  rip6_attach(struct socket *, int);
>  int  rip6_detach(struct socket *);
> +void rip6_lock(struct socket *);
> +void rip6_unlock(struct socket *);
>  int  rip6_bind(struct socket *, struct mbuf *, struct proc *);
>  int  rip6_connect(struct socket *, struct mbuf *);
>  int  rip6_disconnect(struct socket *);
> Index: sys/netinet6/raw_ip6.c
> ===
> RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
> retrieving revision 1.168
> diff -u -p -r1.168 raw_ip6.c
> --- sys/netinet6/raw_ip6.c3 Sep 2022 22:43:38 -   1.168
> +++ sys/netinet6/raw_ip6.c10 Sep 2022 19:41:56 -
> @@ -108,6 +108,8 @@ struct cpumem *rip6counters;
>  const struct pr_usrreqs rip6_usrreqs = {
>   .pru_attach = rip6_attach,
>   .pru_detach = rip6_detach,
> + .pru_lock   = rip6_lock,
> + .pru_unlock = rip6_unlock,
>   .pru_bind   = rip6_bind,
>   .pru_connect= rip6_connect,
>   .pru_disconnect = rip6_disconnect,
> @@ -261,13 +263,20 @@ rip6_input(struct mbuf **mp, int *offp, 
>   else
>   n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
>   if (n != NULL) {
> + int ret;
> +
>   if (in6p->inp_flags & IN6P_CONTROLOPTS)
>   ip6_savecontrol(in6p, n, &opts);
>   /* strip intermediate headers */
>   m_adj(n, *offp);
> - if 

soreceive() with shared netlock for raw sockets

2022-09-10 Thread Vitaliy Makkoveev
As was done for UDP and divert sockets.
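
For context, these pru_lock/pru_unlock hooks are consumed by the socket
layer's shared-lock helpers from the "soreceive with shared netlock"
change further down in this archive.  Roughly (a sketch condensed from
that diff; the sounlock_shared() body is filled in by symmetry, so treat
it as an assumption):

void
solock_shared(struct socket *so)
{
	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
			/* protocol has a PCB mutex, shared netlock suffices */
			NET_LOCK_SHARED();
			pru_lock(so);
		} else
			NET_LOCK();
		break;
	default:
		rw_enter_write(&so->so_lock);
		break;
	}
}

void
sounlock_shared(struct socket *so)
{
	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
			pru_unlock(so);
			NET_UNLOCK_SHARED();
		} else
			NET_UNLOCK();
		break;
	default:
		rw_exit_write(&so->so_lock);
		break;
	}
}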

Index: sys/netinet/ip_var.h
===
RCS file: /cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.104
diff -u -p -r1.104 ip_var.h
--- sys/netinet/ip_var.h3 Sep 2022 22:43:38 -   1.104
+++ sys/netinet/ip_var.h10 Sep 2022 19:41:56 -
@@ -258,6 +258,8 @@ int  rip_output(struct mbuf *, struct so
struct mbuf *);
 int rip_attach(struct socket *, int);
 int rip_detach(struct socket *);
+voidrip_lock(struct socket *);
+voidrip_unlock(struct socket *);
 int rip_bind(struct socket *so, struct mbuf *, struct proc *);
 int rip_connect(struct socket *, struct mbuf *);
 int rip_disconnect(struct socket *);
Index: sys/netinet/raw_ip.c
===
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.147
diff -u -p -r1.147 raw_ip.c
--- sys/netinet/raw_ip.c3 Sep 2022 22:43:38 -   1.147
+++ sys/netinet/raw_ip.c10 Sep 2022 19:41:56 -
@@ -106,6 +106,8 @@ struct inpcbtable rawcbtable;
 const struct pr_usrreqs rip_usrreqs = {
.pru_attach = rip_attach,
.pru_detach = rip_detach,
+   .pru_lock   = rip_lock,
+   .pru_unlock = rip_unlock,
.pru_bind   = rip_bind,
.pru_connect= rip_connect,
.pru_disconnect = rip_disconnect,
@@ -220,12 +222,19 @@ rip_input(struct mbuf **mp, int *offp, i
else
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n != NULL) {
+   int ret;
+
if (inp->inp_flags & INP_CONTROLOPTS ||
inp->inp_socket->so_options & SO_TIMESTAMP)
ip_savecontrol(inp, &opts, ip, n);
-   if (sbappendaddr(inp->inp_socket,
+
+   mtx_enter(&inp->inp_mtx);
+   ret = sbappendaddr(inp->inp_socket,
&inp->inp_socket->so_rcv,
-   sintosa(&ripsrc), n, opts) == 0) {
+   sintosa(&ripsrc), n, opts);
+   mtx_leave(&inp->inp_mtx);
+
+   if (ret == 0) {
/* should notify about lost packet */
m_freem(n);
m_freem(opts);
@@ -498,6 +507,24 @@ rip_detach(struct socket *so)
in_pcbdetach(inp);
 
return (0);
+}
+
+void
+rip_lock(struct socket *so)
+{
+   struct inpcb *inp = sotoinpcb(so);
+
+   NET_ASSERT_LOCKED();
+   mtx_enter(&inp->inp_mtx);
+}
+
+void
+rip_unlock(struct socket *so)
+{
+   struct inpcb *inp = sotoinpcb(so);
+
+   NET_ASSERT_LOCKED();
+   mtx_leave(&inp->inp_mtx);
 }
 
 int
Index: sys/netinet6/ip6_var.h
===
RCS file: /cvs/src/sys/netinet6/ip6_var.h,v
retrieving revision 1.102
diff -u -p -r1.102 ip6_var.h
--- sys/netinet6/ip6_var.h  3 Sep 2022 22:43:38 -   1.102
+++ sys/netinet6/ip6_var.h  10 Sep 2022 19:41:56 -
@@ -353,6 +353,8 @@ int rip6_output(struct mbuf *, struct so
struct mbuf *);
 intrip6_attach(struct socket *, int);
 intrip6_detach(struct socket *);
+void   rip6_lock(struct socket *);
+void   rip6_unlock(struct socket *);
 intrip6_bind(struct socket *, struct mbuf *, struct proc *);
 intrip6_connect(struct socket *, struct mbuf *);
 intrip6_disconnect(struct socket *);
Index: sys/netinet6/raw_ip6.c
===
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.168
diff -u -p -r1.168 raw_ip6.c
--- sys/netinet6/raw_ip6.c  3 Sep 2022 22:43:38 -   1.168
+++ sys/netinet6/raw_ip6.c  10 Sep 2022 19:41:56 -
@@ -108,6 +108,8 @@ struct cpumem *rip6counters;
 const struct pr_usrreqs rip6_usrreqs = {
.pru_attach = rip6_attach,
.pru_detach = rip6_detach,
+   .pru_lock   = rip6_lock,
+   .pru_unlock = rip6_unlock,
.pru_bind   = rip6_bind,
.pru_connect= rip6_connect,
.pru_disconnect = rip6_disconnect,
@@ -261,13 +263,20 @@ rip6_input(struct mbuf **mp, int *offp, 
else
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n != NULL) {
+   int ret;
+
if (in6p->inp_flags & IN6P_CONTROLOPTS)
ip6_savecontrol(in6p, n, &opts);
/* strip intermediate headers */
m_adj(n, *offp);
-   if (sbappendaddr(in6p->inp_socket,
+
+   mtx_enter(&in6p->inp_mtx);
+   ret = sbappendaddr(in6p->inp_socket,
&in6p->inp_socket->so_rcv,
-   sin6tosa(&rip6src), n, opts) == 0) {
+   

Re: soreceive with shared netlock

2022-09-03 Thread Vitaliy Makkoveev
> On 3 Sep 2022, at 23:47, Alexander Bluhm  wrote:
> 
> Hi,
> 
> The next small step towards a parallel network stack is to use shared
> netlock in soreceive().  The UDP and IP divert layers provide locking
> of the PCB.  Where that is possible, use shared instead of exclusive
> netlock in soreceive().  The PCB mutex provides a per-socket lock
> against multiple soreceive() running in parallel.
> 
> The solock_shared() is a bit hacky, especially where we have to
> release and regrab both locks in sosleep_nsec().  But it does what
> we need.
> 
> The udp_input() keeps exclusive lock for now.  Socket splicing and
> IP header chains are not completely MP safe yet.  Also udp_input()
> has some tentacles that need more testing.
> 
> Ok to proceed with soreceive() unlocking before release?  I think
> udp_input() has to wait until after 7.2.
> 

I have no objections.

> bluhm
> 
> Index: kern/uipc_socket.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.287
> diff -u -p -r1.287 uipc_socket.c
> --- kern/uipc_socket.c3 Sep 2022 13:29:33 -   1.287
> +++ kern/uipc_socket.c3 Sep 2022 19:23:43 -
> @@ -822,10 +822,10 @@ bad:
>   if (mp)
>   *mp = NULL;
> 
> - solock(so);
> + solock_shared(so);
> restart:
> if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
> - sounlock(so);
> + sounlock_shared(so);
>   return (error);
>   }
> 
> @@ -893,7 +893,7 @@ restart:
>   sbunlock(so, &so->so_rcv);
>   error = sbwait(so, &so->so_rcv);
>   if (error) {
> - sounlock(so);
> + sounlock_shared(so);
>   return (error);
>   }
>   goto restart;
> @@ -962,11 +962,11 @@ dontblock:
>   sbsync(&so->so_rcv, nextrecord);
>   if (controlp) {
>   if (pr->pr_domain->dom_externalize) {
> - sounlock(so);
> + sounlock_shared(so);
>   error =
>   (*pr->pr_domain->dom_externalize)
>   (cm, controllen, flags);
> - solock(so);
> + solock_shared(so);
>   }
>   *controlp = cm;
>   } else {
> @@ -1040,9 +1040,9 @@ dontblock:
>   SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
>   SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
>   resid = uio->uio_resid;
> - sounlock(so);
> + sounlock_shared(so);
>   uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
> - solock(so);
> + solock_shared(so);
>   if (uio_error)
>   uio->uio_resid = resid - len;
>   } else
> @@ -1126,7 +1126,7 @@ dontblock:
>   error = sbwait(so, &so->so_rcv);
>   if (error) {
>   sbunlock(so, &so->so_rcv);
> - sounlock(so);
> + sounlock_shared(so);
>   return (0);
>   }
>   if ((m = so->so_rcv.sb_mb) != NULL)
> @@ -1171,7 +1171,7 @@ dontblock:
>   *flagsp |= flags;
> release:
>   sbunlock(so, &so->so_rcv);
> - sounlock(so);
> + sounlock_shared(so);
>   return (error);
> }
> 
> Index: kern/uipc_socket2.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.127
> diff -u -p -r1.127 uipc_socket2.c
> --- kern/uipc_socket2.c   13 Aug 2022 21:01:46 -  1.127
> +++ kern/uipc_socket2.c   3 Sep 2022 19:23:43 -
> @@ -360,6 +360,24 @@ solock(struct socket *so)
>   }
> }
> 
> +void
> +solock_shared(struct socket *so)
> +{
> + switch (so->so_proto->pr_domain->dom_family) {
> + case PF_INET:
> + case PF_INET6:
> + if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
> + NET_LOCK_SHARED();
> + pru_lock(so);
> + } else
> + 

Re: soreceive with shared netlock

2022-09-03 Thread Vitaliy Makkoveev
> On 4 Sep 2022, at 00:56, Alexander Bluhm  wrote:
> 
> On Sat, Sep 03, 2022 at 11:20:17PM +0200, Hrvoje Popovski wrote:
>> with this diff while booting I'm getting this witness trace
> 
> It is not related to the soreceive() diff, but to the TCP diff I
> committed before.  I forgot a mutex initialization, which is fatal
> with witness.
> Fix below.
> 

ok mvs@

> bluhm
> 
> Index: netinet/tcp_subr.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> retrieving revision 1.187
> diff -u -p -r1.187 tcp_subr.c
> --- netinet/tcp_subr.c3 Sep 2022 19:22:19 -   1.187
> +++ netinet/tcp_subr.c3 Sep 2022 21:54:48 -
> @@ -105,7 +105,7 @@
>  *T   tcp_timer_mtx   global tcp timer data structures
>  */
> 
> -struct mutex tcp_timer_mtx;
> +struct mutex tcp_timer_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> 
> /* patchable/settable parameters for tcp */
> int   tcp_mssdflt = TCP_MSS;
> 



Re: soreceive with shared netlock

2022-09-03 Thread Alexander Bluhm
On Sat, Sep 03, 2022 at 11:20:17PM +0200, Hrvoje Popovski wrote:
> with this diff while booting I'm getting this witness trace

It is not related to the soreceive() diff, but to the TCP diff I
committed before.  I forgot a mutex initialization, which is fatal
with witness.
Fix below.

bluhm

Index: netinet/tcp_subr.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.187
diff -u -p -r1.187 tcp_subr.c
--- netinet/tcp_subr.c  3 Sep 2022 19:22:19 -   1.187
+++ netinet/tcp_subr.c  3 Sep 2022 21:54:48 -
@@ -105,7 +105,7 @@
  * T   tcp_timer_mtx   global tcp timer data structures
  */
 
-struct mutex tcp_timer_mtx;
+struct mutex tcp_timer_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
 
 /* patchable/settable parameters for tcp */
 int	tcp_mssdflt = TCP_MSS;
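
For reference, the same fix could also have been done with a run-time
initialization instead of the static initializer, as long as it runs
before the first timeout can fire, e.g. from tcp_init().  A sketch, not
what was committed:

struct mutex tcp_timer_mtx;

void
tcp_init(void)
{
	/* initialize before any tcp timer can take the mutex */
	mtx_init(&tcp_timer_mtx, IPL_SOFTNET);
	/* ... */
}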



Re: soreceive with shared netlock

2022-09-03 Thread Hrvoje Popovski
On 3.9.2022. 22:47, Alexander Bluhm wrote:
> Hi,
> 
> The next small step towards a parallel network stack is to use shared
> netlock in soreceive().  The UDP and IP divert layers provide locking
> of the PCB.  Where that is possible, use shared instead of exclusive
> netlock in soreceive().  The PCB mutex provides a per-socket lock
> against multiple soreceive() running in parallel.
> 
> The solock_shared() is a bit hacky, especially where we have to
> release and regrab both locks in sosleep_nsec().  But it does what
> we need.
> 
> The udp_input() keeps exclusive lock for now.  Socket splicing and
> IP header chains are not completely MP safe yet.  Also udp_input()
> has some tentacles that need more testing.
> 
> Ok to proceed with soreceive() unlocking before release?  I think
> udp_input() has to wait until after 7.2.

Hi,

with this diff while booting I'm getting this witness trace

iic0 at ichiic0
isa0 at pcib0
isadma0 at isa0
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: VMX/EPT
witness: lock_object uninitialized: 0x82379880
Starting stack trace...
witness_checkorder(82379880,9,0,82379880,8a214bc08a1c5075,822b0ff0)
at witness_checkorder+0xad
mtx_enter(82379870,82379870,6e3a311a0ad2ae1e,82097020,824c12c0,9)
at mtx_enter+0x34
tcp_slowtimo(812dd160,800020bf3110,82379870,82379870,6e3a311a0ad2ae1e,82097020)
at tcp_slowtimo+0x10
pfslowtimo(824c12c0,824c12c0,824c12c0,81ed1820,822b3148,d)
at pfslowtimo+0xc7
timeout_run(824c12c0,824c12c0,82301e90,800020bf31a0,81277b90,82301e90)
at timeout_run+0x93
softclock_thread(800020be2fc0,800020be2fc0,0,0,0,4) at
softclock_thread+0x11d
end trace frame: 0x0, count: 251
End of stack trace.





soreceive with shared netlock

2022-09-03 Thread Alexander Bluhm
Hi,

The next small step towards a parallel network stack is to use shared
netlock in soreceive().  The UDP and IP divert layers provide locking
of the PCB.  Where that is possible, use shared instead of exclusive
netlock in soreceive().  The PCB mutex provides a per-socket lock
against multiple soreceive() running in parallel.

The solock_shared() is a bit hacky, especially where we have to
release and regrab both locks in sosleep_nsec().  But it does what
we need.
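
The release and regrab in sosleep_nsec() might look roughly like this,
assuming rwsleep_nsec() is used to give up and retake the shared netlock
around the sleep.  A sketch, not the committed code:

int
sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
    uint64_t nsecs)
{
	int ret;

	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		/* drop the PCB mutex before sleeping on the netlock */
		if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
		    rw_status(&netlock) == RW_READ)
			pru_unlock(so);
		ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
		/* regrab the PCB mutex after the netlock is held again */
		if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
		    rw_status(&netlock) == RW_READ)
			pru_lock(so);
		break;
	default:
		ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
		break;
	}

	return (ret);
}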

The udp_input() keeps exclusive lock for now.  Socket splicing and
IP header chains are not completely MP safe yet.  Also udp_input()
has some tentacles that need more testing.

Ok to proceed with soreceive() unlocking before release?  I think
udp_input() has to wait until after 7.2.

bluhm

Index: kern/uipc_socket.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.287
diff -u -p -r1.287 uipc_socket.c
--- kern/uipc_socket.c  3 Sep 2022 13:29:33 -   1.287
+++ kern/uipc_socket.c  3 Sep 2022 19:23:43 -
@@ -822,10 +822,10 @@ bad:
if (mp)
*mp = NULL;
 
-   solock(so);
+   solock_shared(so);
 restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
-   sounlock(so);
+   sounlock_shared(so);
return (error);
}
 
@@ -893,7 +893,7 @@ restart:
sbunlock(so, &so->so_rcv);
error = sbwait(so, &so->so_rcv);
if (error) {
-   sounlock(so);
+   sounlock_shared(so);
return (error);
}
goto restart;
@@ -962,11 +962,11 @@ dontblock:
sbsync(&so->so_rcv, nextrecord);
if (controlp) {
if (pr->pr_domain->dom_externalize) {
-   sounlock(so);
+   sounlock_shared(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
-   solock(so);
+   solock_shared(so);
}
*controlp = cm;
} else {
@@ -1040,9 +1040,9 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
-   sounlock(so);
+   sounlock_shared(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
-   solock(so);
+   solock_shared(so);
if (uio_error)
uio->uio_resid = resid - len;
} else
@@ -1126,7 +1126,7 @@ dontblock:
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
-   sounlock(so);
+   sounlock_shared(so);
return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
@@ -1171,7 +1171,7 @@ dontblock:
*flagsp |= flags;
 release:
sbunlock(so, &so->so_rcv);
-   sounlock(so);
+   sounlock_shared(so);
return (error);
 }
 
Index: kern/uipc_socket2.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.127
diff -u -p -r1.127 uipc_socket2.c
--- kern/uipc_socket2.c 13 Aug 2022 21:01:46 -  1.127
+++ kern/uipc_socket2.c 3 Sep 2022 19:23:43 -
@@ -360,6 +360,24 @@ solock(struct socket *so)
}
 }
 
+void
+solock_shared(struct socket *so)
+{
+   switch (so->so_proto->pr_domain->dom_family) {
+   case PF_INET:
+   case PF_INET6:
+   if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
+   NET_LOCK_SHARED();
+   pru_lock(so);
+   } else
+   NET_LOCK();
+   break;
+   default:
rw_enter_write(&so->so_lock);
+   break;
+   }
+}
+
 int
 solock_persocket(struct socket *so)
 {
@@ -403,6 +421,24 @@ sounlock(struct socket *so)
 }
 
 void
+sounlock_shared(struct socket *so)
+{
+   switch (so->so_proto->pr_domain->dom_family) {
+   case PF_INET:
+   case PF_INET6:
+   if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
+   pru_unlock(so);
+   NET_UNLOCK_SH