Re: soreceive() with shared netlock for raw sockets
On Sat, Sep 10, 2022 at 10:49:40PM +0300, Vitaliy Makkoveev wrote: > As it was done for udp and divert sockets. I have no stess test for parallel receive yet. Diff looks reasonable. OK bluhm@ > Index: sys/netinet/ip_var.h > === > RCS file: /cvs/src/sys/netinet/ip_var.h,v > retrieving revision 1.104 > diff -u -p -r1.104 ip_var.h > --- sys/netinet/ip_var.h 3 Sep 2022 22:43:38 - 1.104 > +++ sys/netinet/ip_var.h 10 Sep 2022 19:41:56 - > @@ -258,6 +258,8 @@ intrip_output(struct mbuf *, struct so > struct mbuf *); > int rip_attach(struct socket *, int); > int rip_detach(struct socket *); > +void rip_lock(struct socket *); > +void rip_unlock(struct socket *); > int rip_bind(struct socket *so, struct mbuf *, struct proc *); > int rip_connect(struct socket *, struct mbuf *); > int rip_disconnect(struct socket *); > Index: sys/netinet/raw_ip.c > === > RCS file: /cvs/src/sys/netinet/raw_ip.c,v > retrieving revision 1.147 > diff -u -p -r1.147 raw_ip.c > --- sys/netinet/raw_ip.c 3 Sep 2022 22:43:38 - 1.147 > +++ sys/netinet/raw_ip.c 10 Sep 2022 19:41:56 - > @@ -106,6 +106,8 @@ struct inpcbtable rawcbtable; > const struct pr_usrreqs rip_usrreqs = { > .pru_attach = rip_attach, > .pru_detach = rip_detach, > + .pru_lock = rip_lock, > + .pru_unlock = rip_unlock, > .pru_bind = rip_bind, > .pru_connect= rip_connect, > .pru_disconnect = rip_disconnect, > @@ -220,12 +222,19 @@ rip_input(struct mbuf **mp, int *offp, i > else > n = m_copym(m, 0, M_COPYALL, M_NOWAIT); > if (n != NULL) { > + int ret; > + > if (inp->inp_flags & INP_CONTROLOPTS || > inp->inp_socket->so_options & SO_TIMESTAMP) > ip_savecontrol(inp, , ip, n); > - if (sbappendaddr(inp->inp_socket, > + > + mtx_enter(>inp_mtx); > + ret = sbappendaddr(inp->inp_socket, > >inp_socket->so_rcv, > - sintosa(), n, opts) == 0) { > + sintosa(), n, opts); > + mtx_leave(>inp_mtx); > + > + if (ret == 0) { > /* should notify about lost packet */ > m_freem(n); > m_freem(opts); > @@ -498,6 +507,24 @@ rip_detach(struct socket *so) > in_pcbdetach(inp); > > return (0); > +} > + > +void > +rip_lock(struct socket *so) > +{ > + struct inpcb *inp = sotoinpcb(so); > + > + NET_ASSERT_LOCKED(); > + mtx_enter(>inp_mtx); > +} > + > +void > +rip_unlock(struct socket *so) > +{ > + struct inpcb *inp = sotoinpcb(so); > + > + NET_ASSERT_LOCKED(); > + mtx_leave(>inp_mtx); > } > > int > Index: sys/netinet6/ip6_var.h > === > RCS file: /cvs/src/sys/netinet6/ip6_var.h,v > retrieving revision 1.102 > diff -u -p -r1.102 ip6_var.h > --- sys/netinet6/ip6_var.h3 Sep 2022 22:43:38 - 1.102 > +++ sys/netinet6/ip6_var.h10 Sep 2022 19:41:56 - > @@ -353,6 +353,8 @@ int rip6_output(struct mbuf *, struct so > struct mbuf *); > int rip6_attach(struct socket *, int); > int rip6_detach(struct socket *); > +void rip6_lock(struct socket *); > +void rip6_unlock(struct socket *); > int rip6_bind(struct socket *, struct mbuf *, struct proc *); > int rip6_connect(struct socket *, struct mbuf *); > int rip6_disconnect(struct socket *); > Index: sys/netinet6/raw_ip6.c > === > RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v > retrieving revision 1.168 > diff -u -p -r1.168 raw_ip6.c > --- sys/netinet6/raw_ip6.c3 Sep 2022 22:43:38 - 1.168 > +++ sys/netinet6/raw_ip6.c10 Sep 2022 19:41:56 - > @@ -108,6 +108,8 @@ struct cpumem *rip6counters; > const struct pr_usrreqs rip6_usrreqs = { > .pru_attach = rip6_attach, > .pru_detach = rip6_detach, > + .pru_lock = rip6_lock, > + .pru_unlock = rip6_unlock, > .pru_bind = rip6_bind, > .pru_connect= rip6_connect, > .pru_disconnect = rip6_disconnect, > @@ -261,13 +263,20 @@ rip6_input(struct mbuf **mp, int *offp, > else > n = m_copym(m, 0, M_COPYALL, M_NOWAIT); > if (n != NULL) { > + int ret; > + > if (in6p->inp_flags & IN6P_CONTROLOPTS) > ip6_savecontrol(in6p, n, ); > /* strip intermediate headers */ > m_adj(n, *offp); > - if
soreceive() with shared netlock for raw sockets
As it was done for udp and divert sockets. Index: sys/netinet/ip_var.h === RCS file: /cvs/src/sys/netinet/ip_var.h,v retrieving revision 1.104 diff -u -p -r1.104 ip_var.h --- sys/netinet/ip_var.h3 Sep 2022 22:43:38 - 1.104 +++ sys/netinet/ip_var.h10 Sep 2022 19:41:56 - @@ -258,6 +258,8 @@ int rip_output(struct mbuf *, struct so struct mbuf *); int rip_attach(struct socket *, int); int rip_detach(struct socket *); +voidrip_lock(struct socket *); +voidrip_unlock(struct socket *); int rip_bind(struct socket *so, struct mbuf *, struct proc *); int rip_connect(struct socket *, struct mbuf *); int rip_disconnect(struct socket *); Index: sys/netinet/raw_ip.c === RCS file: /cvs/src/sys/netinet/raw_ip.c,v retrieving revision 1.147 diff -u -p -r1.147 raw_ip.c --- sys/netinet/raw_ip.c3 Sep 2022 22:43:38 - 1.147 +++ sys/netinet/raw_ip.c10 Sep 2022 19:41:56 - @@ -106,6 +106,8 @@ struct inpcbtable rawcbtable; const struct pr_usrreqs rip_usrreqs = { .pru_attach = rip_attach, .pru_detach = rip_detach, + .pru_lock = rip_lock, + .pru_unlock = rip_unlock, .pru_bind = rip_bind, .pru_connect= rip_connect, .pru_disconnect = rip_disconnect, @@ -220,12 +222,19 @@ rip_input(struct mbuf **mp, int *offp, i else n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) { + int ret; + if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(inp, , ip, n); - if (sbappendaddr(inp->inp_socket, + + mtx_enter(>inp_mtx); + ret = sbappendaddr(inp->inp_socket, >inp_socket->so_rcv, - sintosa(), n, opts) == 0) { + sintosa(), n, opts); + mtx_leave(>inp_mtx); + + if (ret == 0) { /* should notify about lost packet */ m_freem(n); m_freem(opts); @@ -498,6 +507,24 @@ rip_detach(struct socket *so) in_pcbdetach(inp); return (0); +} + +void +rip_lock(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + NET_ASSERT_LOCKED(); + mtx_enter(>inp_mtx); +} + +void +rip_unlock(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + + NET_ASSERT_LOCKED(); + mtx_leave(>inp_mtx); } int Index: sys/netinet6/ip6_var.h === RCS file: /cvs/src/sys/netinet6/ip6_var.h,v retrieving revision 1.102 diff -u -p -r1.102 ip6_var.h --- sys/netinet6/ip6_var.h 3 Sep 2022 22:43:38 - 1.102 +++ sys/netinet6/ip6_var.h 10 Sep 2022 19:41:56 - @@ -353,6 +353,8 @@ int rip6_output(struct mbuf *, struct so struct mbuf *); intrip6_attach(struct socket *, int); intrip6_detach(struct socket *); +void rip6_lock(struct socket *); +void rip6_unlock(struct socket *); intrip6_bind(struct socket *, struct mbuf *, struct proc *); intrip6_connect(struct socket *, struct mbuf *); intrip6_disconnect(struct socket *); Index: sys/netinet6/raw_ip6.c === RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v retrieving revision 1.168 diff -u -p -r1.168 raw_ip6.c --- sys/netinet6/raw_ip6.c 3 Sep 2022 22:43:38 - 1.168 +++ sys/netinet6/raw_ip6.c 10 Sep 2022 19:41:56 - @@ -108,6 +108,8 @@ struct cpumem *rip6counters; const struct pr_usrreqs rip6_usrreqs = { .pru_attach = rip6_attach, .pru_detach = rip6_detach, + .pru_lock = rip6_lock, + .pru_unlock = rip6_unlock, .pru_bind = rip6_bind, .pru_connect= rip6_connect, .pru_disconnect = rip6_disconnect, @@ -261,13 +263,20 @@ rip6_input(struct mbuf **mp, int *offp, else n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) { + int ret; + if (in6p->inp_flags & IN6P_CONTROLOPTS) ip6_savecontrol(in6p, n, ); /* strip intermediate headers */ m_adj(n, *offp); - if (sbappendaddr(in6p->inp_socket, + + mtx_enter(>inp_mtx); + ret = sbappendaddr(in6p->inp_socket, >inp_socket->so_rcv, - sin6tosa(), n, opts) == 0) { +
Re: soreceive with shared netlock
> On 3 Sep 2022, at 23:47, Alexander Bluhm wrote: > > Hi, > > The next small step towards parallel network stack is to use shared > netlock in soreceive(). The UDP and IP divert layer provide locking > of the PCB. If that is possible, use shared instead of exclusive > netlock in soreceive(). The PCB mutex provides a per socket lock > against multiple soreceive() running in parallel. > > The solock_shared() is a bit hacky. Especially where we have to > release and regrab both locks in sosleep_nsec(). But it does what > we need. > > The udp_input() keeps exclusive lock for now. Socket splicing and > IP header chains are not completely MP safe yet. Also udp_input() > has some tentacles that need more testing. > > Ok to procceed with soreceive() unlocking before release? I think > udp_input() has to wait post 7.2. > I have no objections. > bluhm > > Index: kern/uipc_socket.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.287 > diff -u -p -r1.287 uipc_socket.c > --- kern/uipc_socket.c3 Sep 2022 13:29:33 - 1.287 > +++ kern/uipc_socket.c3 Sep 2022 19:23:43 - > @@ -822,10 +822,10 @@ bad: > if (mp) > *mp = NULL; > > - solock(so); > + solock_shared(so); > restart: > if ((error = sblock(so, >so_rcv, SBLOCKWAIT(flags))) != 0) { > - sounlock(so); > + sounlock_shared(so); > return (error); > } > > @@ -893,7 +893,7 @@ restart: > sbunlock(so, >so_rcv); > error = sbwait(so, >so_rcv); > if (error) { > - sounlock(so); > + sounlock_shared(so); > return (error); > } > goto restart; > @@ -962,11 +962,11 @@ dontblock: > sbsync(>so_rcv, nextrecord); > if (controlp) { > if (pr->pr_domain->dom_externalize) { > - sounlock(so); > + sounlock_shared(so); > error = > (*pr->pr_domain->dom_externalize) > (cm, controllen, flags); > - solock(so); > + solock_shared(so); > } > *controlp = cm; > } else { > @@ -1040,9 +1040,9 @@ dontblock: > SBLASTRECORDCHK(>so_rcv, "soreceive uiomove"); > SBLASTMBUFCHK(>so_rcv, "soreceive uiomove"); > resid = uio->uio_resid; > - sounlock(so); > + sounlock_shared(so); > uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); > - solock(so); > + solock_shared(so); > if (uio_error) > uio->uio_resid = resid - len; > } else > @@ -1126,7 +1126,7 @@ dontblock: > error = sbwait(so, >so_rcv); > if (error) { > sbunlock(so, >so_rcv); > - sounlock(so); > + sounlock_shared(so); > return (0); > } > if ((m = so->so_rcv.sb_mb) != NULL) > @@ -1171,7 +1171,7 @@ dontblock: > *flagsp |= flags; > release: > sbunlock(so, >so_rcv); > - sounlock(so); > + sounlock_shared(so); > return (error); > } > > Index: kern/uipc_socket2.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v > retrieving revision 1.127 > diff -u -p -r1.127 uipc_socket2.c > --- kern/uipc_socket2.c 13 Aug 2022 21:01:46 - 1.127 > +++ kern/uipc_socket2.c 3 Sep 2022 19:23:43 - > @@ -360,6 +360,24 @@ solock(struct socket *so) > } > } > > +void > +solock_shared(struct socket *so) > +{ > + switch (so->so_proto->pr_domain->dom_family) { > + case PF_INET: > + case PF_INET6: > + if (so->so_proto->pr_usrreqs->pru_lock != NULL) { > + NET_LOCK_SHARED(); > + pru_lock(so); > + } else > +
Re: soreceive with shared netlock
> On 4 Sep 2022, at 00:56, Alexander Bluhm wrote: > > On Sat, Sep 03, 2022 at 11:20:17PM +0200, Hrvoje Popovski wrote: >> with this diff while booting I'm getting this witness trace > > It is not related to soreceive() diff, but TCP diff I commited > before. I forgot a mutex initalization which is fatal with witness. > Fix below. > ok mvs@ > bluhm > > Index: netinet/tcp_subr.c > === > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v > retrieving revision 1.187 > diff -u -p -r1.187 tcp_subr.c > --- netinet/tcp_subr.c3 Sep 2022 19:22:19 - 1.187 > +++ netinet/tcp_subr.c3 Sep 2022 21:54:48 - > @@ -105,7 +105,7 @@ > *T tcp_timer_mtx global tcp timer data structures > */ > > -struct mutex tcp_timer_mtx; > +struct mutex tcp_timer_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); > > /* patchable/settable parameters for tcp */ > int tcp_mssdflt = TCP_MSS; >
Re: soreceive with shared netlock
On Sat, Sep 03, 2022 at 11:20:17PM +0200, Hrvoje Popovski wrote: > with this diff while booting I'm getting this witness trace It is not related to soreceive() diff, but TCP diff I commited before. I forgot a mutex initalization which is fatal with witness. Fix below. bluhm Index: netinet/tcp_subr.c === RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.187 diff -u -p -r1.187 tcp_subr.c --- netinet/tcp_subr.c 3 Sep 2022 19:22:19 - 1.187 +++ netinet/tcp_subr.c 3 Sep 2022 21:54:48 - @@ -105,7 +105,7 @@ * T tcp_timer_mtx global tcp timer data structures */ -struct mutex tcp_timer_mtx; +struct mutex tcp_timer_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); /* patchable/settable parameters for tcp */ inttcp_mssdflt = TCP_MSS;
Re: soreceive with shared netlock
On 3.9.2022. 22:47, Alexander Bluhm wrote: > Hi, > > The next small step towards parallel network stack is to use shared > netlock in soreceive(). The UDP and IP divert layer provide locking > of the PCB. If that is possible, use shared instead of exclusive > netlock in soreceive(). The PCB mutex provides a per socket lock > against multiple soreceive() running in parallel. > > The solock_shared() is a bit hacky. Especially where we have to > release and regrab both locks in sosleep_nsec(). But it does what > we need. > > The udp_input() keeps exclusive lock for now. Socket splicing and > IP header chains are not completely MP safe yet. Also udp_input() > has some tentacles that need more testing. > > Ok to procceed with soreceive() unlocking before release? I think > udp_input() has to wait post 7.2. Hi, with this diff while booting I'm getting this witness trace iic0 at ichiic0 isa0 at pcib0 isadma0 at isa0 pcppi0 at isa0 port 0x61 spkr0 at pcppi0 vmm0 at mainbus0: VMX/EPT witness: lock_object uninitialized: 0x82379880 Starting stack trace... witness_checkorder(82379880,9,0,82379880,8a214bc08a1c5075,822b0ff0) at witness_checkorder+0xad mtx_enter(82379870,82379870,6e3a311a0ad2ae1e,82097020,824c12c0,9) at mtx_enter+0x34 tcp_slowtimo(812dd160,800020bf3110,82379870,82379870,6e3a311a0ad2ae1e,82097020) at tcp_slowtimo+0x10 pfslowtimo(824c12c0,824c12c0,824c12c0,81ed1820,822b3148,d) at pfslowtimo+0xc7 timeout_run(824c12c0,824c12c0,82301e90,800020bf31a0,81277b90,82301e90) at timeout_run+0x93 softclock_thread(800020be2fc0,800020be2fc0,0,0,0,4) at softclock_thread+0x11d end trace frame: 0x0, count: 251 End of stack trace.
soreceive with shared netlock
Hi, The next small step towards parallel network stack is to use shared netlock in soreceive(). The UDP and IP divert layer provide locking of the PCB. If that is possible, use shared instead of exclusive netlock in soreceive(). The PCB mutex provides a per socket lock against multiple soreceive() running in parallel. The solock_shared() is a bit hacky. Especially where we have to release and regrab both locks in sosleep_nsec(). But it does what we need. The udp_input() keeps exclusive lock for now. Socket splicing and IP header chains are not completely MP safe yet. Also udp_input() has some tentacles that need more testing. Ok to procceed with soreceive() unlocking before release? I think udp_input() has to wait post 7.2. bluhm Index: kern/uipc_socket.c === RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.287 diff -u -p -r1.287 uipc_socket.c --- kern/uipc_socket.c 3 Sep 2022 13:29:33 - 1.287 +++ kern/uipc_socket.c 3 Sep 2022 19:23:43 - @@ -822,10 +822,10 @@ bad: if (mp) *mp = NULL; - solock(so); + solock_shared(so); restart: if ((error = sblock(so, >so_rcv, SBLOCKWAIT(flags))) != 0) { - sounlock(so); + sounlock_shared(so); return (error); } @@ -893,7 +893,7 @@ restart: sbunlock(so, >so_rcv); error = sbwait(so, >so_rcv); if (error) { - sounlock(so); + sounlock_shared(so); return (error); } goto restart; @@ -962,11 +962,11 @@ dontblock: sbsync(>so_rcv, nextrecord); if (controlp) { if (pr->pr_domain->dom_externalize) { - sounlock(so); + sounlock_shared(so); error = (*pr->pr_domain->dom_externalize) (cm, controllen, flags); - solock(so); + solock_shared(so); } *controlp = cm; } else { @@ -1040,9 +1040,9 @@ dontblock: SBLASTRECORDCHK(>so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(>so_rcv, "soreceive uiomove"); resid = uio->uio_resid; - sounlock(so); + sounlock_shared(so); uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio); - solock(so); + solock_shared(so); if (uio_error) uio->uio_resid = resid - len; } else @@ -1126,7 +1126,7 @@ dontblock: error = sbwait(so, >so_rcv); if (error) { sbunlock(so, >so_rcv); - sounlock(so); + sounlock_shared(so); return (0); } if ((m = so->so_rcv.sb_mb) != NULL) @@ -1171,7 +1171,7 @@ dontblock: *flagsp |= flags; release: sbunlock(so, >so_rcv); - sounlock(so); + sounlock_shared(so); return (error); } Index: kern/uipc_socket2.c === RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v retrieving revision 1.127 diff -u -p -r1.127 uipc_socket2.c --- kern/uipc_socket2.c 13 Aug 2022 21:01:46 - 1.127 +++ kern/uipc_socket2.c 3 Sep 2022 19:23:43 - @@ -360,6 +360,24 @@ solock(struct socket *so) } } +void +solock_shared(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + if (so->so_proto->pr_usrreqs->pru_lock != NULL) { + NET_LOCK_SHARED(); + pru_lock(so); + } else + NET_LOCK(); + break; + default: + rw_enter_write(>so_lock); + break; + } +} + int solock_persocket(struct socket *so) { @@ -403,6 +421,24 @@ sounlock(struct socket *so) } void +sounlock_shared(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + if (so->so_proto->pr_usrreqs->pru_unlock != NULL) { + pru_unlock(so); + NET_UNLOCK_SH