> On 21 Aug 2022, at 00:31, Alexander Bluhm <alexander.bl...@gmx.net> wrote: > > Hi, > > A inpcb mutex seems usable to serialize access to socket receive > buffer. I have tried it for divert and udp input. > > As first step I would like to replace the kernel lock with a per > PCB mutex in divert input. > > ok? >
ok mvs@ > bluhm > > Index: netinet/in_pcb.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v > retrieving revision 1.270 > diff -u -p -r1.270 in_pcb.c > --- netinet/in_pcb.c 8 Aug 2022 12:06:30 -0000 1.270 > +++ netinet/in_pcb.c 20 Aug 2022 21:21:13 -0000 > @@ -236,6 +236,7 @@ in_pcballoc(struct socket *so, struct in > inp->inp_table = table; > inp->inp_socket = so; > refcnt_init_trace(&inp->inp_refcnt, DT_REFCNT_IDX_INPCB); > + mtx_init(&inp->inp_mtx, IPL_SOFTNET); > inp->inp_seclevel[SL_AUTH] = IPSEC_AUTH_LEVEL_DEFAULT; > inp->inp_seclevel[SL_ESP_TRANS] = IPSEC_ESP_TRANS_LEVEL_DEFAULT; > inp->inp_seclevel[SL_ESP_NETWORK] = IPSEC_ESP_NETWORK_LEVEL_DEFAULT; > Index: netinet/in_pcb.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v > retrieving revision 1.129 > diff -u -p -r1.129 in_pcb.h > --- netinet/in_pcb.h 15 May 2022 09:12:20 -0000 1.129 > +++ netinet/in_pcb.h 20 Aug 2022 21:21:13 -0000 > @@ -79,6 +79,7 @@ > * I immutable after creation > * N net lock > * t inpt_mtx pcb table mutex > + * p inpcb_mtx pcb mutex > */ > > struct pf_state_key; > @@ -121,6 +122,7 @@ struct inpcb { > #define inp_route inp_ru.ru_route > #define inp_route6 inp_ru.ru_route6 > struct refcnt inp_refcnt; /* refcount PCB, delay memory free */ > + struct mutex inp_mtx; /* protect PCB and socket members */ > int inp_flags; /* generic IP/datagram flags */ > union { /* Header prototype. */ > struct ip hu_ip; > Index: netinet/ip_divert.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v > retrieving revision 1.69 > diff -u -p -r1.69 ip_divert.c > --- netinet/ip_divert.c 15 Aug 2022 09:11:39 -0000 1.69 > +++ netinet/ip_divert.c 20 Aug 2022 21:20:19 -0000 > @@ -227,22 +227,15 @@ divert_packet(struct mbuf *m, int dir, u > if_put(ifp); > } > > + mtx_enter(&inp->inp_mtx); > so = inp->inp_socket; > - /* > - * XXXSMP sbappendaddr() is not MP safe and this function is called > - * from pf with shared netlock. To call only one sbappendaddr() from > - * divert_packet(), protect it with kernel lock. All other places > - * call sbappendaddr() with exclusive net lock. This blocks > - * divert_packet() as we have the shared lock. > - */ > - KERNEL_LOCK(); > if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) { > - KERNEL_UNLOCK(); > + mtx_leave(&inp->inp_mtx); > divstat_inc(divs_fullsock); > goto bad; > } > - sorwakeup(inp->inp_socket); > - KERNEL_UNLOCK(); > + mtx_leave(&inp->inp_mtx); > + sorwakeup(so); > > in_pcbunref(inp); > return; > Index: netinet6/ip6_divert.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_divert.c,v > retrieving revision 1.68 > diff -u -p -r1.68 ip6_divert.c > --- netinet6/ip6_divert.c 15 Aug 2022 09:11:39 -0000 1.68 > +++ netinet6/ip6_divert.c 20 Aug 2022 21:27:46 -0000 > @@ -233,22 +233,15 @@ divert6_packet(struct mbuf *m, int dir, > if_put(ifp); > } > > + mtx_enter(&inp->inp_mtx); > so = inp->inp_socket; > - /* > - * XXXSMP sbappendaddr() is not MP safe and this function is called > - * from pf with shared netlock. To call only one sbappendaddr() from > - * divert_packet(), protect it with kernel lock. All other places > - * call sbappendaddr() with exclusive net lock. This blocks > - * divert_packet() as we have the shared lock. > - */ > - KERNEL_LOCK(); > if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) { > - KERNEL_UNLOCK(); > + mtx_leave(&inp->inp_mtx); > div6stat_inc(div6s_fullsock); > goto bad; > } > - sorwakeup(inp->inp_socket); > - KERNEL_UNLOCK(); > + mtx_leave(&inp->inp_mtx); > + sorwakeup(so); > > in_pcbunref(inp); > return; >