> On 21 Aug 2022, at 00:31, Alexander Bluhm <[email protected]> wrote:
>
> Hi,
>
> An inpcb mutex seems usable to serialize access to the socket receive
> buffer. I have tried it for divert and udp input.
>
> As a first step I would like to replace the kernel lock with a
> per-PCB mutex in divert input.
>
> ok?
>
ok mvs@
> bluhm
>
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.270
> diff -u -p -r1.270 in_pcb.c
> --- netinet/in_pcb.c 8 Aug 2022 12:06:30 -0000 1.270
> +++ netinet/in_pcb.c 20 Aug 2022 21:21:13 -0000
> @@ -236,6 +236,7 @@ in_pcballoc(struct socket *so, struct in
> inp->inp_table = table;
> inp->inp_socket = so;
> refcnt_init_trace(&inp->inp_refcnt, DT_REFCNT_IDX_INPCB);
> + mtx_init(&inp->inp_mtx, IPL_SOFTNET);
> inp->inp_seclevel[SL_AUTH] = IPSEC_AUTH_LEVEL_DEFAULT;
> inp->inp_seclevel[SL_ESP_TRANS] = IPSEC_ESP_TRANS_LEVEL_DEFAULT;
> inp->inp_seclevel[SL_ESP_NETWORK] = IPSEC_ESP_NETWORK_LEVEL_DEFAULT;
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> retrieving revision 1.129
> diff -u -p -r1.129 in_pcb.h
> --- netinet/in_pcb.h 15 May 2022 09:12:20 -0000 1.129
> +++ netinet/in_pcb.h 20 Aug 2022 21:21:13 -0000
> @@ -79,6 +79,7 @@
> * I immutable after creation
> * N net lock
> * t inpt_mtx pcb table mutex
> + * p inp_mtx pcb mutex
> */
>
> struct pf_state_key;
> @@ -121,6 +122,7 @@ struct inpcb {
> #define inp_route inp_ru.ru_route
> #define inp_route6 inp_ru.ru_route6
> struct refcnt inp_refcnt; /* refcount PCB, delay memory free */
> + struct mutex inp_mtx; /* protect PCB and socket members */
> int inp_flags; /* generic IP/datagram flags */
> union { /* Header prototype. */
> struct ip hu_ip;
> Index: netinet/ip_divert.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 ip_divert.c
> --- netinet/ip_divert.c 15 Aug 2022 09:11:39 -0000 1.69
> +++ netinet/ip_divert.c 20 Aug 2022 21:20:19 -0000
> @@ -227,22 +227,15 @@ divert_packet(struct mbuf *m, int dir, u
> if_put(ifp);
> }
>
> + mtx_enter(&inp->inp_mtx);
> so = inp->inp_socket;
> - /*
> - * XXXSMP sbappendaddr() is not MP safe and this function is called
> - * from pf with shared netlock. To call only one sbappendaddr() from
> - * divert_packet(), protect it with kernel lock. All other places
> - * call sbappendaddr() with exclusive net lock. This blocks
> - * divert_packet() as we have the shared lock.
> - */
> - KERNEL_LOCK();
> if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) {
> - KERNEL_UNLOCK();
> + mtx_leave(&inp->inp_mtx);
> divstat_inc(divs_fullsock);
> goto bad;
> }
> - sorwakeup(inp->inp_socket);
> - KERNEL_UNLOCK();
> + mtx_leave(&inp->inp_mtx);
> + sorwakeup(so);
>
> in_pcbunref(inp);
> return;
> Index: netinet6/ip6_divert.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_divert.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 ip6_divert.c
> --- netinet6/ip6_divert.c 15 Aug 2022 09:11:39 -0000 1.68
> +++ netinet6/ip6_divert.c 20 Aug 2022 21:27:46 -0000
> @@ -233,22 +233,15 @@ divert6_packet(struct mbuf *m, int dir,
> if_put(ifp);
> }
>
> + mtx_enter(&inp->inp_mtx);
> so = inp->inp_socket;
> - /*
> - * XXXSMP sbappendaddr() is not MP safe and this function is called
> - * from pf with shared netlock. To call only one sbappendaddr() from
> - * divert_packet(), protect it with kernel lock. All other places
> - * call sbappendaddr() with exclusive net lock. This blocks
> - * divert_packet() as we have the shared lock.
> - */
> - KERNEL_LOCK();
> if (sbappendaddr(so, &so->so_rcv, sin6tosa(&sin6), m, NULL) == 0) {
> - KERNEL_UNLOCK();
> + mtx_leave(&inp->inp_mtx);
> div6stat_inc(div6s_fullsock);
> goto bad;
> }
> - sorwakeup(inp->inp_socket);
> - KERNEL_UNLOCK();
> + mtx_leave(&inp->inp_mtx);
> + sorwakeup(so);
>
> in_pcbunref(inp);
> return;
>