On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> This diff converts the SRP list to a SMR list in rtsock.c
> SRP is a bit strange with how it works and the SMR code is a bit easier to
> understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> a refcount on the route pcb so that we can leave the SMR critical section
> and then enter the SMR critical section at the end of the loop before
> dropping the refcount again.
>
> The diff does not immeditaly explode but I doubt we can exploit
> parallelism in route_input() so this may fail at some later stage if it is
> wrong.
>
> Comments from the lock critics welcome
We use `so_lock' rwlock(9) to protect route domain sockets. We can't
convert this SRP list to SMR list because we call solock() within
foreach loop.
We can easily crash kernel by running in parallel some "route monitor"
commands and "while true; ifconfig vether0 create ; ifconfig vether0
destroy; done".
> --
> :wq Claudio
>
> Index: sys/net/rtsock.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.334
> diff -u -p -r1.334 rtsock.c
> --- sys/net/rtsock.c 28 Jun 2022 10:01:13 -0000 1.334
> +++ sys/net/rtsock.c 30 Jun 2022 08:02:09 -0000
> @@ -71,7 +71,7 @@
> #include <sys/domain.h>
> #include <sys/pool.h>
> #include <sys/protosw.h>
> -#include <sys/srp.h>
> +#include <sys/smr.h>
>
> #include <net/if.h>
> #include <net/if_dl.h>
> @@ -107,8 +107,6 @@ struct walkarg {
> };
>
> void route_prinit(void);
> -void rcb_ref(void *, void *);
> -void rcb_unref(void *, void *);
> int route_output(struct mbuf *, struct socket *, struct sockaddr *,
> struct mbuf *);
> int route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> @@ -149,7 +147,7 @@ int rt_setsource(unsigned int, struct
> struct rtpcb {
> struct socket *rop_socket; /* [I] */
>
> - SRPL_ENTRY(rtpcb) rop_list;
> + SMR_TAILQ_ENTRY(rtpcb) rop_list;
> struct refcnt rop_refcnt;
> struct timeout rop_timeout;
> unsigned int rop_msgfilter; /* [s] */
> @@ -162,8 +160,7 @@ struct rtpcb {
> #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb)
>
> struct rtptable {
> - SRPL_HEAD(, rtpcb) rtp_list;
> - struct srpl_rc rtp_rc;
> + SMR_TAILQ_HEAD(, rtpcb) rtp_list;
> struct rwlock rtp_lk;
> unsigned int rtp_count;
> };
> @@ -185,29 +182,12 @@ struct rtptable rtptable;
> void
> route_prinit(void)
> {
> - srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
> rw_init(&rtptable.rtp_lk, "rtsock");
> - SRPL_INIT(&rtptable.rtp_list);
> + SMR_TAILQ_INIT(&rtptable.rtp_list);
> pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
> IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
> }
>
> -void
> -rcb_ref(void *null, void *v)
> -{
> - struct rtpcb *rop = v;
> -
> - refcnt_take(&rop->rop_refcnt);
> -}
> -
> -void
> -rcb_unref(void *null, void *v)
> -{
> - struct rtpcb *rop = v;
> -
> - refcnt_rele_wake(&rop->rop_refcnt);
> -}
> -
> int
> route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
> struct mbuf *control, struct proc *p)
> @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
> so->so_options |= SO_USELOOPBACK;
>
> rw_enter(&rtptable.rtp_lk, RW_WRITE);
> - SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> - rop_list);
> + SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
> rtptable.rtp_count++;
> rw_exit(&rtptable.rtp_lk);
>
> @@ -347,8 +326,7 @@ route_detach(struct socket *so)
> rw_enter(&rtptable.rtp_lk, RW_WRITE);
>
> rtptable.rtp_count--;
> - SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> - rop_list);
> + SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
> rw_exit(&rtptable.rtp_lk);
>
> sounlock(so);
> @@ -356,6 +334,7 @@ route_detach(struct socket *so)
> /* wait for all references to drop */
> refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
> timeout_del_barrier(&rop->rop_timeout);
> + smr_barrier();
>
> solock(so);
>
> @@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
> struct rtpcb *rop;
> struct rt_msghdr *rtm;
> struct mbuf *m = m0;
> - struct srp_ref sr;
>
> /* ensure that we can access the rtm_type via mtod() */
> if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
> @@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
> return;
> }
>
> - SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
> + smr_read_enter();
> + SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
> /*
> * If route socket is bound to an address family only send
> * messages that match the address family. Address family
> @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
> rop->rop_proto != sa_family)
> continue;
>
> -
> + refcnt_take(&rop->rop_refcnt);
> + smr_read_leave();
> so = rop->rop_socket;
> solock(so);
>
> @@ -579,8 +559,10 @@ route_input(struct mbuf *m0, struct sock
> rtm_sendup(so, m);
> next:
> sounlock(so);
> + smr_read_enter();
> + refcnt_rele_wake(&rop->rop_refcnt);
> }
> - SRPL_LEAVE(&sr);
> + smr_read_leave();
>
> m_freem(m);
> }
>