On Thu, Jun 30, 2022 at 11:51:52AM +0200, Claudio Jeker wrote:
> On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > This diff converts the SRP list to a SMR list in rtsock.c
> > SRP is a bit strange with how it works and the SMR code is a bit easier to
> > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> > a refcount on the route pcb so that we can leave the SMR critical section
> > and then enter the SMR critical section at the end of the loop before
> > dropping the refcount again.
> > 
> > The diff does not immeditaly explode but I doubt we can exploit
> > parallelism in route_input() so this may fail at some later stage if it is
> > wrong.
> > 
> > Comments from the lock critics welcome
> 
> After discussing this with mpi@ and jmatthew@ we came to the conclusion
> that we need to smr_barrier() before refcnt_finalize() to ensure that no
> other CPU is between the SMR_TAILQ_FOREACH, refcnt_take() and
> smr_read_leave().
> 
> Updated diff below

ok by me

> -- 
> :wq Claudio
> 
> Index: net/rtsock.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.334
> diff -u -p -r1.334 rtsock.c
> --- net/rtsock.c      28 Jun 2022 10:01:13 -0000      1.334
> +++ net/rtsock.c      30 Jun 2022 09:25:53 -0000
> @@ -71,7 +71,7 @@
>  #include <sys/domain.h>
>  #include <sys/pool.h>
>  #include <sys/protosw.h>
> -#include <sys/srp.h>
> +#include <sys/smr.h>
>  
>  #include <net/if.h>
>  #include <net/if_dl.h>
> @@ -107,8 +107,6 @@ struct walkarg {
>  };
>  
>  void route_prinit(void);
> -void rcb_ref(void *, void *);
> -void rcb_unref(void *, void *);
>  int  route_output(struct mbuf *, struct socket *, struct sockaddr *,
>           struct mbuf *);
>  int  route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> @@ -149,7 +147,7 @@ int                rt_setsource(unsigned int, struct 
>  struct rtpcb {
>       struct socket           *rop_socket;            /* [I] */
>  
> -     SRPL_ENTRY(rtpcb)       rop_list;
> +     SMR_TAILQ_ENTRY(rtpcb)  rop_list;
>       struct refcnt           rop_refcnt;
>       struct timeout          rop_timeout;
>       unsigned int            rop_msgfilter;          /* [s] */
> @@ -162,8 +160,7 @@ struct rtpcb {
>  #define      sotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
>  
>  struct rtptable {
> -     SRPL_HEAD(, rtpcb)      rtp_list;
> -     struct srpl_rc          rtp_rc;
> +     SMR_TAILQ_HEAD(, rtpcb) rtp_list;
>       struct rwlock           rtp_lk;
>       unsigned int            rtp_count;
>  };
> @@ -185,29 +182,12 @@ struct rtptable rtptable;
>  void
>  route_prinit(void)
>  {
> -     srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
>       rw_init(&rtptable.rtp_lk, "rtsock");
> -     SRPL_INIT(&rtptable.rtp_list);
> +     SMR_TAILQ_INIT(&rtptable.rtp_list);
>       pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
>           IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
>  }
>  
> -void
> -rcb_ref(void *null, void *v)
> -{
> -     struct rtpcb *rop = v;
> -
> -     refcnt_take(&rop->rop_refcnt);
> -}
> -
> -void
> -rcb_unref(void *null, void *v)
> -{
> -     struct rtpcb *rop = v;
> -
> -     refcnt_rele_wake(&rop->rop_refcnt);
> -}
> -
>  int
>  route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
>      struct mbuf *control, struct proc *p)
> @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
>       so->so_options |= SO_USELOOPBACK;
>  
>       rw_enter(&rtptable.rtp_lk, RW_WRITE);
> -     SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> -         rop_list);
> +     SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
>       rtptable.rtp_count++;
>       rw_exit(&rtptable.rtp_lk);
>  
> @@ -347,13 +326,13 @@ route_detach(struct socket *so)
>       rw_enter(&rtptable.rtp_lk, RW_WRITE);
>  
>       rtptable.rtp_count--;
> -     SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> -         rop_list);
> +     SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
>       rw_exit(&rtptable.rtp_lk);
>  
>       sounlock(so);
>  
>       /* wait for all references to drop */
> +     smr_barrier();
>       refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
>       timeout_del_barrier(&rop->rop_timeout);
>  
> @@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
>       struct rtpcb *rop;
>       struct rt_msghdr *rtm;
>       struct mbuf *m = m0;
> -     struct srp_ref sr;
>  
>       /* ensure that we can access the rtm_type via mtod() */
>       if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
> @@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
>               return;
>       }
>  
> -     SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
> +     smr_read_enter();
> +     SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
>               /*
>                * If route socket is bound to an address family only send
>                * messages that match the address family. Address family
> @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
>                   rop->rop_proto != sa_family)
>                       continue;
>  
> -
> +             refcnt_take(&rop->rop_refcnt);
> +             smr_read_leave();
>               so = rop->rop_socket;
>               solock(so);
>  
> @@ -579,8 +559,10 @@ route_input(struct mbuf *m0, struct sock
>               rtm_sendup(so, m);
>  next:
>               sounlock(so);
> +             smr_read_enter();
> +             refcnt_rele_wake(&rop->rop_refcnt);
>       }
> -     SRPL_LEAVE(&sr);
> +     smr_read_leave();
>  
>       m_freem(m);
>  }
> 

Reply via email to