On 30/06/22(Thu) 11:56, Claudio Jeker wrote:
> On Thu, Jun 30, 2022 at 12:34:33PM +0300, Vitaliy Makkoveev wrote:
> > On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote:
> > > This diff converts the SRP list to a SMR list in rtsock.c
> > > SRP is a bit strange with how it works and the SMR code is a bit easier to
> > > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab
> > > a refcount on the route pcb so that we can leave the SMR critical section
> > > and then enter the SMR critical section at the end of the loop before
> > > dropping the refcount again.
> > > 
> > > The diff does not immeditaly explode but I doubt we can exploit
> > > parallelism in route_input() so this may fail at some later stage if it is
> > > wrong.
> > > 
> > > Comments from the lock critics welcome
> > 
> > We use `so_lock' rwlock(9) to protect route domain sockets. We can't
> > convert this SRP list to SMR list because we call solock() within
> > foreach loop.

We shouldn't use SRP list either, no?  Or are we allowed to sleep
holding a SRP reference?  That's the question that triggered this diff.

> because of the so_lock the code uses a refcnt on the route pcb to make
> sure that the object is not freed while we sleep. So that is handled by
> this diff.
>  
> > We can easily crash kernel by running in parallel some "route monitor"
> > commands and "while true; ifconfig vether0 create ; ifconfig vether0
> > destroy; done".
> 
> That does not cause problem on my system.
>  
> > > -- 
> > > :wq Claudio
> > > 
> > > Index: sys/net/rtsock.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/net/rtsock.c,v
> > > retrieving revision 1.334
> > > diff -u -p -r1.334 rtsock.c
> > > --- sys/net/rtsock.c      28 Jun 2022 10:01:13 -0000      1.334
> > > +++ sys/net/rtsock.c      30 Jun 2022 08:02:09 -0000
> > > @@ -71,7 +71,7 @@
> > >  #include <sys/domain.h>
> > >  #include <sys/pool.h>
> > >  #include <sys/protosw.h>
> > > -#include <sys/srp.h>
> > > +#include <sys/smr.h>
> > >  
> > >  #include <net/if.h>
> > >  #include <net/if_dl.h>
> > > @@ -107,8 +107,6 @@ struct walkarg {
> > >  };
> > >  
> > >  void     route_prinit(void);
> > > -void     rcb_ref(void *, void *);
> > > -void     rcb_unref(void *, void *);
> > >  int      route_output(struct mbuf *, struct socket *, struct sockaddr *,
> > >       struct mbuf *);
> > >  int      route_ctloutput(int, struct socket *, int, int, struct mbuf *);
> > > @@ -149,7 +147,7 @@ int            rt_setsource(unsigned int, struct 
> > >  struct rtpcb {
> > >   struct socket           *rop_socket;            /* [I] */
> > >  
> > > - SRPL_ENTRY(rtpcb)       rop_list;
> > > + SMR_TAILQ_ENTRY(rtpcb)  rop_list;
> > >   struct refcnt           rop_refcnt;
> > >   struct timeout          rop_timeout;
> > >   unsigned int            rop_msgfilter;          /* [s] */
> > > @@ -162,8 +160,7 @@ struct rtpcb {
> > >  #define  sotortpcb(so)   ((struct rtpcb *)(so)->so_pcb)
> > >  
> > >  struct rtptable {
> > > - SRPL_HEAD(, rtpcb)      rtp_list;
> > > - struct srpl_rc          rtp_rc;
> > > + SMR_TAILQ_HEAD(, rtpcb) rtp_list;
> > >   struct rwlock           rtp_lk;
> > >   unsigned int            rtp_count;
> > >  };
> > > @@ -185,29 +182,12 @@ struct rtptable rtptable;
> > >  void
> > >  route_prinit(void)
> > >  {
> > > - srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
> > >   rw_init(&rtptable.rtp_lk, "rtsock");
> > > - SRPL_INIT(&rtptable.rtp_list);
> > > + SMR_TAILQ_INIT(&rtptable.rtp_list);
> > >   pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
> > >       IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
> > >  }
> > >  
> > > -void
> > > -rcb_ref(void *null, void *v)
> > > -{
> > > - struct rtpcb *rop = v;
> > > -
> > > - refcnt_take(&rop->rop_refcnt);
> > > -}
> > > -
> > > -void
> > > -rcb_unref(void *null, void *v)
> > > -{
> > > - struct rtpcb *rop = v;
> > > -
> > > - refcnt_rele_wake(&rop->rop_refcnt);
> > > -}
> > > -
> > >  int
> > >  route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf 
> > > *nam,
> > >      struct mbuf *control, struct proc *p)
> > > @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot
> > >   so->so_options |= SO_USELOOPBACK;
> > >  
> > >   rw_enter(&rtptable.rtp_lk, RW_WRITE);
> > > - SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
> > > -     rop_list);
> > > + SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list);
> > >   rtptable.rtp_count++;
> > >   rw_exit(&rtptable.rtp_lk);
> > >  
> > > @@ -347,8 +326,7 @@ route_detach(struct socket *so)
> > >   rw_enter(&rtptable.rtp_lk, RW_WRITE);
> > >  
> > >   rtptable.rtp_count--;
> > > - SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
> > > -     rop_list);
> > > + SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list);
> > >   rw_exit(&rtptable.rtp_lk);
> > >  
> > >   sounlock(so);
> > > @@ -356,6 +334,7 @@ route_detach(struct socket *so)
> > >   /* wait for all references to drop */
> > >   refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
> > >   timeout_del_barrier(&rop->rop_timeout);
> > > + smr_barrier();
> > >  
> > >   solock(so);
> > >  
> > > @@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock
> > >   struct rtpcb *rop;
> > >   struct rt_msghdr *rtm;
> > >   struct mbuf *m = m0;
> > > - struct srp_ref sr;
> > >  
> > >   /* ensure that we can access the rtm_type via mtod() */
> > >   if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
> > > @@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock
> > >           return;
> > >   }
> > >  
> > > - SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
> > > + smr_read_enter();
> > > + SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) {
> > >           /*
> > >            * If route socket is bound to an address family only send
> > >            * messages that match the address family. Address family
> > > @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock
> > >               rop->rop_proto != sa_family)
> > >                   continue;
> > >  
> > > -
> > > +         refcnt_take(&rop->rop_refcnt);
> > > +         smr_read_leave();
> > >           so = rop->rop_socket;
> > >           solock(so);
> > >  
> > > @@ -579,8 +559,10 @@ route_input(struct mbuf *m0, struct sock
> > >           rtm_sendup(so, m);
> > >  next:
> > >           sounlock(so);
> > > +         smr_read_enter();
> > > +         refcnt_rele_wake(&rop->rop_refcnt);
> > >   }
> > > - SRPL_LEAVE(&sr);
> > > + smr_read_leave();
> > >  
> > >   m_freem(m);
> > >  }
> > > 
> > 
> 
> -- 
> :wq Claudio
> 

Reply via email to