On 29/11/17(Wed) 20:28, Claudio Jeker wrote: > This diff changes the the PCB list into an SRP list. Therefor concurrent > access is no longer an issue and route_input() could be more unlocked. > There is not much point of releasing the KERNEL_LOCK for now but I added > commented lock / unlock to the code. > > Next on the list are now the socekt functions sorwakeup, sbspace and > sbappendaddr. If those are save then route_input should be save. > > If this is the way to go then doing the same for pfkey should be straight > forward. > > Looking forward to test reports and comments
rw_enter() will not wail so its returned value don't need to be checked. I'm not fan of the commented KERNEL_LOCK/UNLOCK dance, but either way I think this should go in. > Index: net/rtsock.c > =================================================================== > RCS file: /cvs/src/sys/net/rtsock.c,v > retrieving revision 1.255 > diff -u -p -r1.255 rtsock.c > --- net/rtsock.c 3 Nov 2017 16:23:20 -0000 1.255 > +++ net/rtsock.c 29 Nov 2017 18:54:26 -0000 > @@ -70,6 +70,7 @@ > #include <sys/socketvar.h> > #include <sys/domain.h> > #include <sys/protosw.h> > +#include <sys/srp.h> > > #include <net/if.h> > #include <net/if_dl.h> > @@ -94,7 +95,6 @@ > #include <sys/kernel.h> > #include <sys/timeout.h> > > -struct sockaddr route_dst = { 2, PF_ROUTE, }; > struct sockaddr route_src = { 2, PF_ROUTE, }; > > struct walkarg { > @@ -103,6 +103,8 @@ struct walkarg { > }; > > void route_prinit(void); > +void route_ref(void *, void *); > +void route_unref(void *, void *); > int route_output(struct mbuf *, struct socket *, struct sockaddr *, > struct mbuf *); > int route_ctloutput(int, struct socket *, int, int, struct mbuf *); > @@ -133,7 +135,8 @@ int sysctl_rtable_rtstat(void *, size_ > > struct routecb { > struct rawcb rcb; > - LIST_ENTRY(routecb) rcb_list; > + SRPL_ENTRY(routecb) rcb_list; > + struct refcnt refcnt; > struct timeout timeout; > unsigned int msgfilter; > unsigned int flags; > @@ -142,11 +145,10 @@ struct routecb { > #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) > > struct route_cb { > - LIST_HEAD(, routecb) rcb; > - int ip_count; > - int ip6_count; > - int mpls_count; > - int any_count; > + SRPL_HEAD(, routecb) rcb; > + struct srpl_rc rcb_rc; > + struct rwlock rcb_lk; > + unsigned int any_count; > }; > > struct route_cb route_cb; > @@ -165,9 +167,26 @@ struct route_cb route_cb; > void > route_prinit(void) > { > - LIST_INIT(&route_cb.rcb); > + srpl_rc_init(&route_cb.rcb_rc, route_ref, route_unref, NULL); > + rw_init(&route_cb.rcb_lk, "rtsock"); > + SRPL_INIT(&route_cb.rcb); > } > > +void > +route_ref(void *null, void *v) > +{ > + struct routecb *rop = v; > + > + refcnt_take(&rop->refcnt); > +} > + > +void > +route_unref(void *null, void *v) > +{ > + struct routecb *rop = v; > + > + refcnt_rele_wake(&rop->refcnt); > +} > > int > route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, > @@ -218,9 +237,10 @@ route_attach(struct socket *so, int prot > */ > rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO); > rp = &rop->rcb; > - so->so_pcb = rp; > + so->so_pcb = rop; > /* Init the timeout structure */ > - timeout_set(&rop->timeout, route_senddesync, rp); > + timeout_set(&rop->timeout, route_senddesync, rop); > + refcnt_init(&rop->refcnt); > > if (curproc == NULL) > error = EACCES; > @@ -230,31 +250,33 @@ route_attach(struct socket *so, int prot > free(rop, M_PCB, sizeof(struct routecb)); > return (error); > } > + > rp->rcb_socket = so; > rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family; > rp->rcb_proto.sp_protocol = proto; > > rop->rtableid = curproc->p_p->ps_rtableid; > - switch (rp->rcb_proto.sp_protocol) { > - case AF_INET: > - route_cb.ip_count++; > - break; > - case AF_INET6: > - route_cb.ip6_count++; > - break; > -#ifdef MPLS > - case AF_MPLS: > - route_cb.mpls_count++; > - break; > -#endif > - } > > soisconnected(so); > so->so_options |= SO_USELOOPBACK; > > + /* KERNEL_UNLOCK(); */ > + > rp->rcb_faddr = &route_src; > + > + error = rw_enter(&route_cb.rcb_lk, RW_WRITE); > + if (error != 0) { > + free(rop, M_PCB, sizeof(struct routecb)); > + /* KERNEL_LOCK(); */ > + return (error); > + } > + > + SRPL_INSERT_HEAD_LOCKED(&route_cb.rcb_rc, &route_cb.rcb, rop, rcb_list); > route_cb.any_count++; > - LIST_INSERT_HEAD(&route_cb.rcb, rop, rcb_list); > + > + rw_exit(&route_cb.rcb_lk); > + > + /* KERNEL_LOCK(); */ > > return (0); > } > @@ -263,7 +285,7 @@ int > route_detach(struct socket *so) > { > struct routecb *rop; > - int af; > + int error; > > soassertlocked(so); > > @@ -271,18 +293,24 @@ route_detach(struct socket *so) > if (rop == NULL) > return (EINVAL); > > + /* KERNEL_UNLOCK(); */ > + > + error = rw_enter(&route_cb.rcb_lk, RW_WRITE); > + if (error != 0) { > + /* KERNEL_LOCK(); */ > + return (error); > + } > + > timeout_del(&rop->timeout); > - af = rop->rcb.rcb_proto.sp_protocol; > - if (af == AF_INET) > - route_cb.ip_count--; > - else if (af == AF_INET6) > - route_cb.ip6_count--; > -#ifdef MPLS > - else if (af == AF_MPLS) > - route_cb.mpls_count--; > -#endif > route_cb.any_count--; > - LIST_REMOVE(rop, rcb_list); > + > + SRPL_REMOVE_LOCKED(&route_cb.rcb_rc, &route_cb.rcb, > + rop, routecb, rcb_list); > + > + rw_exit(&route_cb.rcb_lk); > + /* wait for all references to drop */ > + refcnt_finalize(&rop->refcnt, "rtsockrefs"); > + /* KERNEL_LOCK(); */ > > so->so_pcb = NULL; > sofree(so); > @@ -348,12 +376,10 @@ route_ctloutput(int op, struct socket *s > void > route_senddesync(void *data) > { > - struct rawcb *rp; > struct routecb *rop; > struct mbuf *desync_mbuf; > > - rp = (struct rawcb *)data; > - rop = (struct routecb *)rp; > + rop = (struct routecb *)data; > > /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ > if ((rop->flags & ROUTECB_FLAG_DESYNC) == 0) > @@ -365,11 +391,11 @@ route_senddesync(void *data) > */ > desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); > if (desync_mbuf != NULL) { > - struct socket *so = rp->rcb_socket; > + struct socket *so = rop->rcb.rcb_socket; > if (sbappendaddr(so, &so->so_rcv, &route_src, > desync_mbuf, NULL) != 0) { > rop->flags &= ~ROUTECB_FLAG_DESYNC; > - sorwakeup(rp->rcb_socket); > + sorwakeup(rop->rcb.rcb_socket); > return; > } > m_freem(desync_mbuf); > @@ -381,26 +407,22 @@ route_senddesync(void *data) > void > route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family) > { > - struct rawcb *rp; > struct routecb *rop; > + struct rawcb *rp; > struct rt_msghdr *rtm; > struct mbuf *m = m0; > - int sockets = 0; > struct socket *last = NULL; > - struct sockaddr *sosrc, *sodst; > + struct srp_ref sr; > > KERNEL_ASSERT_LOCKED(); > > - sosrc = &route_src; > - sodst = &route_dst; > - > /* ensure that we can access the rtm_type via mtod() */ > if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { > m_freem(m); > return; > } > > - LIST_FOREACH(rop, &route_cb.rcb, rcb_list) { > + SRPL_FOREACH(rop, &sr, &route_cb.rcb, rcb_list) { > rp = &rop->rcb; > if (!(rp->rcb_socket->so_state & SS_ISCONNECTED)) > continue; > @@ -459,8 +481,8 @@ route_input(struct mbuf *m0, struct sock > struct mbuf *n; > if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { > if (sbspace(last, &last->so_rcv) < (2*MSIZE) || > - sbappendaddr(last, &last->so_rcv, sosrc, > - n, (struct mbuf *)NULL) == 0) { > + sbappendaddr(last, &last->so_rcv, > + &route_src, n, (struct mbuf *)NULL) == 0) { > /* > * Flag socket as desync'ed and > * flush required > @@ -468,31 +490,35 @@ route_input(struct mbuf *m0, struct sock > sotoroutecb(last)->flags |= > ROUTECB_FLAG_DESYNC | > ROUTECB_FLAG_FLUSH; > - route_senddesync(sotorawcb(last)); > + route_senddesync(sotoroutecb(last)); > m_freem(n); > } else { > sorwakeup(last); > - sockets++; > } > } > + refcnt_rele_wake(&sotoroutecb(last)->refcnt); > } > - last = rp->rcb_socket; > + /* keep a reference for last */ > + refcnt_take(&rop->refcnt); > + last = rop->rcb.rcb_socket; > } > if (last) { > if (sbspace(last, &last->so_rcv) < (2 * MSIZE) || > - sbappendaddr(last, &last->so_rcv, sosrc, > + sbappendaddr(last, &last->so_rcv, &route_src, > m, (struct mbuf *)NULL) == 0) { > /* Flag socket as desync'ed and flush required */ > sotoroutecb(last)->flags |= > ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; > - route_senddesync(sotorawcb(last)); > + route_senddesync(sotoroutecb(last)); > m_freem(m); > } else { > sorwakeup(last); > - sockets++; > } > + refcnt_rele_wake(&sotoroutecb(last)->refcnt); > } else > m_freem(m); > + > + SRPL_LEAVE(&sr); > } > > struct rt_msghdr * >