This is a start at pushing locking down into the socket layer.
Use an SRPL list to manage the route PCBs and start running code without
the kernel lock where it is safe. Includes some cleanup, since the global
rawcb list was removed along the way.

The goal is to gradually enlarge the section of code that runs without
the kernel lock.
-- 
:wq Claudio

Index: net/raw_cb.c
===================================================================
RCS file: /cvs/src/sys/net/raw_cb.c,v
retrieving revision 1.11
diff -u -p -r1.11 raw_cb.c
--- net/raw_cb.c        24 Jan 2017 10:08:30 -0000      1.11
+++ net/raw_cb.c        28 May 2017 07:34:19 -0000
@@ -46,16 +46,10 @@
 
 /*
  * Routines to manage the raw protocol control blocks.
- *
- * TODO:
- *     hash lookups by protocol family/protocol + address family
- *     take care of unique address problems per AF?
- *     redo address binding to allow wildcards
  */
 
 u_long raw_sendspace = RAWSNDQ;
 u_long raw_recvspace = RAWRCVQ;
-struct rawcbhead rawcb;
 
 /*
  * Allocate a control block and a nominal amount
@@ -72,14 +66,13 @@ raw_attach(struct socket *so, int proto)
         * after space has been allocated for the
         * rawcb.
         */
-       if (rp == 0)
+       if (rp == NULL)
                return (ENOBUFS);
        if ((error = soreserve(so, raw_sendspace, raw_recvspace)) != 0)
                return (error);
        rp->rcb_socket = so;
        rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
        rp->rcb_proto.sp_protocol = proto;
-       LIST_INSERT_HEAD(&rawcb, rp, rcb_list);
        return (0);
 }
 
@@ -94,7 +87,6 @@ raw_detach(struct rawcb *rp)
 
        so->so_pcb = 0;
        sofree(so);
-       LIST_REMOVE(rp, rcb_list);
        free((caddr_t)(rp), M_PCB, 0);
 }
 
@@ -104,7 +96,6 @@ raw_detach(struct rawcb *rp)
 void
 raw_disconnect(struct rawcb *rp)
 {
-
        if (rp->rcb_socket->so_state & SS_NOFDREF)
                raw_detach(rp);
 }
Index: net/raw_cb.h
===================================================================
RCS file: /cvs/src/sys/net/raw_cb.h,v
retrieving revision 1.11
diff -u -p -r1.11 raw_cb.h
--- net/raw_cb.h        23 Jan 2017 16:31:24 -0000      1.11
+++ net/raw_cb.h        27 May 2017 19:36:17 -0000
@@ -40,7 +40,6 @@
  * to tie a socket to the generic raw interface.
  */
 struct rawcb {
-       LIST_ENTRY(rawcb) rcb_list;     /* doubly linked list */
        struct  socket *rcb_socket;     /* back pointer to socket */
        struct  sockaddr *rcb_faddr;    /* destination address */
        struct  sockaddr *rcb_laddr;    /* socket's address */
@@ -54,8 +53,6 @@ struct rawcb {
 #define        RAWRCVQ         8192
 
 #ifdef _KERNEL
-
-extern LIST_HEAD(rawcbhead, rawcb) rawcb;              /* head of list */
 
 #define        sotorawcb(so)           ((struct rawcb *)(so)->so_pcb)
 int     raw_attach(struct socket *, int);
Index: net/raw_usrreq.c
===================================================================
RCS file: /cvs/src/sys/net/raw_usrreq.c,v
retrieving revision 1.31
diff -u -p -r1.31 raw_usrreq.c
--- net/raw_usrreq.c    13 Mar 2017 20:18:21 -0000      1.31
+++ net/raw_usrreq.c    28 May 2017 07:32:16 -0000
@@ -45,15 +45,6 @@
 #include <net/raw_cb.h>
 
 #include <sys/stdarg.h>
-/*
- * Initialize raw connection block q.
- */
-void
-raw_init(void)
-{
-
-       LIST_INIT(&rawcb);
-}
 
 int
 raw_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
@@ -71,7 +62,7 @@ raw_usrreq(struct socket *so, int req, s
                m_freem(m);
                return (EOPNOTSUPP);
        }
-       if (rp == 0) {
+       if (rp == NULL) {
                m_freem(m);
                return (EINVAL);
        }
@@ -81,10 +72,6 @@ raw_usrreq(struct socket *so, int req, s
         * Flush data or not depending on the options.
         */
        case PRU_DETACH:
-               if (rp == 0) {
-                       error = ENOTCONN;
-                       break;
-               }
                raw_detach(rp);
                break;
 
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.237
diff -u -p -r1.237 rtsock.c
--- net/rtsock.c        19 Apr 2017 15:21:54 -0000      1.237
+++ net/rtsock.c        28 May 2017 13:58:14 -0000
@@ -70,6 +70,7 @@
 #include <sys/socketvar.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
+#include <sys/srp.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
@@ -98,6 +99,9 @@ struct walkarg {
        caddr_t w_where, w_tmem;
 };
 
+void   route_prinit(void);
+void   route_ref(void *, void *);
+void   route_unref(void *, void *);
 int    route_output(struct mbuf *, struct socket *, struct sockaddr *,
            struct mbuf *);
 int    route_ctloutput(int, struct socket *, int, int, struct mbuf *);
@@ -126,19 +130,21 @@ int                sysctl_ifnames(struct walkarg *);
 int             sysctl_rtable_rtstat(void *, size_t *, void *);
 
 struct routecb {
-       struct rawcb    rcb;
-       struct timeout  timeout;
-       unsigned int    msgfilter;
-       unsigned int    flags;
-       u_int           rtableid;
+       struct rawcb            rcb;
+       SRPL_ENTRY(routecb)     rcb_list;
+       struct refcnt           refcnt;
+       struct timeout          timeout;
+       unsigned int            msgfilter;
+       unsigned int            flags;
+       u_int                   rtableid;
 };
 #define        sotoroutecb(so) ((struct routecb *)(so)->so_pcb)
 
 struct route_cb {
-       int             ip_count;
-       int             ip6_count;
-       int             mpls_count;
-       int             any_count;
+       SRPL_HEAD(, routecb)    rcb;
+       struct srpl_rc          rcb_rc;
+       struct rwlock           rcb_lk;
+       unsigned int            any_count;
 };
 
 struct route_cb route_cb;
@@ -154,46 +160,70 @@ struct route_cb route_cb;
 
 #define ROUTE_DESYNC_RESEND_TIMEOUT    (hz / 5)        /* In hz */
 
+void
+route_prinit(void)
+{
+       srpl_rc_init(&route_cb.rcb_rc, route_ref, route_unref, NULL);
+       rw_init(&route_cb.rcb_lk, "rtsock");
+       SRPL_INIT(&route_cb.rcb);
+}
+
+void
+route_ref(void *null, void *v)
+{
+       struct routecb *rop = v;
+
+       refcnt_take(&rop->refcnt);
+}
+
+void
+route_unref(void *null, void *v)
+{
+       struct routecb *rop = v;
+       
+       refcnt_rele_wake(&rop->refcnt);
+}
+
 int
 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
     struct mbuf *control, struct proc *p)
 {
-       struct rawcb    *rp;
        struct routecb  *rop;
-       int              af;
        int              error = 0;
 
-       rp = sotorawcb(so);
+       rop = sotoroutecb(so);
+       if (rop == NULL)
+               return ENOTCONN;
 
        switch (req) {
        case PRU_RCVD:
-               rop = (struct routecb *)rp;
-
                /*
                 * If we are in a FLUSH state, check if the buffer is
                 * empty so that we can clear the flag.
                 */
                if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) &&
-                   ((sbspace(&rp->rcb_socket->so_rcv) ==
-                   rp->rcb_socket->so_rcv.sb_hiwat)))
+                   ((sbspace(&rop->rcb.rcb_socket->so_rcv) ==
+                   rop->rcb.rcb_socket->so_rcv.sb_hiwat)))
                        rop->flags &= ~ROUTECB_FLAG_FLUSH;
                break;
 
        case PRU_DETACH:
-               if (rp) {
-                       timeout_del(&((struct routecb *)rp)->timeout);
-                       af = rp->rcb_proto.sp_protocol;
-                       if (af == AF_INET)
-                               route_cb.ip_count--;
-                       else if (af == AF_INET6)
-                               route_cb.ip6_count--;
-#ifdef MPLS
-                       else if (af == AF_MPLS)
-                               route_cb.mpls_count--;
-#endif
-                       route_cb.any_count--;
-               }
+               KERNEL_UNLOCK();
+
+               error = rw_enter(&route_cb.rcb_lk, RW_WRITE | RW_INTR);
+               if (error != 0)
+                       break;
+
+               timeout_del(&rop->timeout);
+               route_cb.any_count--;
+               SRPL_REMOVE_LOCKED(&route_cb.rcb_rc, &route_cb.rcb,
+                   rop, routecb, rcb_list);
+               refcnt_finalize(&rop->refcnt, "rtsockrefs");
+
+               rw_exit(&route_cb.rcb_lk);
                /* FALLTHROUGH */
+
+               KERNEL_LOCK();
        default:
                error = raw_usrreq(so, req, m, nam, control, p);
        }
@@ -206,7 +236,6 @@ route_attach(struct socket *so, int prot
 {
        struct rawcb    *rp;
        struct routecb  *rop;
-       int              af;
        int              error = 0;
 
        /*
@@ -219,6 +248,7 @@ route_attach(struct socket *so, int prot
        so->so_pcb = rp;
        /* Init the timeout structure */
        timeout_set(&rop->timeout, route_senddesync, rp);
+       refcnt_init(&rop->refcnt);
 
        if (curproc == NULL)
                error = EACCES;
@@ -228,20 +258,27 @@ route_attach(struct socket *so, int prot
                free(rop, M_PCB, sizeof(struct routecb));
                return (error);
        }
+
+       KERNEL_UNLOCK();
+
        rop->rtableid = curproc->p_p->ps_rtableid;
-       af = rp->rcb_proto.sp_protocol;
-       if (af == AF_INET)
-               route_cb.ip_count++;
-       else if (af == AF_INET6)
-               route_cb.ip6_count++;
-#ifdef MPLS
-       else if (af == AF_MPLS)
-               route_cb.mpls_count++;
-#endif
        rp->rcb_faddr = &route_src;
+
+       error = rw_enter(&route_cb.rcb_lk, RW_WRITE | RW_INTR);
+       if (error != 0) {
+               free(rop, M_PCB, sizeof(struct routecb));
+               return (error);
+       }
+               
+       SRPL_INSERT_HEAD_LOCKED(&route_cb.rcb_rc, &route_cb.rcb, rop, rcb_list);
        route_cb.any_count++;
-       soisconnected(so);
+
+       rw_exit(&route_cb.rcb_lk);
+
+       KERNEL_LOCK();
+
        so->so_options |= SO_USELOOPBACK;
+       soisconnected(so);
 
        return (error);
 }
@@ -347,6 +384,7 @@ route_input(struct mbuf *m0, struct sock
        int sockets = 0;
        struct socket *last = NULL;
        struct sockaddr *sosrc, *sodst;
+       struct srp_ref sr;
 
        KERNEL_ASSERT_LOCKED();
 
@@ -359,7 +397,8 @@ route_input(struct mbuf *m0, struct sock
                return;
        }
 
-       LIST_FOREACH(rp, &rawcb, rcb_list) {
+       SRPL_FOREACH(rop, &sr, &route_cb.rcb, rcb_list) {
+               rp = &rop->rcb;
                if (rp->rcb_socket->so_state & SS_CANTRCVMORE)
                        continue;
                if (rp->rcb_proto.sp_family != PF_ROUTE)
@@ -393,7 +432,6 @@ route_input(struct mbuf *m0, struct sock
                        continue;
 
                /* filter messages that the process does not want */
-               rop = (struct routecb *)rp;
                rtm = mtod(m, struct rt_msghdr *);
                /* but RTM_DESYNC can't be filtered */
                if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 &&
@@ -466,6 +504,8 @@ route_input(struct mbuf *m0, struct sock
                }
        } else
                m_freem(m);
+
+       SRPL_LEAVE(&sr);
 }
 
 struct rt_msghdr *
@@ -1781,7 +1821,7 @@ struct protosw routesw[] = {
   .pr_ctloutput        = route_ctloutput,
   .pr_usrreq   = route_usrreq,
   .pr_attach   = route_attach,
-  .pr_init     = raw_init,
+  .pr_init     = route_prinit,
   .pr_sysctl   = sysctl_rtable
 }
 };

Reply via email to