Functions delivering routing messages currently need the KERNEL_LOCK().
That's because routing socket/PCB states are still protected by it.

The diff below pushes the lock down by calling solock()/sounlock() inside
route_input(). 
As a result, rtm_miss() and rtm_send() no longer need to be surrounded by
a KERNEL_LOCK()/KERNEL_UNLOCK() dance.  It also makes it easier to use a
different lock to protect socket/PCB states.

Note that rtm_senddesync_timer() doesn't need timeout_set_proc(9) yet,
because the routing socket lock remains the KERNEL_LOCK() for now.

ok?

Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.374
diff -u -p -r1.374 route.c
--- net/route.c 24 Apr 2018 06:19:47 -0000      1.374
+++ net/route.c 6 Jun 2018 07:44:04 -0000
@@ -254,7 +254,6 @@ rt_clone(struct rtentry **rtp, struct so
        memset(&info, 0, sizeof(info));
        info.rti_info[RTAX_DST] = dst;
 
-       KERNEL_LOCK();
        /*
         * The priority of cloned route should be different
         * to avoid conflict with /32 cloning routes.
@@ -262,8 +261,10 @@ rt_clone(struct rtentry **rtp, struct so
         * It should also be higher to let the ARP layer find
         * cloned routes instead of the cloning one.
         */
+       KERNEL_LOCK();
        error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
            rtableid);
+       KERNEL_UNLOCK();
        if (error) {
                rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
        } else {
@@ -272,7 +273,6 @@ rt_clone(struct rtentry **rtp, struct so
                rtfree(*rtp);
                *rtp = rt;
        }
-       KERNEL_UNLOCK();
        return (error);
 }
 
@@ -655,9 +655,7 @@ out:
        info.rti_info[RTAX_DST] = dst;
        info.rti_info[RTAX_GATEWAY] = gateway;
        info.rti_info[RTAX_AUTHOR] = src;
-       KERNEL_LOCK();
        rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
-       KERNEL_UNLOCK();
 }
 
 /*
@@ -683,9 +681,7 @@ rtdeletemsg(struct rtentry *rt, struct i
        if (!ISSET(rt->rt_flags, RTF_HOST))
                info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
        error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
-       KERNEL_LOCK();
        rtm_send(rt, RTM_DELETE, error, tableid);
-       KERNEL_UNLOCK();
        if (error == 0)
                rtfree(rt);
        return (error);
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.268
diff -u -p -r1.268 rtsock.c
--- net/rtsock.c        6 Jun 2018 07:12:52 -0000       1.268
+++ net/rtsock.c        6 Jun 2018 07:44:05 -0000
@@ -113,7 +113,8 @@ int route_usrreq(struct socket *, int, s
 void   route_input(struct mbuf *m0, struct socket *, sa_family_t);
 int    route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
 int    route_cleargateway(struct rtentry *, void *, unsigned int);
-void   rtm_senddesync(void *);
+void   rtm_senddesync_timer(void *);
+void   rtm_senddesync(struct socket *);
 int    rtm_sendup(struct socket *, struct mbuf *, int);
 
 int    rtm_getifa(struct rt_addrinfo *, unsigned int);
@@ -243,7 +244,7 @@ route_attach(struct socket *so, int prot
        rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO);
        so->so_pcb = rop;
        /* Init the timeout structure */
-       timeout_set(&rop->rop_timeout, rtm_senddesync, rop);
+       timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
        refcnt_init(&rop->rop_refcnt);
 
        if (curproc == NULL)
@@ -374,12 +375,23 @@ route_ctloutput(int op, struct socket *s
 }
 
 void
-rtm_senddesync(void *data)
+rtm_senddesync_timer(void *xso)
 {
-       struct routecb  *rop;
+       struct socket   *so = xso;
+       int              s;
+
+       s = solock(so);
+       rtm_senddesync(so);
+       sounlock(so, s);
+}
+
+void
+rtm_senddesync(struct socket *so)
+{
+       struct routecb  *rop = sotoroutecb(so);
        struct mbuf     *desync_mbuf;
 
-       rop = (struct routecb *)data;
+       soassertlocked(so);
 
        /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
        if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
@@ -391,7 +403,6 @@ rtm_senddesync(void *data)
         */
        desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
        if (desync_mbuf != NULL) {
-               struct socket *so = rop->rop_socket;
                if (sbappendaddr(so, &so->so_rcv, &route_src,
                    desync_mbuf, NULL) != 0) {
                        rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
@@ -405,15 +416,15 @@ rtm_senddesync(void *data)
 }
 
 void
-route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family)
+route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
 {
+       struct socket *so;
        struct routecb *rop;
        struct rt_msghdr *rtm;
        struct mbuf *m = m0;
        struct socket *last = NULL;
        struct srp_ref sr;
-
-       KERNEL_ASSERT_LOCKED();
+       int s;
 
        /* ensure that we can access the rtm_type via mtod() */
        if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
@@ -422,14 +433,6 @@ route_input(struct mbuf *m0, struct sock
        }
 
        SRPL_FOREACH(rop, &sr, &route_cb.rcb, rop_list) {
-               if (!(rop->rop_socket->so_state & SS_ISCONNECTED))
-                       continue;
-               if (rop->rop_socket->so_state & SS_CANTRCVMORE)
-                       continue;
-               /* Check to see if we don't want our own messages. */
-               if (so == rop->rop_socket && !(so->so_options & SO_USELOOPBACK))
-                       continue;
-
                /*
                 * If route socket is bound to an address family only send
                 * messages that match the address family. Address family
@@ -440,15 +443,31 @@ route_input(struct mbuf *m0, struct sock
                    rop->rop_proto.sp_protocol != sa_family)
                        continue;
 
+
+               so = rop->rop_socket;
+               s = solock(so);
+
+               /*
+                * Check to see if we don't want our own messages and
+                * if we can receive anything.
+                */
+               if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
+                   !(so->so_state & SS_ISCONNECTED) ||
+                   (so->so_state & SS_CANTRCVMORE)) {
+next:
+                       sounlock(so, s);
+                       continue;
+               }
+
                /* filter messages that the process does not want */
                rtm = mtod(m, struct rt_msghdr *);
                /* but RTM_DESYNC can't be filtered */
                if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 &&
                    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
-                       continue;
+                       goto next;
                if (rop->rop_priority != 0 &&
                    rop->rop_priority < rtm->rtm_priority)
-                       continue;
+                       goto next;
                switch (rtm->rtm_type) {
                case RTM_IFANNOUNCE:
                case RTM_DESYNC:
@@ -461,13 +480,13 @@ route_input(struct mbuf *m0, struct sock
                        /* check against rdomain id */
                        if (rop->rop_rtableid != RTABLE_ANY &&
                            rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
-                               continue;
+                               goto next;
                        break;
                default:
                        /* check against rtable id */
                        if (rop->rop_rtableid != RTABLE_ANY &&
                            rop->rop_rtableid != rtm->rtm_tableid)
-                               continue;
+                               goto next;
                        break;
                }
 
@@ -476,10 +495,13 @@ route_input(struct mbuf *m0, struct sock
                 * any more messages until the flag is cleared.
                 */
                if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
-                       continue;
+                       goto next;
+               sounlock(so, s);
 
                if (last) {
+                       s = solock(last);
                        rtm_sendup(last, m, 1);
+                       sounlock(last, s);
                        refcnt_rele_wake(&sotoroutecb(last)->rop_refcnt);
                }
                /* keep a reference for last */
@@ -489,7 +511,9 @@ route_input(struct mbuf *m0, struct sock
        SRPL_LEAVE(&sr);
 
        if (last) {
+               s = solock(last);
                rtm_sendup(last, m, 0);
+               sounlock(last, s);
                refcnt_rele_wake(&sotoroutecb(last)->rop_refcnt);
        } else
                m_freem(m);
@@ -501,6 +525,8 @@ rtm_sendup(struct socket *so, struct mbu
        struct routecb *rop = sotoroutecb(so);
        struct mbuf *m;
 
+       soassertlocked(so);
+
        if (more) {
                m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
                if (m == NULL)
@@ -512,7 +538,7 @@ rtm_sendup(struct socket *so, struct mbu
            sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
                /* Flag socket as desync'ed and flush required */
                rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
-               rtm_senddesync(rop);
+               rtm_senddesync(so);
                m_freem(m);
                return (ENOBUFS);
        }
Index: netinet/if_ether.c
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.235
diff -u -p -r1.235 if_ether.c
--- netinet/if_ether.c  31 Mar 2018 15:07:09 -0000      1.235
+++ netinet/if_ether.c  6 Jun 2018 07:44:06 -0000
@@ -664,9 +664,7 @@ arpcache(struct ifnet *ifp, struct ether
 
        /* Notify userland that an ARP resolution has been done. */
        if (la->la_asked || changed) {
-               KERNEL_LOCK();
                rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain);
-               KERNEL_UNLOCK();
        }
 
        la->la_asked = 0;
Index: netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.234
diff -u -p -r1.234 in_pcb.c
--- netinet/in_pcb.c    6 Jun 2018 06:55:22 -0000       1.234
+++ netinet/in_pcb.c    6 Jun 2018 07:44:07 -0000
@@ -722,10 +722,8 @@ in_losing(struct inpcb *inp)
                info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
                info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
 
-               KERNEL_LOCK();
                rtm_miss(RTM_LOSING, &info, rt->rt_flags, rt->rt_priority,
                    rt->rt_ifidx, 0, inp->inp_rtableid);
-               KERNEL_UNLOCK();
                if (rt->rt_flags & RTF_DYNAMIC) {
                        struct ifnet *ifp;
 
Index: netinet6/nd6_nbr.c
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6_nbr.c,v
retrieving revision 1.122
diff -u -p -r1.122 nd6_nbr.c
--- netinet6/nd6_nbr.c  23 Nov 2017 13:32:25 -0000      1.122
+++ netinet6/nd6_nbr.c  6 Jun 2018 07:44:08 -0000
@@ -720,9 +720,7 @@ nd6_na_input(struct mbuf *m, int off, in
                        ln->ln_state = ND6_LLINFO_REACHABLE;
                        ln->ln_byhint = 0;
                        /* Notify userland that a new ND entry is reachable. */
-                       KERNEL_LOCK();
                        rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain);
-                       KERNEL_UNLOCK();
                        if (!ND6_LLINFO_PERMANENT(ln)) {
                                nd6_llinfo_settimer(ln,
                                    ND_IFINFO(ifp)->reachable);

Reply via email to