Functions delivering routing messages currently need the KERNEL_LOCK(), because routing socket/PCB states are still protected by it.
The diff below pushes the lock down by calling solock()/sounlock() inside route_input(). As a result rtm_miss() & rtm_send() no longer need to be surrounded by a KERNEL_LOCK()/UNLOCK() dance. It also makes it easier to use a different lock to protect socket/PCB states. Note that rtm_senddesync_timer() doesn't need timeout_set_proc(9) yet, because the routing socket lock stays the KERNEL_LOCK(). ok? Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.374 diff -u -p -r1.374 route.c --- net/route.c 24 Apr 2018 06:19:47 -0000 1.374 +++ net/route.c 6 Jun 2018 07:44:04 -0000 @@ -254,7 +254,6 @@ rt_clone(struct rtentry **rtp, struct so memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = dst; - KERNEL_LOCK(); /* * The priority of cloned route should be different * to avoid conflict with /32 cloning routes. @@ -262,8 +261,10 @@ rt_clone(struct rtentry **rtp, struct so * It should also be higher to let the ARP layer find * cloned routes instead of the cloning one. 
*/ + KERNEL_LOCK(); error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt, rtableid); + KERNEL_UNLOCK(); if (error) { rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid); } else { @@ -272,7 +273,6 @@ rt_clone(struct rtentry **rtp, struct so rtfree(*rtp); *rtp = rt; } - KERNEL_UNLOCK(); return (error); } @@ -655,9 +655,7 @@ out: info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_AUTHOR] = src; - KERNEL_LOCK(); rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain); - KERNEL_UNLOCK(); } /* @@ -683,9 +681,7 @@ rtdeletemsg(struct rtentry *rt, struct i if (!ISSET(rt->rt_flags, RTF_HOST)) info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid); - KERNEL_LOCK(); rtm_send(rt, RTM_DELETE, error, tableid); - KERNEL_UNLOCK(); if (error == 0) rtfree(rt); return (error); Index: net/rtsock.c =================================================================== RCS file: /cvs/src/sys/net/rtsock.c,v retrieving revision 1.268 diff -u -p -r1.268 rtsock.c --- net/rtsock.c 6 Jun 2018 07:12:52 -0000 1.268 +++ net/rtsock.c 6 Jun 2018 07:44:05 -0000 @@ -113,7 +113,8 @@ int route_usrreq(struct socket *, int, s void route_input(struct mbuf *m0, struct socket *, sa_family_t); int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); int route_cleargateway(struct rtentry *, void *, unsigned int); -void rtm_senddesync(void *); +void rtm_senddesync_timer(void *); +void rtm_senddesync(struct socket *); int rtm_sendup(struct socket *, struct mbuf *, int); int rtm_getifa(struct rt_addrinfo *, unsigned int); @@ -243,7 +244,7 @@ route_attach(struct socket *so, int prot rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO); so->so_pcb = rop; /* Init the timeout structure */ - timeout_set(&rop->rop_timeout, rtm_senddesync, rop); + timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so); refcnt_init(&rop->rop_refcnt); if (curproc == NULL) @@ -374,12 +375,23 
@@ route_ctloutput(int op, struct socket *s } void -rtm_senddesync(void *data) +rtm_senddesync_timer(void *xso) { - struct routecb *rop; + struct socket *so = xso; + int s; + + s = solock(so); + rtm_senddesync(so); + sounlock(so, s); +} + +void +rtm_senddesync(struct socket *so) +{ + struct routecb *rop = sotoroutecb(so); struct mbuf *desync_mbuf; - rop = (struct routecb *)data; + soassertlocked(so); /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) @@ -391,7 +403,6 @@ rtm_senddesync(void *data) */ desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); if (desync_mbuf != NULL) { - struct socket *so = rop->rop_socket; if (sbappendaddr(so, &so->so_rcv, &route_src, desync_mbuf, NULL) != 0) { rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; @@ -405,15 +416,15 @@ rtm_senddesync(void *data) } void -route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family) +route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) { + struct socket *so; struct routecb *rop; struct rt_msghdr *rtm; struct mbuf *m = m0; struct socket *last = NULL; struct srp_ref sr; - - KERNEL_ASSERT_LOCKED(); + int s; /* ensure that we can access the rtm_type via mtod() */ if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { @@ -422,14 +433,6 @@ route_input(struct mbuf *m0, struct sock } SRPL_FOREACH(rop, &sr, &route_cb.rcb, rop_list) { - if (!(rop->rop_socket->so_state & SS_ISCONNECTED)) - continue; - if (rop->rop_socket->so_state & SS_CANTRCVMORE) - continue; - /* Check to see if we don't want our own messages. */ - if (so == rop->rop_socket && !(so->so_options & SO_USELOOPBACK)) - continue; - /* * If route socket is bound to an address family only send * messages that match the address family. 
Address family @@ -440,15 +443,31 @@ route_input(struct mbuf *m0, struct sock rop->rop_proto.sp_protocol != sa_family) continue; + + so = rop->rop_socket; + s = solock(so); + + /* + * Check to see if we don't want our own messages and + * if we can receive anything. + */ + if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || + !(so->so_state & SS_ISCONNECTED) || + (so->so_state & SS_CANTRCVMORE)) { +next: + sounlock(so, s); + continue; + } + /* filter messages that the process does not want */ rtm = mtod(m, struct rt_msghdr *); /* but RTM_DESYNC can't be filtered */ if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 && !(rop->rop_msgfilter & (1 << rtm->rtm_type))) - continue; + goto next; if (rop->rop_priority != 0 && rop->rop_priority < rtm->rtm_priority) - continue; + goto next; switch (rtm->rtm_type) { case RTM_IFANNOUNCE: case RTM_DESYNC: @@ -461,13 +480,13 @@ route_input(struct mbuf *m0, struct sock /* check against rdomain id */ if (rop->rop_rtableid != RTABLE_ANY && rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) - continue; + goto next; break; default: /* check against rtable id */ if (rop->rop_rtableid != RTABLE_ANY && rop->rop_rtableid != rtm->rtm_tableid) - continue; + goto next; break; } @@ -476,10 +495,13 @@ route_input(struct mbuf *m0, struct sock * any more messages until the flag is cleared. 
*/ if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) - continue; + goto next; + sounlock(so, s); if (last) { + s = solock(last); rtm_sendup(last, m, 1); + sounlock(last, s); refcnt_rele_wake(&sotoroutecb(last)->rop_refcnt); } /* keep a reference for last */ @@ -489,7 +511,9 @@ route_input(struct mbuf *m0, struct sock SRPL_LEAVE(&sr); if (last) { + s = solock(last); rtm_sendup(last, m, 0); + sounlock(last, s); refcnt_rele_wake(&sotoroutecb(last)->rop_refcnt); } else m_freem(m); @@ -501,6 +525,8 @@ rtm_sendup(struct socket *so, struct mbu struct routecb *rop = sotoroutecb(so); struct mbuf *m; + soassertlocked(so); + if (more) { m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); if (m == NULL) @@ -512,7 +538,7 @@ rtm_sendup(struct socket *so, struct mbu sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { /* Flag socket as desync'ed and flush required */ rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; - rtm_senddesync(rop); + rtm_senddesync(so); m_freem(m); return (ENOBUFS); } Index: netinet/if_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.235 diff -u -p -r1.235 if_ether.c --- netinet/if_ether.c 31 Mar 2018 15:07:09 -0000 1.235 +++ netinet/if_ether.c 6 Jun 2018 07:44:06 -0000 @@ -664,9 +664,7 @@ arpcache(struct ifnet *ifp, struct ether /* Notify userland that an ARP resolution has been done. 
*/ if (la->la_asked || changed) { - KERNEL_LOCK(); rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain); - KERNEL_UNLOCK(); } la->la_asked = 0; Index: netinet/in_pcb.c =================================================================== RCS file: /cvs/src/sys/netinet/in_pcb.c,v retrieving revision 1.234 diff -u -p -r1.234 in_pcb.c --- netinet/in_pcb.c 6 Jun 2018 06:55:22 -0000 1.234 +++ netinet/in_pcb.c 6 Jun 2018 07:44:07 -0000 @@ -722,10 +722,8 @@ in_losing(struct inpcb *inp) info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); - KERNEL_LOCK(); rtm_miss(RTM_LOSING, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, 0, inp->inp_rtableid); - KERNEL_UNLOCK(); if (rt->rt_flags & RTF_DYNAMIC) { struct ifnet *ifp; Index: netinet6/nd6_nbr.c =================================================================== RCS file: /cvs/src/sys/netinet6/nd6_nbr.c,v retrieving revision 1.122 diff -u -p -r1.122 nd6_nbr.c --- netinet6/nd6_nbr.c 23 Nov 2017 13:32:25 -0000 1.122 +++ netinet6/nd6_nbr.c 6 Jun 2018 07:44:08 -0000 @@ -720,9 +720,7 @@ nd6_na_input(struct mbuf *m, int off, in ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; /* Notify userland that a new ND entry is reachable. */ - KERNEL_LOCK(); rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain); - KERNEL_UNLOCK(); if (!ND6_LLINFO_PERMANENT(ln)) { nd6_llinfo_settimer(ln, ND_IFINFO(ifp)->reachable);