Functions delivering routing messages currently need the KERNEL_LOCK().
That's because routing socket/PCB states are still protected by it.
The diff below pushes the lock down by calling solock()/sounlock() inside
route_input().
As a result, rtm_miss() and rtm_send() no longer need to be surrounded by a
KERNEL_LOCK()/KERNEL_UNLOCK() dance. It also makes it easier to later use a
different lock to protect socket/PCB states.
Note that rtm_senddesync_timer() doesn't need timeout_set_proc(9) yet,
because the routing socket lock stays the KERNEL_LOCK().
ok?
Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.374
diff -u -p -r1.374 route.c
--- net/route.c 24 Apr 2018 06:19:47 -0000 1.374
+++ net/route.c 6 Jun 2018 07:44:04 -0000
@@ -254,7 +254,6 @@ rt_clone(struct rtentry **rtp, struct so
memset(&info, 0, sizeof(info));
info.rti_info[RTAX_DST] = dst;
- KERNEL_LOCK();
/*
* The priority of cloned route should be different
* to avoid conflict with /32 cloning routes.
@@ -262,8 +261,10 @@ rt_clone(struct rtentry **rtp, struct so
* It should also be higher to let the ARP layer find
* cloned routes instead of the cloning one.
*/
+ KERNEL_LOCK();
error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
rtableid);
+ KERNEL_UNLOCK();
if (error) {
rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
} else {
@@ -272,7 +273,6 @@ rt_clone(struct rtentry **rtp, struct so
rtfree(*rtp);
*rtp = rt;
}
- KERNEL_UNLOCK();
return (error);
}
@@ -655,9 +655,7 @@ out:
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_AUTHOR] = src;
- KERNEL_LOCK();
rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
- KERNEL_UNLOCK();
}
/*
@@ -683,9 +681,7 @@ rtdeletemsg(struct rtentry *rt, struct i
if (!ISSET(rt->rt_flags, RTF_HOST))
info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
- KERNEL_LOCK();
rtm_send(rt, RTM_DELETE, error, tableid);
- KERNEL_UNLOCK();
if (error == 0)
rtfree(rt);
return (error);
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.268
diff -u -p -r1.268 rtsock.c
--- net/rtsock.c 6 Jun 2018 07:12:52 -0000 1.268
+++ net/rtsock.c 6 Jun 2018 07:44:05 -0000
@@ -113,7 +113,8 @@ int route_usrreq(struct socket *, int, s
void route_input(struct mbuf *m0, struct socket *, sa_family_t);
int route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int route_cleargateway(struct rtentry *, void *, unsigned int);
-void rtm_senddesync(void *);
+void rtm_senddesync_timer(void *);
+void rtm_senddesync(struct socket *);
int rtm_sendup(struct socket *, struct mbuf *, int);
int rtm_getifa(struct rt_addrinfo *, unsigned int);
@@ -243,7 +244,7 @@ route_attach(struct socket *so, int prot
rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO);
so->so_pcb = rop;
/* Init the timeout structure */
- timeout_set(&rop->rop_timeout, rtm_senddesync, rop);
+ timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
refcnt_init(&rop->rop_refcnt);
if (curproc == NULL)
@@ -374,12 +375,23 @@ route_ctloutput(int op, struct socket *s
}
void
-rtm_senddesync(void *data)
+rtm_senddesync_timer(void *xso)
{
- struct routecb *rop;
+ struct socket *so = xso;
+ int s;
+
+ s = solock(so);
+ rtm_senddesync(so);
+ sounlock(so, s);
+}
+
+void
+rtm_senddesync(struct socket *so)
+{
+ struct routecb *rop = sotoroutecb(so);
struct mbuf *desync_mbuf;
- rop = (struct routecb *)data;
+ soassertlocked(so);
/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
@@ -391,7 +403,6 @@ rtm_senddesync(void *data)
*/
desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
if (desync_mbuf != NULL) {
- struct socket *so = rop->rop_socket;
if (sbappendaddr(so, &so->so_rcv, &route_src,
desync_mbuf, NULL) != 0) {
rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
@@ -405,15 +416,15 @@ rtm_senddesync(void *data)
}
void
-route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family)
+route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
{
+ struct socket *so;
struct routecb *rop;
struct rt_msghdr *rtm;
struct mbuf *m = m0;
struct socket *last = NULL;
struct srp_ref sr;
-
- KERNEL_ASSERT_LOCKED();
+ int s;
/* ensure that we can access the rtm_type via mtod() */
if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
@@ -422,14 +433,6 @@ route_input(struct mbuf *m0, struct sock
}
SRPL_FOREACH(rop, &sr, &route_cb.rcb, rop_list) {
- if (!(rop->rop_socket->so_state & SS_ISCONNECTED))
- continue;
- if (rop->rop_socket->so_state & SS_CANTRCVMORE)
- continue;
- /* Check to see if we don't want our own messages. */
- if (so == rop->rop_socket && !(so->so_options & SO_USELOOPBACK))
- continue;
-
/*
* If route socket is bound to an address family only send
* messages that match the address family. Address family
@@ -440,15 +443,31 @@ route_input(struct mbuf *m0, struct sock
rop->rop_proto.sp_protocol != sa_family)
continue;
+
+ so = rop->rop_socket;
+ s = solock(so);
+
+ /*
+ * Check to see if we don't want our own messages and
+ * if we can receive anything.
+ */
+ if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
+ !(so->so_state & SS_ISCONNECTED) ||
+ (so->so_state & SS_CANTRCVMORE)) {
+next:
+ sounlock(so, s);
+ continue;
+ }
+
/* filter messages that the process does not want */
rtm = mtod(m, struct rt_msghdr *);
/* but RTM_DESYNC can't be filtered */
if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 &&
!(rop->rop_msgfilter & (1 << rtm->rtm_type)))
- continue;
+ goto next;
if (rop->rop_priority != 0 &&
rop->rop_priority < rtm->rtm_priority)
- continue;
+ goto next;
switch (rtm->rtm_type) {
case RTM_IFANNOUNCE:
case RTM_DESYNC:
@@ -461,13 +480,13 @@ route_input(struct mbuf *m0, struct sock
/* check against rdomain id */
if (rop->rop_rtableid != RTABLE_ANY &&
rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
- continue;
+ goto next;
break;
default:
/* check against rtable id */
if (rop->rop_rtableid != RTABLE_ANY &&
rop->rop_rtableid != rtm->rtm_tableid)
- continue;
+ goto next;
break;
}
@@ -476,10 +495,13 @@ route_input(struct mbuf *m0, struct sock
* any more messages until the flag is cleared.
*/
if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
- continue;
+ goto next;
+ sounlock(so, s);
if (last) {
+ s = solock(last);
rtm_sendup(last, m, 1);
+ sounlock(last, s);
refcnt_rele_wake(&sotoroutecb(last)->rop_refcnt);
}
/* keep a reference for last */
@@ -489,7 +511,9 @@ route_input(struct mbuf *m0, struct sock
SRPL_LEAVE(&sr);
if (last) {
+ s = solock(last);
rtm_sendup(last, m, 0);
+ sounlock(last, s);
refcnt_rele_wake(&sotoroutecb(last)->rop_refcnt);
} else
m_freem(m);
@@ -501,6 +525,8 @@ rtm_sendup(struct socket *so, struct mbu
struct routecb *rop = sotoroutecb(so);
struct mbuf *m;
+ soassertlocked(so);
+
if (more) {
m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
if (m == NULL)
@@ -512,7 +538,7 @@ rtm_sendup(struct socket *so, struct mbu
sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
/* Flag socket as desync'ed and flush required */
rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
- rtm_senddesync(rop);
+ rtm_senddesync(so);
m_freem(m);
return (ENOBUFS);
}
Index: netinet/if_ether.c
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.235
diff -u -p -r1.235 if_ether.c
--- netinet/if_ether.c 31 Mar 2018 15:07:09 -0000 1.235
+++ netinet/if_ether.c 6 Jun 2018 07:44:06 -0000
@@ -664,9 +664,7 @@ arpcache(struct ifnet *ifp, struct ether
/* Notify userland that an ARP resolution has been done. */
if (la->la_asked || changed) {
- KERNEL_LOCK();
rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain);
- KERNEL_UNLOCK();
}
la->la_asked = 0;
Index: netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.234
diff -u -p -r1.234 in_pcb.c
--- netinet/in_pcb.c 6 Jun 2018 06:55:22 -0000 1.234
+++ netinet/in_pcb.c 6 Jun 2018 07:44:07 -0000
@@ -722,10 +722,8 @@ in_losing(struct inpcb *inp)
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
- KERNEL_LOCK();
rtm_miss(RTM_LOSING, &info, rt->rt_flags, rt->rt_priority,
rt->rt_ifidx, 0, inp->inp_rtableid);
- KERNEL_UNLOCK();
if (rt->rt_flags & RTF_DYNAMIC) {
struct ifnet *ifp;
Index: netinet6/nd6_nbr.c
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6_nbr.c,v
retrieving revision 1.122
diff -u -p -r1.122 nd6_nbr.c
--- netinet6/nd6_nbr.c 23 Nov 2017 13:32:25 -0000 1.122
+++ netinet6/nd6_nbr.c 6 Jun 2018 07:44:08 -0000
@@ -720,9 +720,7 @@ nd6_na_input(struct mbuf *m, int off, in
ln->ln_state = ND6_LLINFO_REACHABLE;
ln->ln_byhint = 0;
/* Notify userland that a new ND entry is reachable. */
- KERNEL_LOCK();
rtm_send(rt, RTM_RESOLVE, 0, ifp->if_rdomain);
- KERNEL_UNLOCK();
if (!ND6_LLINFO_PERMANENT(ln)) {
nd6_llinfo_settimer(ln,
ND_IFINFO(ifp)->reachable);