I sent this diff out some time ago and would really like to get this in.
This is one step on makeing rtsock.c less of a hornets nest.
This reduces the side effects in route_output and simplifies some other
bits as well. For example route_input is less variadic and simpler.
Anyone couragous enough to send me an OK?
--
:wq Claudio
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.213
diff -u -p -r1.213 rtsock.c
--- net/rtsock.c 19 Jan 2017 23:18:29 -0000 1.213
+++ net/rtsock.c 20 Jan 2017 01:37:33 -0000
@@ -92,7 +92,6 @@
struct sockaddr route_dst = { 2, PF_ROUTE, };
struct sockaddr route_src = { 2, PF_ROUTE, };
-struct sockproto route_proto = { PF_ROUTE, };
struct walkarg {
int w_op, w_arg, w_given, w_needed, w_tmemsize;
@@ -100,7 +99,7 @@ struct walkarg {
};
int route_ctloutput(int, struct socket *, int, int, struct mbuf **);
-void route_input(struct mbuf *m0, ...);
+void route_input(struct mbuf *m0, unsigned short);
int route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int route_cleargateway(struct rtentry *, void *, unsigned int);
@@ -331,7 +330,7 @@ rt_senddesync(void *data)
}
void
-route_input(struct mbuf *m0, ...)
+route_input(struct mbuf *m0, unsigned short sa_family)
{
struct rawcb *rp;
struct routecb *rop;
@@ -339,15 +338,10 @@ route_input(struct mbuf *m0, ...)
struct mbuf *m = m0;
int s, sockets = 0;
struct socket *last = NULL;
- va_list ap;
- struct sockproto *proto;
struct sockaddr *sosrc, *sodst;
- va_start(ap, m0);
- proto = va_arg(ap, struct sockproto *);
- sosrc = va_arg(ap, struct sockaddr *);
- sodst = va_arg(ap, struct sockaddr *);
- va_end(ap);
+ sosrc = &route_src;
+ sodst = &route_src;
/* ensure that we can access the rtm_type via mtod() */
if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
@@ -358,10 +352,15 @@ route_input(struct mbuf *m0, ...)
LIST_FOREACH(rp, &rawcb, rcb_list) {
if (rp->rcb_socket->so_state & SS_CANTRCVMORE)
continue;
- if (rp->rcb_proto.sp_family != proto->sp_family)
+ if (rp->rcb_proto.sp_family != PF_ROUTE)
continue;
- if (rp->rcb_proto.sp_protocol && proto->sp_protocol &&
- rp->rcb_proto.sp_protocol != proto->sp_protocol)
+ /*
+ * If route socket is bound to an address family only send
+ * messages that match the address family. Address family
+ * agnostic messages are always send.
+ */
+ if (rp->rcb_proto.sp_protocol != 0 && sa_family != 0 &&
+ rp->rcb_proto.sp_protocol != sa_family)
continue;
/*
* We assume the lower level routines have
@@ -458,36 +457,99 @@ route_input(struct mbuf *m0, ...)
m_freem(m);
}
+struct rt_msghdr *
+rt_report(struct rtentry *rt, u_char type, int seq, int tableid)
+{
+ struct rt_msghdr *rtm;
+ struct rt_addrinfo info;
+ struct sockaddr_rtlabel sa_rl;
+ struct sockaddr_in6 sa_mask;
+#ifdef BFD
+ struct sockaddr_bfd sa_bfd;
+#endif
+#ifdef MPLS
+ struct sockaddr_mpls sa_mpls;
+#endif
+ struct ifnet *ifp = NULL;
+ int len;
+
+ bzero(&info, sizeof(info));
+ info.rti_info[RTAX_DST] = rt_key(rt);
+ info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+ info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
+ info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
+#ifdef BFD
+ if (rt->rt_flags & RTF_BFD)
+ info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
+#endif
+#ifdef MPLS
+ if (rt->rt_flags & RTF_MPLS) {
+ bzero(&sa_mpls, sizeof(sa_mpls));
+ sa_mpls.smpls_family = AF_MPLS;
+ sa_mpls.smpls_len = sizeof(sa_mpls);
+ sa_mpls.smpls_label = ((struct rt_mpls *)
+ rt->rt_llinfo)->mpls_label;
+ info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
+ info.rti_mpls = ((struct rt_mpls *)
+ rt->rt_llinfo)->mpls_operation;
+ }
+#endif
+ ifp = if_get(rt->rt_ifidx);
+ if (ifp != NULL) {
+ info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
+ info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
+ if (ifp->if_flags & IFF_POINTOPOINT)
+ info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
+ }
+ if_put(ifp);
+ /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
+
+ /* build new route message */
+ len = rt_msg2(type, RTM_VERSION, &info, NULL, NULL);
+ /* XXX why can't we wait? Should be process context... */
+ rtm = malloc(len, M_RTABLE, M_NOWAIT | M_ZERO);
+ if (rtm == NULL)
+ return NULL;
+
+ rt_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
+ rtm->rtm_type = type;
+ rtm->rtm_index = rt->rt_ifidx;
+ rtm->rtm_tableid = tableid;
+ rtm->rtm_priority = rt->rt_priority & RTP_MASK;
+ rtm->rtm_flags = rt->rt_flags;
+ rtm->rtm_pid = curproc->p_p->ps_pid;
+ rtm->rtm_seq = seq;
+ rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
+ rtm->rtm_addrs = info.rti_addrs;
+#ifdef MPLS
+ rtm->rtm_mpls = info.rti_mpls;
+#endif
+ return rtm;
+}
+
int
route_output(struct mbuf *m, ...)
{
struct rt_msghdr *rtm = NULL;
struct rtentry *rt = NULL;
- struct rtentry *saved_nrt = NULL;
struct rt_addrinfo info;
- int plen, len, newgate = 0, error = 0;
+ int plen, len, seq, newgate = 0, error = 0;
struct ifnet *ifp = NULL;
struct ifaddr *ifa = NULL;
struct socket *so;
struct rawcb *rp = NULL;
- struct sockaddr_rtlabel sa_rl;
- struct sockaddr_in6 sa_mask;
-#ifdef BFD
- struct sockaddr_bfd sa_bfd;
-#endif
#ifdef MPLS
- struct sockaddr_mpls sa_mpls, *psa_mpls;
+ struct sockaddr_mpls *psa_mpls;
#endif
va_list ap;
u_int tableid;
u_int8_t prio;
- u_char vers;
+ u_char vers, type;
va_start(ap, m);
so = va_arg(ap, struct socket *);
va_end(ap);
- info.rti_info[RTAX_DST] = NULL; /* for error handling (goto flush) */
if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
(m = m_pullup(m, sizeof(int32_t))) == 0))
return (ENOBUFS);
@@ -554,10 +616,10 @@ route_output(struct mbuf *m, ...)
if (!rtable_exists(tableid)) {
if (rtm->rtm_type == RTM_ADD) {
if ((error = rtable_add(tableid)) != 0)
- goto flush;
+ goto fail;
} else {
error = EINVAL;
- goto flush;
+ goto fail;
}
}
@@ -597,7 +659,7 @@ route_output(struct mbuf *m, ...)
info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
info.rti_info[RTAX_GENMASK] != NULL) {
error = EINVAL;
- goto flush;
+ goto fail;
}
#ifdef MPLS
info.rti_mpls = rtm->rtm_mpls;
@@ -609,6 +671,11 @@ route_output(struct mbuf *m, ...)
info.rti_flags |= RTF_LLINFO;
}
+ /*
+ * Do not use goto flush before this point since the message itself
+ * may be not consistent and could cause unexpected behaviour in other
+ * userland clients.
+ */
switch (rtm->rtm_type) {
case RTM_ADD:
if (info.rti_info[RTAX_GATEWAY] == NULL) {
@@ -634,23 +701,14 @@ route_output(struct mbuf *m, ...)
rtfree(rt);
rt = NULL;
- error = rtrequest(RTM_ADD, &info, prio, &saved_nrt, tableid);
- if (error == 0) {
+ error = rtrequest(RTM_ADD, &info, prio, &rt, tableid);
+ if (error == 0)
rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
- &saved_nrt->rt_rmx);
- /* write back the priority the kernel used */
- rtm->rtm_priority = saved_nrt->rt_priority & RTP_MASK;
- rtm->rtm_index = saved_nrt->rt_ifidx;
- rtm->rtm_flags = saved_nrt->rt_flags;
- rtfree(saved_nrt);
- }
+ &rt->rt_rmx);
+ else
+ goto flush;
break;
case RTM_DELETE:
- if (!rtable_exists(tableid)) {
- error = EAFNOSUPPORT;
- goto flush;
- }
-
rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
prio);
@@ -668,7 +726,7 @@ route_output(struct mbuf *m, ...)
/* Reset the MTU of the gateway route. */
rtable_walk(tableid, rt_key(rt)->sa_family,
route_cleargateway, rt);
- goto report;
+ break;
}
/*
@@ -678,96 +736,18 @@ route_output(struct mbuf *m, ...)
if ((rt != NULL) &&
ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
error = EINVAL;
- goto report;
+ break;
}
rtfree(rt);
rt = NULL;
error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid);
- if (error == 0)
- goto report;
- break;
- case RTM_GET:
- if (!rtable_exists(tableid)) {
- error = EAFNOSUPPORT;
+ if (error != 0)
goto flush;
- }
- rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
- info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
- prio);
- if (rt == NULL) {
- error = ESRCH;
- goto flush;
- }
-
-report:
- info.rti_info[RTAX_DST] = rt_key(rt);
- info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
- info.rti_info[RTAX_NETMASK] =
- rt_plen2mask(rt, &sa_mask);
- info.rti_info[RTAX_LABEL] =
- rtlabel_id2sa(rt->rt_labelid, &sa_rl);
-#ifdef BFD
- if (rt->rt_flags & RTF_BFD)
- info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
-#endif
-#ifdef MPLS
- if (rt->rt_flags & RTF_MPLS) {
- bzero(&sa_mpls, sizeof(sa_mpls));
- sa_mpls.smpls_family = AF_MPLS;
- sa_mpls.smpls_len = sizeof(sa_mpls);
- sa_mpls.smpls_label = ((struct rt_mpls *)
- rt->rt_llinfo)->mpls_label;
- info.rti_info[RTAX_SRC] =
- (struct sockaddr *)&sa_mpls;
- info.rti_mpls = ((struct rt_mpls *)
- rt->rt_llinfo)->mpls_operation;
- rtm->rtm_mpls = info.rti_mpls;
- }
-#endif
- info.rti_info[RTAX_IFP] = NULL;
- info.rti_info[RTAX_IFA] = NULL;
- ifp = if_get(rt->rt_ifidx);
- if (ifp != NULL && rtm->rtm_addrs & (RTA_IFP|RTA_IFA)) {
- info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
- info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
- if (ifp->if_flags & IFF_POINTOPOINT)
- info.rti_info[RTAX_BRD] =
- rt->rt_ifa->ifa_dstaddr;
- else
- info.rti_info[RTAX_BRD] = NULL;
- }
- if_put(ifp);
- len = rt_msg2(rtm->rtm_type, RTM_VERSION, &info, NULL,
- NULL);
- if (len > rtm->rtm_msglen) {
- struct rt_msghdr *new_rtm;
- new_rtm = malloc(len, M_RTABLE, M_NOWAIT);
- if (new_rtm == NULL) {
- error = ENOBUFS;
- goto flush;
- }
- memcpy(new_rtm, rtm, rtm->rtm_msglen);
- free(rtm, M_RTABLE, 0);
- rtm = new_rtm;
- }
- rt_msg2(rtm->rtm_type, RTM_VERSION, &info, (caddr_t)rtm,
- NULL);
- rtm->rtm_flags = rt->rt_flags;
- rtm->rtm_use = 0;
- rtm->rtm_priority = rt->rt_priority & RTP_MASK;
- rtm->rtm_index = rt->rt_ifidx;
- rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
- rtm->rtm_addrs = info.rti_addrs;
break;
case RTM_CHANGE:
case RTM_LOCK:
- if (!rtable_exists(tableid)) {
- error = EAFNOSUPPORT;
- goto flush;
- }
-
rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
prio);
@@ -916,9 +896,6 @@ change:
rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
&rt->rt_rmx);
- rtm->rtm_index = rt->rt_ifidx;
- rtm->rtm_priority = rt->rt_priority & RTP_MASK;
- rtm->rtm_flags = rt->rt_flags;
ifp = if_get(rt->rt_ifidx);
KASSERT(ifp != NULL);
@@ -938,13 +915,45 @@ change:
rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
rt->rt_rmx.rmx_locks |=
(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
- rtm->rtm_priority = rt->rt_priority & RTP_MASK;
break;
}
break;
+ case RTM_GET:
+ rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
+ info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
+ prio);
+ if (rt == NULL) {
+ error = ESRCH;
+ goto flush;
+ }
+ break;
+ }
+
+ /* CODE DOWN HERE NEEDS:
+ rt, rtm, error, so, m, tableid, sa_family of some sort
+
+ OTHER NOTES:
+ - to end up here it seems all is OK, error is most probably 0
+ - error cases take the flush route or in bad cases fail.
+ - fail does not report the message back but just fails the call.
+ if the message is not valid then fail should be used
+ - the dance in route_input and rp->rcb_proto.sp_family = 0 dance is
+ insane, better to pass the so we don't want to listen on to
+ route_input.
+ */
+
+ type = rtm->rtm_type;
+ seq = rtm->rtm_seq;
+ free(rtm, M_RTABLE, 0);
+ rtm = rt_report(rt, type, seq, tableid);
+ if (rtm == NULL) {
+ error = ENOBUFS;
+ goto fail;
}
flush:
+ if (rt)
+ rtfree(rt);
if (rtm) {
if (error)
rtm->rtm_errno = error;
@@ -952,16 +961,12 @@ flush:
rtm->rtm_flags |= RTF_DONE;
}
}
- if (info.rti_info[RTAX_DST])
- route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family;
- if (rt)
- rtfree(rt);
-
/*
* Check to see if we don't want our own messages.
*/
if (!(so->so_options & SO_USELOOPBACK)) {
if (route_cb.any_count <= 1) {
+ /* no other listener and no loopback of messages */
fail:
free(rtm, M_RTABLE, 0);
m_freem(m);
@@ -969,9 +974,8 @@ fail:
}
/* There is another listener, so construct message */
rp = sotorawcb(so);
- }
- if (rp)
rp->rcb_proto.sp_family = 0; /* Avoid us */
+ }
if (rtm) {
if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) {
m_freem(m);
@@ -981,9 +985,10 @@ fail:
free(rtm, M_RTABLE, 0);
}
if (m)
- route_input(m, &route_proto, &route_src, &route_dst);
+ route_input(m, info.rti_info[RTAX_DST] ?
+ info.rti_info[RTAX_DST]->sa_family : 0);
if (rp)
- rp->rcb_proto.sp_family = PF_ROUTE;
+ rp->rcb_proto.sp_family = PF_ROUTE; /* Readd us */
return (error);
}
@@ -1055,7 +1060,6 @@ rt_setmetrics(u_long which, const struct
out->rmx_expire = expire;
}
- /* RTV_PRIORITY handled before */
}
void
@@ -1257,11 +1261,7 @@ rt_missmsg(int type, struct rt_addrinfo
rtm->rtm_tableid = tableid;
rtm->rtm_addrs = rtinfo->rti_addrs;
rtm->rtm_index = ifidx;
- if (sa == NULL)
- route_proto.sp_protocol = 0;
- else
- route_proto.sp_protocol = sa->sa_family;
- route_input(m, &route_proto, &route_src, &route_dst);
+ route_input(m, sa ? sa->sa_family : 0);
}
/*
@@ -1286,8 +1286,7 @@ rt_ifmsg(struct ifnet *ifp)
ifm->ifm_xflags = ifp->if_xflags;
ifm->ifm_data = ifp->if_data;
ifm->ifm_addrs = 0;
- route_proto.sp_protocol = 0;
- route_input(m, &route_proto, &route_src, &route_dst);
+ route_input(m, 0);
}
/*
@@ -1323,11 +1322,7 @@ rt_sendaddrmsg(struct rtentry *rt, int c
ifam->ifam_addrs = info.rti_addrs;
ifam->ifam_tableid = ifp->if_rdomain;
- if (ifa->ifa_addr == NULL)
- route_proto.sp_protocol = 0;
- else
- route_proto.sp_protocol = ifa->ifa_addr->sa_family;
- route_input(m, &route_proto, &route_src, &route_dst);
+ route_input(m, ifa->ifa_addr ? ifa->ifa_addr->sa_family : 0);
}
/*
@@ -1349,8 +1344,7 @@ rt_ifannouncemsg(struct ifnet *ifp, int
ifan->ifan_index = ifp->if_index;
strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
ifan->ifan_what = what;
- route_proto.sp_protocol = 0;
- route_input(m, &route_proto, &route_src, &route_dst);
+ route_input(m, 0);
}
#ifdef BFD
@@ -1381,8 +1375,7 @@ rt_bfdmsg(struct bfd_config *bfd)
bfd2sa(bfd->bc_rt, &sa_bfd);
memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd));
- route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family;
- route_input(m, &route_proto, &route_src, &route_dst);
+ route_input(m, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
@@ -1647,7 +1640,7 @@ extern struct domain routedomain; /* or
struct protosw routesw[] = {
{ SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
- route_input, route_output, 0, route_ctloutput,
+ 0, route_output, 0, route_ctloutput,
route_usrreq,
raw_init, 0, 0, 0,
sysctl_rtable,