As you might have seen, I started looking at routing entries pointing to stale ifa's. This is a necessary step if we want to rely on our routing entries to do address lookups.
This audit leads me to believe that the actual check used to determine if a cached route entry is still valid might not be enough. Plus some lookups are completely missing such check, see the recent cloning case. Now, before digging into cached route validity, I'd like to simplify the interface to do route lookups. In other words, I'd like to kill rtalloc() and rtalloc_noclone(). The diff below is the part #1. Apart from the fact that this change unifies all our route lookups, it also gives us the possibility to stop using a "struct route" or similar when it is not needed. This structure, used to cache a route, is not protocol agnostic and is responsible for a lot of switch() and code duplication. Search for "struct route" in net/pf.c if you want to see some examples by yourself. So I'm looking for careful reviewers since the diff below remove the following check for every route lookup: if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP) return; But apparently all the calls to rtalloc() and rtalloc_mpath() seems to be done after checking for a similar condition. Ok? Index: net/pf.c =================================================================== RCS file: /home/ncvs/src/sys/net/pf.c,v retrieving revision 1.886 diff -u -p -r1.886 pf.c --- net/pf.c 12 Aug 2014 15:29:33 -0000 1.886 +++ net/pf.c 21 Aug 2014 12:08:12 -0000 @@ -5567,7 +5567,7 @@ pf_route(struct mbuf **m, struct pf_rule ro->ro_tableid = m0->m_pkthdr.ph_rtableid; if (!r->rt) { - rtalloc(ro); + ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid); if (ro->ro_rt == 0) { ipstat.ips_noroute++; goto bad; Index: net/pfkeyv2.c =================================================================== RCS file: /home/ncvs/src/sys/net/pfkeyv2.c,v retrieving revision 1.133 diff -u -p -r1.133 pfkeyv2.c --- net/pfkeyv2.c 12 Jul 2014 18:44:22 -0000 1.133 +++ net/pfkeyv2.c 21 Aug 2014 12:08:12 -0000 @@ -1569,7 +1569,8 @@ pfkeyv2_send(struct socket *socket, void /* Set the rdomain that was obtained from the socket */ re.re_tableid = rdomain; - rtalloc((struct route *) &re); + re.re_rt = rtalloc1((struct sockaddr *)&re.re_dst, RT_REPORT, + re.re_tableid); if (re.re_rt != NULL) { ipo = ((struct sockaddr_encap *) re.re_rt->rt_gateway)->sen_ipsp; RTFREE(re.re_rt); Index: net/radix_mpath.c =================================================================== RCS file: /home/ncvs/src/sys/net/radix_mpath.c,v retrieving revision 1.23 diff -u -p -r1.23 radix_mpath.c --- net/radix_mpath.c 27 May 2014 19:38:15 -0000 1.23 +++ net/radix_mpath.c 21 Aug 2014 12:08:12 -0000 @@ -53,7 +53,7 @@ #include <netinet6/ip6_var.h> #endif -u_int32_t rn_mpath_hash(struct route *, u_int32_t *); +u_int32_t rn_mpath_hash(struct sockaddr *, u_int32_t *); /* * give some jitter to hash, to avoid synchronization between routers @@ -383,42 +383,37 @@ rn_mpath_reprio(struct radix_node *rn, i /* * allocate a route, potentially using multipath to select the peer. */ -void -rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp) +struct rtentry * +rtalloc_mpath(struct sockaddr *dst, u_int32_t *srcaddrp, u_int rtableid) { + struct rtentry *rt; #if defined(INET) || defined(INET6) struct radix_node *rn; int hash, npaths, threshold; #endif - /* - * return a cached entry if it is still valid, otherwise we increase - * the risk of disrupting local flows. - */ - if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) - return; - ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid); + rt = rtalloc1(dst, RT_REPORT, rtableid); /* if the route does not exist or it is not multipath, don't care */ - if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH)) - return; + if (rt == NULL || !ISSET(rt->rt_flags, RTF_MPATH)) + return (rt); /* check if multipath routing is enabled for the specified protocol */ if (!(0 #ifdef INET - || (ipmultipath && ro->ro_dst.sa_family == AF_INET) + || (ipmultipath && dst->sa_family == AF_INET) #endif #ifdef INET6 - || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6) + || (ip6_multipath && dst->sa_family == AF_INET6) #endif )) - return; + return (rt); #if defined(INET) || defined(INET6) /* gw selection by Hash-Threshold (RFC 2992) */ - rn = (struct radix_node *)ro->ro_rt; + rn = (struct radix_node *)rt; npaths = rn_mpath_active_count(rn); - hash = rn_mpath_hash(ro, srcaddrp) & 0xffff; + hash = rn_mpath_hash(dst, srcaddrp) & 0xffff; threshold = 1 + (0xffff / npaths); while (hash > threshold && rn) { /* stay within the multipath routes */ @@ -427,13 +422,14 @@ rtalloc_mpath(struct route *ro, u_int32_ } /* if gw selection fails, use the first match (default) */ - if (!rn) - return; - - rtfree(ro->ro_rt); - ro->ro_rt = (struct rtentry *)rn; - ro->ro_rt->rt_refcnt++; + if (rn != NULL) { + rtfree(rt); + rt = (struct rtentry *)rn; + rt->rt_refcnt++; + } #endif + + return (rt); } int @@ -468,20 +464,20 @@ rn_mpath_inithead(void **head, int off) } while (0) u_int32_t -rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp) +rn_mpath_hash(struct sockaddr *dst, u_int32_t *srcaddrp) { u_int32_t a, b, c; a = b = 0x9e3779b9; c = hashjitter; - switch (ro->ro_dst.sa_family) { + switch (dst->sa_family) { #ifdef INET case AF_INET: { struct sockaddr_in *sin_dst; - sin_dst = (struct sockaddr_in *)&ro->ro_dst; + sin_dst = (struct sockaddr_in *)dst; a += sin_dst->sin_addr.s_addr; b += srcaddrp ? srcaddrp[0] : 0; mix(a, b, c); @@ -493,7 +489,7 @@ rn_mpath_hash(struct route *ro, u_int32_ { struct sockaddr_in6 *sin6_dst; - sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst; + sin6_dst = (struct sockaddr_in6 *)dst; a += sin6_dst->sin6_addr.s6_addr32[0]; b += sin6_dst->sin6_addr.s6_addr32[2]; c += srcaddrp ? srcaddrp[0] : 0; Index: net/radix_mpath.h =================================================================== RCS file: /home/ncvs/src/sys/net/radix_mpath.h,v retrieving revision 1.12 diff -u -p -r1.12 radix_mpath.h --- net/radix_mpath.h 27 May 2014 19:38:15 -0000 1.12 +++ net/radix_mpath.h 21 Aug 2014 12:08:12 -0000 @@ -57,7 +57,7 @@ struct rtentry *rt_mpath_matchgate(struc u_int8_t); int rt_mpath_conflict(struct radix_node_head *, struct rtentry *, struct sockaddr *, int); -void rtalloc_mpath(struct route *, u_int32_t *); +struct rtentry *rtalloc_mpath(struct sockaddr *, u_int32_t *, u_int); int rn_mpath_inithead(void **, int); #endif /* _KERNEL */ Index: net/route.c =================================================================== RCS file: /home/ncvs/src/sys/net/route.c,v retrieving revision 1.180 diff -u -p -r1.180 route.c --- net/route.c 21 Aug 2014 10:07:07 -0000 1.180 +++ net/route.c 21 Aug 2014 12:08:12 -0000 @@ -322,14 +322,6 @@ rtalloc_noclone(struct route *ro) ro->ro_tableid); } -void -rtalloc(struct route *ro) -{ - if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) - return; /* cached route is still valid */ - ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid); -} - struct rtentry * rtalloc1(struct sockaddr *dst, int flags, u_int tableid) { Index: net/route.h =================================================================== RCS file: /home/ncvs/src/sys/net/route.h,v retrieving revision 1.96 diff -u -p -r1.96 route.h --- net/route.h 12 Aug 2014 13:52:08 -0000 1.96 +++ net/route.h 21 Aug 2014 12:08:12 -0000 @@ -381,9 +381,8 @@ unsigned long rt_timer_queue_count(str void rt_timer_timer(void *); void rtalloc_noclone(struct route *); -void rtalloc(struct route *); #ifdef SMALL_KERNEL -#define rtalloc_mpath(r, s) rtalloc(r) +#define rtalloc_mpath(dst, s, rtableid) rtalloc1((dst), RT_REPORT, (rtableid)) #endif struct rtentry * rtalloc1(struct sockaddr *, int, u_int); Index: netinet/in_pcb.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/in_pcb.c,v retrieving revision 1.158 diff -u -p -r1.158 in_pcb.c --- netinet/in_pcb.c 22 Jul 2014 11:06:10 -0000 1.158 +++ netinet/in_pcb.c 21 Aug 2014 12:08:12 -0000 @@ -612,6 +612,7 @@ in_losing(struct inpcb *inp) if ((rt = inp->inp_route.ro_rt)) { inp->inp_route.ro_rt = 0; + bzero((caddr_t)&info, sizeof(info)); info.rti_flags = rt->rt_flags; info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst; @@ -764,7 +765,8 @@ in_pcbrtentry(struct inpcb *inp) ro->ro_dst.sa_len = sizeof(struct sockaddr_in6); satosin6(&ro->ro_dst)->sin6_addr = inp->inp_faddr6; ro->ro_tableid = inp->inp_rtableid; - rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0]); + ro->ro_rt = rtalloc_mpath(&ro->ro_dst, + &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid); break; #endif /* INET6 */ case PF_INET: @@ -774,7 +776,8 @@ in_pcbrtentry(struct inpcb *inp) ro->ro_dst.sa_len = sizeof(struct sockaddr_in); satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr; ro->ro_tableid = inp->inp_rtableid; - rtalloc_mpath(ro, &inp->inp_laddr.s_addr); + ro->ro_rt = rtalloc_mpath(&ro->ro_dst, + &inp->inp_laddr.s_addr, ro->ro_tableid); break; } } @@ -838,7 +841,7 @@ in_selectsrc(struct in_addr **insrc, str ro->ro_dst.sa_len = sizeof(struct sockaddr_in); satosin(&ro->ro_dst)->sin_addr = sin->sin_addr; ro->ro_tableid = rtableid; - rtalloc_mpath(ro, NULL); + ro->ro_rt = rtalloc_mpath(&ro->ro_dst, NULL, ro->ro_tableid); /* * It is important to bzero out the rest of the Index: netinet/ip_input.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_input.c,v retrieving revision 1.235 diff -u -p -r1.235 ip_input.c --- netinet/ip_input.c 13 Jul 2014 13:57:56 -0000 1.235 +++ netinet/ip_input.c 21 Aug 2014 12:08:12 -0000 @@ -1424,7 +1424,8 @@ ip_forward(struct mbuf *m, struct ifnet sin->sin_addr = ip->ip_dst; ipforward_rt.ro_tableid = rtableid; - rtalloc_mpath(&ipforward_rt, &ip->ip_src.s_addr); + ipforward_rt.ro_rt = rtalloc_mpath(&ipforward_rt.ro_dst, + &ip->ip_src.s_addr, ipforward_rt.ro_tableid); if (ipforward_rt.ro_rt == 0) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); return; Index: netinet/ip_output.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_output.c,v retrieving revision 1.266 diff -u -p -r1.266 ip_output.c --- netinet/ip_output.c 22 Jul 2014 11:06:10 -0000 1.266 +++ netinet/ip_output.c 21 Aug 2014 12:08:12 -0000 @@ -196,7 +196,8 @@ ip_output(struct mbuf *m0, struct mbuf * IFP_TO_IA(ifp, ia); } else { if (ro->ro_rt == 0) - rtalloc_mpath(ro, NULL); + ro->ro_rt = rtalloc_mpath(&ro->ro_dst, + NULL, ro->ro_tableid); if (ro->ro_rt == 0) { ipstat.ips_noroute++; @@ -346,7 +347,8 @@ reroute: IFP_TO_IA(ifp, ia); } else { if (ro->ro_rt == 0) - rtalloc_mpath(ro, &ip->ip_src.s_addr); + ro->ro_rt = rtalloc_mpath(&ro->ro_dst, + &ip->ip_src.s_addr, ro->ro_tableid); if (ro->ro_rt == 0) { ipstat.ips_noroute++; Index: netinet/ip_spd.c =================================================================== RCS file: /home/ncvs/src/sys/netinet/ip_spd.c,v retrieving revision 1.72 diff -u -p -r1.72 ip_spd.c --- netinet/ip_spd.c 22 Jul 2014 11:06:10 -0000 1.72 +++ netinet/ip_spd.c 21 Aug 2014 12:08:12 -0000 @@ -244,7 +244,8 @@ ipsp_spd_lookup(struct mbuf *m, int af, re->re_tableid = rdomain; /* Actual SPD lookup. */ - rtalloc((struct route *) re); + re->re_rt = rtalloc1((struct sockaddr *)&re->re_dst, RT_REPORT, + re->re_tableid); if (re->re_rt == NULL) { /* * Return whatever the socket requirements are, there are no Index: netinet6/frag6.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/frag6.c,v retrieving revision 1.54 diff -u -p -r1.54 frag6.c --- netinet6/frag6.c 22 Jul 2014 11:06:10 -0000 1.54 +++ netinet6/frag6.c 21 Aug 2014 12:08:12 -0000 @@ -194,7 +194,8 @@ frag6_input(struct mbuf **mp, int *offp, dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; - rtalloc_mpath((struct route *)&ro, &ip6->ip6_src.s6_addr32[0]); + ro.ro_rt = rtalloc_mpath(sin6tosa(&ro.ro_dst), + &ip6->ip6_src.s6_addr32[0], ro.ro_tableid); if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL) dstifp = ifatoia6(ro.ro_rt->rt_ifa)->ia_ifp; Index: netinet6/in6_src.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/in6_src.c,v retrieving revision 1.45 diff -u -p -r1.45 in6_src.c --- netinet6/in6_src.c 22 Jul 2014 11:06:10 -0000 1.45 +++ netinet6/in6_src.c 21 Aug 2014 12:08:12 -0000 @@ -264,9 +264,11 @@ in6_selectsrc(struct in6_addr **in6src, sa6->sin6_addr = *dst; sa6->sin6_scope_id = dstsock->sin6_scope_id; if (IN6_IS_ADDR_MULTICAST(dst)) { - rtalloc((struct route *)ro); + ro->ro_rt = rtalloc1(sin6tosa(&ro->ro_dst), + RT_REPORT, ro->ro_tableid); } else { - rtalloc_mpath((struct route *)ro, NULL); + ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst), + NULL, ro->ro_tableid); } } @@ -380,7 +382,8 @@ selectroute(struct sockaddr_in6 *dstsock ron->ro_tableid = rtableid; } if (ron->ro_rt == NULL) { - rtalloc((struct route *)ron); /* multi path case? */ + ron->ro_rt = rtalloc1(sin6tosa(&ron->ro_dst), + RT_REPORT, ron->ro_tableid); /* multi path case? */ if (ron->ro_rt == NULL || (ron->ro_rt->rt_flags & RTF_GATEWAY)) { if (ron->ro_rt) { @@ -431,7 +434,8 @@ selectroute(struct sockaddr_in6 *dstsock *sa6 = *dstsock; sa6->sin6_scope_id = 0; ro->ro_tableid = rtableid; - rtalloc_mpath((struct route *)ro, NULL); + ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst), + NULL, ro->ro_tableid); } /* Index: netinet6/ip6_forward.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/ip6_forward.c,v retrieving revision 1.67 diff -u -p -r1.67 ip6_forward.c --- netinet6/ip6_forward.c 3 Jun 2014 13:32:24 -0000 1.67 +++ netinet6/ip6_forward.c 21 Aug 2014 12:08:12 -0000 @@ -250,8 +250,10 @@ reroute: } /* this probably fails but give it a try again */ ip6_forward_rt.ro_tableid = rtableid; - rtalloc_mpath((struct route *)&ip6_forward_rt, - &ip6->ip6_src.s6_addr32[0]); + ip6_forward_rt.ro_rt = rtalloc_mpath( + sin6tosa(&ip6_forward_rt.ro_dst), + &ip6->ip6_src.s6_addr32[0], + ip6_forward_rt.ro_tableid); } if (ip6_forward_rt.ro_rt == 0) { @@ -277,9 +279,10 @@ reroute: dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; ip6_forward_rt.ro_tableid = rtableid; - - rtalloc_mpath((struct route *)&ip6_forward_rt, - &ip6->ip6_src.s6_addr32[0]); + ip6_forward_rt.ro_rt = rtalloc_mpath( + sin6tosa(&ip6_forward_rt.ro_dst), + &ip6->ip6_src.s6_addr32[0], + ip6_forward_rt.ro_tableid); if (ip6_forward_rt.ro_rt == 0) { ip6stat.ip6s_noroute++; Index: netinet6/ip6_input.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.128 diff -u -p -r1.128 ip6_input.c --- netinet6/ip6_input.c 22 Jul 2014 11:06:10 -0000 1.128 +++ netinet6/ip6_input.c 21 Aug 2014 12:08:12 -0000 @@ -449,8 +449,10 @@ ip6_input(struct mbuf *m) ip6_forward_rt.ro_dst.sin6_addr = ip6->ip6_dst; ip6_forward_rt.ro_tableid = rtableid; - rtalloc_mpath((struct route *)&ip6_forward_rt, - &ip6->ip6_src.s6_addr32[0]); + ip6_forward_rt.ro_rt = rtalloc_mpath( + sin6tosa(&ip6_forward_rt.ro_dst), + &ip6->ip6_src.s6_addr32[0], + ip6_forward_rt.ro_tableid); } /* Index: netinet6/ip6_output.c =================================================================== RCS file: /home/ncvs/src/sys/netinet6/ip6_output.c,v retrieving revision 1.158 diff -u -p -r1.158 ip6_output.c --- netinet6/ip6_output.c 22 Jul 2014 11:06:10 -0000 1.158 +++ netinet6/ip6_output.c 21 Aug 2014 12:08:12 -0000 @@ -1229,7 +1229,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, s sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; - rtalloc((struct route *)ro_pmtu); + ro_pmtu->ro_rt = rtalloc1(sin6tosa(&ro_pmtu->ro_dst), + RT_REPORT, ro_pmtu->ro_tableid); } } if (ro_pmtu->ro_rt) { @@ -2482,7 +2483,8 @@ ip6_setmoptions(int optname, struct ip6_ dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = mreq->ipv6mr_multiaddr; - rtalloc((struct route *)&ro); + ro.ro_rt = rtalloc1(sin6tosa(&ro.ro_dst), + RT_REPORT, ro.ro_tableid); if (ro.ro_rt == NULL) { error = EADDRNOTAVAIL; break;