On 21/08/14(Thu) 15:36, Martin Pieuchot wrote:
> As you might have seen, I started looking at routing entries pointing to
> stale ifa's. This is a necessary step if we want to rely on our routing
> entries to do address lookups.
>
> This audit leads me to believe that the actual check used to determine if
> a cached route entry is still valid might not be enough. Plus some
> lookups are completely missing such check, see the recent cloning case.
>
> Now, before digging into cached route validity, I'd like to simplify
> the interface to do route lookups. In other words, I'd like to kill
> rtalloc() and rtalloc_noclone(). The diff below is the part #1.
>
> Apart from the fact that this change unifies all our route lookups, it
> also gives us the possibility to stop using a "struct route" or similar
> when it is not needed. This structure, used to cache a route, is not
> protocol agnostic and is responsible for a lot of switch() and code
> duplication. Search for "struct route" in net/pf.c if you want to see
> some examples by yourself.
>
> So I'm looking for careful reviewers since the diff below remove the
> following check for every route lookup:
>
> if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
> return;
>
> But apparently all the calls to rtalloc() and rtalloc_mpath() seems to
> be done after checking for a similar condition.
>
> Ok?
Nobody?
>
> Index: net/pf.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/net/pf.c,v
> retrieving revision 1.886
> diff -u -p -r1.886 pf.c
> --- net/pf.c 12 Aug 2014 15:29:33 -0000 1.886
> +++ net/pf.c 21 Aug 2014 12:08:12 -0000
> @@ -5567,7 +5567,7 @@ pf_route(struct mbuf **m, struct pf_rule
> ro->ro_tableid = m0->m_pkthdr.ph_rtableid;
>
> if (!r->rt) {
> - rtalloc(ro);
> + ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);
> if (ro->ro_rt == 0) {
> ipstat.ips_noroute++;
> goto bad;
> Index: net/pfkeyv2.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/net/pfkeyv2.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 pfkeyv2.c
> --- net/pfkeyv2.c 12 Jul 2014 18:44:22 -0000 1.133
> +++ net/pfkeyv2.c 21 Aug 2014 12:08:12 -0000
> @@ -1569,7 +1569,8 @@ pfkeyv2_send(struct socket *socket, void
> /* Set the rdomain that was obtained from the socket */
> re.re_tableid = rdomain;
>
> - rtalloc((struct route *) &re);
> + re.re_rt = rtalloc1((struct sockaddr *)&re.re_dst, RT_REPORT,
> + re.re_tableid);
> if (re.re_rt != NULL) {
> ipo = ((struct sockaddr_encap *)
> re.re_rt->rt_gateway)->sen_ipsp;
> RTFREE(re.re_rt);
> Index: net/radix_mpath.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/net/radix_mpath.c,v
> retrieving revision 1.23
> diff -u -p -r1.23 radix_mpath.c
> --- net/radix_mpath.c 27 May 2014 19:38:15 -0000 1.23
> +++ net/radix_mpath.c 21 Aug 2014 12:08:12 -0000
> @@ -53,7 +53,7 @@
> #include <netinet6/ip6_var.h>
> #endif
>
> -u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
> +u_int32_t rn_mpath_hash(struct sockaddr *, u_int32_t *);
>
> /*
> * give some jitter to hash, to avoid synchronization between routers
> @@ -383,42 +383,37 @@ rn_mpath_reprio(struct radix_node *rn, i
> /*
> * allocate a route, potentially using multipath to select the peer.
> */
> -void
> -rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp)
> +struct rtentry *
> +rtalloc_mpath(struct sockaddr *dst, u_int32_t *srcaddrp, u_int rtableid)
> {
> + struct rtentry *rt;
> #if defined(INET) || defined(INET6)
> struct radix_node *rn;
> int hash, npaths, threshold;
> #endif
>
> - /*
> - * return a cached entry if it is still valid, otherwise we increase
> - * the risk of disrupting local flows.
> - */
> - if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
> - return;
> - ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);
> + rt = rtalloc1(dst, RT_REPORT, rtableid);
>
> /* if the route does not exist or it is not multipath, don't care */
> - if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH))
> - return;
> + if (rt == NULL || !ISSET(rt->rt_flags, RTF_MPATH))
> + return (rt);
>
> /* check if multipath routing is enabled for the specified protocol */
> if (!(0
> #ifdef INET
> - || (ipmultipath && ro->ro_dst.sa_family == AF_INET)
> + || (ipmultipath && dst->sa_family == AF_INET)
> #endif
> #ifdef INET6
> - || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
> + || (ip6_multipath && dst->sa_family == AF_INET6)
> #endif
> ))
> - return;
> + return (rt);
>
> #if defined(INET) || defined(INET6)
> /* gw selection by Hash-Threshold (RFC 2992) */
> - rn = (struct radix_node *)ro->ro_rt;
> + rn = (struct radix_node *)rt;
> npaths = rn_mpath_active_count(rn);
> - hash = rn_mpath_hash(ro, srcaddrp) & 0xffff;
> + hash = rn_mpath_hash(dst, srcaddrp) & 0xffff;
> threshold = 1 + (0xffff / npaths);
> while (hash > threshold && rn) {
> /* stay within the multipath routes */
> @@ -427,13 +422,14 @@ rtalloc_mpath(struct route *ro, u_int32_
> }
>
> /* if gw selection fails, use the first match (default) */
> - if (!rn)
> - return;
> -
> - rtfree(ro->ro_rt);
> - ro->ro_rt = (struct rtentry *)rn;
> - ro->ro_rt->rt_refcnt++;
> + if (rn != NULL) {
> + rtfree(rt);
> + rt = (struct rtentry *)rn;
> + rt->rt_refcnt++;
> + }
> #endif
> +
> + return (rt);
> }
>
> int
> @@ -468,20 +464,20 @@ rn_mpath_inithead(void **head, int off)
> } while (0)
>
> u_int32_t
> -rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp)
> +rn_mpath_hash(struct sockaddr *dst, u_int32_t *srcaddrp)
> {
> u_int32_t a, b, c;
>
> a = b = 0x9e3779b9;
> c = hashjitter;
>
> - switch (ro->ro_dst.sa_family) {
> + switch (dst->sa_family) {
> #ifdef INET
> case AF_INET:
> {
> struct sockaddr_in *sin_dst;
>
> - sin_dst = (struct sockaddr_in *)&ro->ro_dst;
> + sin_dst = (struct sockaddr_in *)dst;
> a += sin_dst->sin_addr.s_addr;
> b += srcaddrp ? srcaddrp[0] : 0;
> mix(a, b, c);
> @@ -493,7 +489,7 @@ rn_mpath_hash(struct route *ro, u_int32_
> {
> struct sockaddr_in6 *sin6_dst;
>
> - sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst;
> + sin6_dst = (struct sockaddr_in6 *)dst;
> a += sin6_dst->sin6_addr.s6_addr32[0];
> b += sin6_dst->sin6_addr.s6_addr32[2];
> c += srcaddrp ? srcaddrp[0] : 0;
> Index: net/radix_mpath.h
> ===================================================================
> RCS file: /home/ncvs/src/sys/net/radix_mpath.h,v
> retrieving revision 1.12
> diff -u -p -r1.12 radix_mpath.h
> --- net/radix_mpath.h 27 May 2014 19:38:15 -0000 1.12
> +++ net/radix_mpath.h 21 Aug 2014 12:08:12 -0000
> @@ -57,7 +57,7 @@ struct rtentry *rt_mpath_matchgate(struc
> u_int8_t);
> int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
> struct sockaddr *, int);
> -void rtalloc_mpath(struct route *, u_int32_t *);
> +struct rtentry *rtalloc_mpath(struct sockaddr *, u_int32_t *, u_int);
> int rn_mpath_inithead(void **, int);
> #endif /* _KERNEL */
>
> Index: net/route.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/net/route.c,v
> retrieving revision 1.180
> diff -u -p -r1.180 route.c
> --- net/route.c 21 Aug 2014 10:07:07 -0000 1.180
> +++ net/route.c 21 Aug 2014 12:08:12 -0000
> @@ -322,14 +322,6 @@ rtalloc_noclone(struct route *ro)
> ro->ro_tableid);
> }
>
> -void
> -rtalloc(struct route *ro)
> -{
> - if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
> - return; /* cached route is still valid */
> - ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);
> -}
> -
> struct rtentry *
> rtalloc1(struct sockaddr *dst, int flags, u_int tableid)
> {
> Index: net/route.h
> ===================================================================
> RCS file: /home/ncvs/src/sys/net/route.h,v
> retrieving revision 1.96
> diff -u -p -r1.96 route.h
> --- net/route.h 12 Aug 2014 13:52:08 -0000 1.96
> +++ net/route.h 21 Aug 2014 12:08:12 -0000
> @@ -381,9 +381,8 @@ unsigned long rt_timer_queue_count(str
> void rt_timer_timer(void *);
>
> void rtalloc_noclone(struct route *);
> -void rtalloc(struct route *);
> #ifdef SMALL_KERNEL
> -#define rtalloc_mpath(r, s) rtalloc(r)
> +#define rtalloc_mpath(dst, s, rtableid) rtalloc1((dst), RT_REPORT,
> (rtableid))
> #endif
> struct rtentry *
> rtalloc1(struct sockaddr *, int, u_int);
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 in_pcb.c
> --- netinet/in_pcb.c 22 Jul 2014 11:06:10 -0000 1.158
> +++ netinet/in_pcb.c 21 Aug 2014 12:08:12 -0000
> @@ -612,6 +612,7 @@ in_losing(struct inpcb *inp)
>
> if ((rt = inp->inp_route.ro_rt)) {
> inp->inp_route.ro_rt = 0;
> +
> bzero((caddr_t)&info, sizeof(info));
> info.rti_flags = rt->rt_flags;
> info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
> @@ -764,7 +765,8 @@ in_pcbrtentry(struct inpcb *inp)
> ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
> satosin6(&ro->ro_dst)->sin6_addr = inp->inp_faddr6;
> ro->ro_tableid = inp->inp_rtableid;
> - rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0]);
> + ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
> + &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid);
> break;
> #endif /* INET6 */
> case PF_INET:
> @@ -774,7 +776,8 @@ in_pcbrtentry(struct inpcb *inp)
> ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
> satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
> ro->ro_tableid = inp->inp_rtableid;
> - rtalloc_mpath(ro, &inp->inp_laddr.s_addr);
> + ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
> + &inp->inp_laddr.s_addr, ro->ro_tableid);
> break;
> }
> }
> @@ -838,7 +841,7 @@ in_selectsrc(struct in_addr **insrc, str
> ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
> satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
> ro->ro_tableid = rtableid;
> - rtalloc_mpath(ro, NULL);
> + ro->ro_rt = rtalloc_mpath(&ro->ro_dst, NULL, ro->ro_tableid);
>
> /*
> * It is important to bzero out the rest of the
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.235
> diff -u -p -r1.235 ip_input.c
> --- netinet/ip_input.c 13 Jul 2014 13:57:56 -0000 1.235
> +++ netinet/ip_input.c 21 Aug 2014 12:08:12 -0000
> @@ -1424,7 +1424,8 @@ ip_forward(struct mbuf *m, struct ifnet
> sin->sin_addr = ip->ip_dst;
> ipforward_rt.ro_tableid = rtableid;
>
> - rtalloc_mpath(&ipforward_rt, &ip->ip_src.s_addr);
> + ipforward_rt.ro_rt = rtalloc_mpath(&ipforward_rt.ro_dst,
> + &ip->ip_src.s_addr, ipforward_rt.ro_tableid);
> if (ipforward_rt.ro_rt == 0) {
> icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
> return;
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.266
> diff -u -p -r1.266 ip_output.c
> --- netinet/ip_output.c 22 Jul 2014 11:06:10 -0000 1.266
> +++ netinet/ip_output.c 21 Aug 2014 12:08:12 -0000
> @@ -196,7 +196,8 @@ ip_output(struct mbuf *m0, struct mbuf *
> IFP_TO_IA(ifp, ia);
> } else {
> if (ro->ro_rt == 0)
> - rtalloc_mpath(ro, NULL);
> + ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
> + NULL, ro->ro_tableid);
>
> if (ro->ro_rt == 0) {
> ipstat.ips_noroute++;
> @@ -346,7 +347,8 @@ reroute:
> IFP_TO_IA(ifp, ia);
> } else {
> if (ro->ro_rt == 0)
> - rtalloc_mpath(ro, &ip->ip_src.s_addr);
> + ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
> + &ip->ip_src.s_addr, ro->ro_tableid);
>
> if (ro->ro_rt == 0) {
> ipstat.ips_noroute++;
> Index: netinet/ip_spd.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet/ip_spd.c,v
> retrieving revision 1.72
> diff -u -p -r1.72 ip_spd.c
> --- netinet/ip_spd.c 22 Jul 2014 11:06:10 -0000 1.72
> +++ netinet/ip_spd.c 21 Aug 2014 12:08:12 -0000
> @@ -244,7 +244,8 @@ ipsp_spd_lookup(struct mbuf *m, int af,
> re->re_tableid = rdomain;
>
> /* Actual SPD lookup. */
> - rtalloc((struct route *) re);
> + re->re_rt = rtalloc1((struct sockaddr *)&re->re_dst, RT_REPORT,
> + re->re_tableid);
> if (re->re_rt == NULL) {
> /*
> * Return whatever the socket requirements are, there are no
> Index: netinet6/frag6.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet6/frag6.c,v
> retrieving revision 1.54
> diff -u -p -r1.54 frag6.c
> --- netinet6/frag6.c 22 Jul 2014 11:06:10 -0000 1.54
> +++ netinet6/frag6.c 21 Aug 2014 12:08:12 -0000
> @@ -194,7 +194,8 @@ frag6_input(struct mbuf **mp, int *offp,
> dst->sin6_len = sizeof(struct sockaddr_in6);
> dst->sin6_addr = ip6->ip6_dst;
>
> - rtalloc_mpath((struct route *)&ro, &ip6->ip6_src.s6_addr32[0]);
> + ro.ro_rt = rtalloc_mpath(sin6tosa(&ro.ro_dst),
> + &ip6->ip6_src.s6_addr32[0], ro.ro_tableid);
>
> if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL)
> dstifp = ifatoia6(ro.ro_rt->rt_ifa)->ia_ifp;
> Index: netinet6/in6_src.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet6/in6_src.c,v
> retrieving revision 1.45
> diff -u -p -r1.45 in6_src.c
> --- netinet6/in6_src.c 22 Jul 2014 11:06:10 -0000 1.45
> +++ netinet6/in6_src.c 21 Aug 2014 12:08:12 -0000
> @@ -264,9 +264,11 @@ in6_selectsrc(struct in6_addr **in6src,
> sa6->sin6_addr = *dst;
> sa6->sin6_scope_id = dstsock->sin6_scope_id;
> if (IN6_IS_ADDR_MULTICAST(dst)) {
> - rtalloc((struct route *)ro);
> + ro->ro_rt = rtalloc1(sin6tosa(&ro->ro_dst),
> + RT_REPORT, ro->ro_tableid);
> } else {
> - rtalloc_mpath((struct route *)ro, NULL);
> + ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst),
> + NULL, ro->ro_tableid);
> }
> }
>
> @@ -380,7 +382,8 @@ selectroute(struct sockaddr_in6 *dstsock
> ron->ro_tableid = rtableid;
> }
> if (ron->ro_rt == NULL) {
> - rtalloc((struct route *)ron); /* multi path case? */
> + ron->ro_rt = rtalloc1(sin6tosa(&ron->ro_dst),
> + RT_REPORT, ron->ro_tableid); /* multi path case? */
> if (ron->ro_rt == NULL ||
> (ron->ro_rt->rt_flags & RTF_GATEWAY)) {
> if (ron->ro_rt) {
> @@ -431,7 +434,8 @@ selectroute(struct sockaddr_in6 *dstsock
> *sa6 = *dstsock;
> sa6->sin6_scope_id = 0;
> ro->ro_tableid = rtableid;
> - rtalloc_mpath((struct route *)ro, NULL);
> + ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst),
> + NULL, ro->ro_tableid);
> }
>
> /*
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet6/ip6_forward.c,v
> retrieving revision 1.67
> diff -u -p -r1.67 ip6_forward.c
> --- netinet6/ip6_forward.c 3 Jun 2014 13:32:24 -0000 1.67
> +++ netinet6/ip6_forward.c 21 Aug 2014 12:08:12 -0000
> @@ -250,8 +250,10 @@ reroute:
> }
> /* this probably fails but give it a try again */
> ip6_forward_rt.ro_tableid = rtableid;
> - rtalloc_mpath((struct route *)&ip6_forward_rt,
> - &ip6->ip6_src.s6_addr32[0]);
> + ip6_forward_rt.ro_rt = rtalloc_mpath(
> + sin6tosa(&ip6_forward_rt.ro_dst),
> + &ip6->ip6_src.s6_addr32[0],
> + ip6_forward_rt.ro_tableid);
> }
>
> if (ip6_forward_rt.ro_rt == 0) {
> @@ -277,9 +279,10 @@ reroute:
> dst->sin6_family = AF_INET6;
> dst->sin6_addr = ip6->ip6_dst;
> ip6_forward_rt.ro_tableid = rtableid;
> -
> - rtalloc_mpath((struct route *)&ip6_forward_rt,
> - &ip6->ip6_src.s6_addr32[0]);
> + ip6_forward_rt.ro_rt = rtalloc_mpath(
> + sin6tosa(&ip6_forward_rt.ro_dst),
> + &ip6->ip6_src.s6_addr32[0],
> + ip6_forward_rt.ro_tableid);
>
> if (ip6_forward_rt.ro_rt == 0) {
> ip6stat.ip6s_noroute++;
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.128
> diff -u -p -r1.128 ip6_input.c
> --- netinet6/ip6_input.c 22 Jul 2014 11:06:10 -0000 1.128
> +++ netinet6/ip6_input.c 21 Aug 2014 12:08:12 -0000
> @@ -449,8 +449,10 @@ ip6_input(struct mbuf *m)
> ip6_forward_rt.ro_dst.sin6_addr = ip6->ip6_dst;
> ip6_forward_rt.ro_tableid = rtableid;
>
> - rtalloc_mpath((struct route *)&ip6_forward_rt,
> - &ip6->ip6_src.s6_addr32[0]);
> + ip6_forward_rt.ro_rt = rtalloc_mpath(
> + sin6tosa(&ip6_forward_rt.ro_dst),
> + &ip6->ip6_src.s6_addr32[0],
> + ip6_forward_rt.ro_tableid);
> }
>
> /*
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/netinet6/ip6_output.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 ip6_output.c
> --- netinet6/ip6_output.c 22 Jul 2014 11:06:10 -0000 1.158
> +++ netinet6/ip6_output.c 21 Aug 2014 12:08:12 -0000
> @@ -1229,7 +1229,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, s
> sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
> sa6_dst->sin6_addr = *dst;
>
> - rtalloc((struct route *)ro_pmtu);
> + ro_pmtu->ro_rt = rtalloc1(sin6tosa(&ro_pmtu->ro_dst),
> + RT_REPORT, ro_pmtu->ro_tableid);
> }
> }
> if (ro_pmtu->ro_rt) {
> @@ -2482,7 +2483,8 @@ ip6_setmoptions(int optname, struct ip6_
> dst->sin6_len = sizeof(struct sockaddr_in6);
> dst->sin6_family = AF_INET6;
> dst->sin6_addr = mreq->ipv6mr_multiaddr;
> - rtalloc((struct route *)&ro);
> + ro.ro_rt = rtalloc1(sin6tosa(&ro.ro_dst),
> + RT_REPORT, ro.ro_tableid);
> if (ro.ro_rt == NULL) {
> error = EADDRNOTAVAIL;
> break;
>