As you might have seen, I started looking at routing entries pointing to
stale ifa's.  This is a necessary step if we want to rely on our routing
entries to do address lookups.

This audit leads me to believe that the actual check used to determine if
a cached route entry is still valid might not be enough.  Plus some 
lookups are completely missing such check, see the recent cloning case.

Now, before digging into cached route validity, I'd like to simplify
the interface to do route lookups.  In other words, I'd like to kill
rtalloc() and rtalloc_noclone().  The diff below is the part #1.

Apart from the fact that this change unifies all our route lookups, it
also gives us the possibility to stop using a "struct route" or similar
when it is not needed.  This structure, used to cache a route, is not
protocol agnostic and is responsible for a lot of switch() and code
duplication.  Search for "struct route" in net/pf.c if you want to see
some examples by yourself.

So I'm looking for careful reviewers since the diff below remove the
following check for every route lookup:

        if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
                return;

But apparently all the calls to rtalloc() and rtalloc_mpath() seems to
be done after checking for a similar condition.

Ok?

Index: net/pf.c
===================================================================
RCS file: /home/ncvs/src/sys/net/pf.c,v
retrieving revision 1.886
diff -u -p -r1.886 pf.c
--- net/pf.c    12 Aug 2014 15:29:33 -0000      1.886
+++ net/pf.c    21 Aug 2014 12:08:12 -0000
@@ -5567,7 +5567,7 @@ pf_route(struct mbuf **m, struct pf_rule
        ro->ro_tableid = m0->m_pkthdr.ph_rtableid;
 
        if (!r->rt) {
-               rtalloc(ro);
+               ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);
                if (ro->ro_rt == 0) {
                        ipstat.ips_noroute++;
                        goto bad;
Index: net/pfkeyv2.c
===================================================================
RCS file: /home/ncvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.133
diff -u -p -r1.133 pfkeyv2.c
--- net/pfkeyv2.c       12 Jul 2014 18:44:22 -0000      1.133
+++ net/pfkeyv2.c       21 Aug 2014 12:08:12 -0000
@@ -1569,7 +1569,8 @@ pfkeyv2_send(struct socket *socket, void
                /* Set the rdomain that was obtained from the socket */
                re.re_tableid = rdomain;
 
-               rtalloc((struct route *) &re);
+               re.re_rt = rtalloc1((struct sockaddr *)&re.re_dst, RT_REPORT,
+                   re.re_tableid);
                if (re.re_rt != NULL) {
                        ipo = ((struct sockaddr_encap *) 
re.re_rt->rt_gateway)->sen_ipsp;
                        RTFREE(re.re_rt);
Index: net/radix_mpath.c
===================================================================
RCS file: /home/ncvs/src/sys/net/radix_mpath.c,v
retrieving revision 1.23
diff -u -p -r1.23 radix_mpath.c
--- net/radix_mpath.c   27 May 2014 19:38:15 -0000      1.23
+++ net/radix_mpath.c   21 Aug 2014 12:08:12 -0000
@@ -53,7 +53,7 @@
 #include <netinet6/ip6_var.h>
 #endif
 
-u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
+u_int32_t rn_mpath_hash(struct sockaddr *, u_int32_t *);
 
 /*
  * give some jitter to hash, to avoid synchronization between routers
@@ -383,42 +383,37 @@ rn_mpath_reprio(struct radix_node *rn, i
 /*
  * allocate a route, potentially using multipath to select the peer.
  */
-void
-rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp)
+struct rtentry *
+rtalloc_mpath(struct sockaddr *dst, u_int32_t *srcaddrp, u_int rtableid)
 {
+       struct rtentry *rt;
 #if defined(INET) || defined(INET6)
        struct radix_node *rn;
        int hash, npaths, threshold;
 #endif
 
-       /*
-        * return a cached entry if it is still valid, otherwise we increase
-        * the risk of disrupting local flows.
-        */
-       if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
-               return;
-       ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);
+       rt = rtalloc1(dst, RT_REPORT, rtableid);
 
        /* if the route does not exist or it is not multipath, don't care */
-       if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH))
-               return;
+       if (rt == NULL || !ISSET(rt->rt_flags, RTF_MPATH))
+               return (rt);
 
        /* check if multipath routing is enabled for the specified protocol */
        if (!(0
 #ifdef INET
-           || (ipmultipath && ro->ro_dst.sa_family == AF_INET)
+           || (ipmultipath && dst->sa_family == AF_INET)
 #endif
 #ifdef INET6
-           || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
+           || (ip6_multipath && dst->sa_family == AF_INET6)
 #endif
            ))
-               return;
+               return (rt);
 
 #if defined(INET) || defined(INET6)
        /* gw selection by Hash-Threshold (RFC 2992) */
-       rn = (struct radix_node *)ro->ro_rt;
+       rn = (struct radix_node *)rt;
        npaths = rn_mpath_active_count(rn);
-       hash = rn_mpath_hash(ro, srcaddrp) & 0xffff;
+       hash = rn_mpath_hash(dst, srcaddrp) & 0xffff;
        threshold = 1 + (0xffff / npaths);
        while (hash > threshold && rn) {
                /* stay within the multipath routes */
@@ -427,13 +422,14 @@ rtalloc_mpath(struct route *ro, u_int32_
        }
 
        /* if gw selection fails, use the first match (default) */
-       if (!rn)
-               return;
-
-       rtfree(ro->ro_rt);
-       ro->ro_rt = (struct rtentry *)rn;
-       ro->ro_rt->rt_refcnt++;
+       if (rn != NULL) {
+               rtfree(rt);
+               rt = (struct rtentry *)rn;
+               rt->rt_refcnt++;
+       }
 #endif
+
+       return (rt);
 }
 
 int
@@ -468,20 +464,20 @@ rn_mpath_inithead(void **head, int off)
        } while (0)
 
 u_int32_t
-rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp)
+rn_mpath_hash(struct sockaddr *dst, u_int32_t *srcaddrp)
 {
        u_int32_t a, b, c;
 
        a = b = 0x9e3779b9;
        c = hashjitter;
 
-       switch (ro->ro_dst.sa_family) {
+       switch (dst->sa_family) {
 #ifdef INET
        case AF_INET:
            {
                struct sockaddr_in *sin_dst;
 
-               sin_dst = (struct sockaddr_in *)&ro->ro_dst;
+               sin_dst = (struct sockaddr_in *)dst;
                a += sin_dst->sin_addr.s_addr;
                b += srcaddrp ? srcaddrp[0] : 0;
                mix(a, b, c);
@@ -493,7 +489,7 @@ rn_mpath_hash(struct route *ro, u_int32_
            {
                struct sockaddr_in6 *sin6_dst;
 
-               sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst;
+               sin6_dst = (struct sockaddr_in6 *)dst;
                a += sin6_dst->sin6_addr.s6_addr32[0];
                b += sin6_dst->sin6_addr.s6_addr32[2];
                c += srcaddrp ? srcaddrp[0] : 0;
Index: net/radix_mpath.h
===================================================================
RCS file: /home/ncvs/src/sys/net/radix_mpath.h,v
retrieving revision 1.12
diff -u -p -r1.12 radix_mpath.h
--- net/radix_mpath.h   27 May 2014 19:38:15 -0000      1.12
+++ net/radix_mpath.h   21 Aug 2014 12:08:12 -0000
@@ -57,7 +57,7 @@ struct rtentry *rt_mpath_matchgate(struc
            u_int8_t);
 int    rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
            struct sockaddr *, int);
-void   rtalloc_mpath(struct route *, u_int32_t *);
+struct rtentry *rtalloc_mpath(struct sockaddr *, u_int32_t *, u_int);
 int    rn_mpath_inithead(void **, int);
 #endif /* _KERNEL */
 
Index: net/route.c
===================================================================
RCS file: /home/ncvs/src/sys/net/route.c,v
retrieving revision 1.180
diff -u -p -r1.180 route.c
--- net/route.c 21 Aug 2014 10:07:07 -0000      1.180
+++ net/route.c 21 Aug 2014 12:08:12 -0000
@@ -322,14 +322,6 @@ rtalloc_noclone(struct route *ro)
            ro->ro_tableid);
 }
 
-void
-rtalloc(struct route *ro)
-{
-       if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
-               return;         /* cached route is still valid */
-       ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);
-}
-
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int flags, u_int tableid)
 {
Index: net/route.h
===================================================================
RCS file: /home/ncvs/src/sys/net/route.h,v
retrieving revision 1.96
diff -u -p -r1.96 route.h
--- net/route.h 12 Aug 2014 13:52:08 -0000      1.96
+++ net/route.h 21 Aug 2014 12:08:12 -0000
@@ -381,9 +381,8 @@ unsigned long                rt_timer_queue_count(str
 void                    rt_timer_timer(void *);
 
 void    rtalloc_noclone(struct route *);
-void    rtalloc(struct route *);
 #ifdef SMALL_KERNEL
-#define        rtalloc_mpath(r, s)     rtalloc(r)
+#define        rtalloc_mpath(dst, s, rtableid) rtalloc1((dst), RT_REPORT, 
(rtableid))
 #endif
 struct rtentry *
         rtalloc1(struct sockaddr *, int, u_int);
Index: netinet/in_pcb.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.158
diff -u -p -r1.158 in_pcb.c
--- netinet/in_pcb.c    22 Jul 2014 11:06:10 -0000      1.158
+++ netinet/in_pcb.c    21 Aug 2014 12:08:12 -0000
@@ -612,6 +612,7 @@ in_losing(struct inpcb *inp)
 
        if ((rt = inp->inp_route.ro_rt)) {
                inp->inp_route.ro_rt = 0;
+
                bzero((caddr_t)&info, sizeof(info));
                info.rti_flags = rt->rt_flags;
                info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
@@ -764,7 +765,8 @@ in_pcbrtentry(struct inpcb *inp)
                        ro->ro_dst.sa_len = sizeof(struct sockaddr_in6);
                        satosin6(&ro->ro_dst)->sin6_addr = inp->inp_faddr6;
                        ro->ro_tableid = inp->inp_rtableid;
-                       rtalloc_mpath(ro, &inp->inp_laddr6.s6_addr32[0]);
+                       ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
+                           &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid);
                        break;
 #endif /* INET6 */
                case PF_INET:
@@ -774,7 +776,8 @@ in_pcbrtentry(struct inpcb *inp)
                        ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
                        satosin(&ro->ro_dst)->sin_addr = inp->inp_faddr;
                        ro->ro_tableid = inp->inp_rtableid;
-                       rtalloc_mpath(ro, &inp->inp_laddr.s_addr);
+                       ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
+                           &inp->inp_laddr.s_addr, ro->ro_tableid);
                        break;
                }
        }
@@ -838,7 +841,7 @@ in_selectsrc(struct in_addr **insrc, str
                ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
                satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
                ro->ro_tableid = rtableid;
-               rtalloc_mpath(ro, NULL);
+               ro->ro_rt = rtalloc_mpath(&ro->ro_dst, NULL, ro->ro_tableid);
 
                /*
                 * It is important to bzero out the rest of the
Index: netinet/ip_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.235
diff -u -p -r1.235 ip_input.c
--- netinet/ip_input.c  13 Jul 2014 13:57:56 -0000      1.235
+++ netinet/ip_input.c  21 Aug 2014 12:08:12 -0000
@@ -1424,7 +1424,8 @@ ip_forward(struct mbuf *m, struct ifnet 
                sin->sin_addr = ip->ip_dst;
                ipforward_rt.ro_tableid = rtableid;
 
-               rtalloc_mpath(&ipforward_rt, &ip->ip_src.s_addr);
+               ipforward_rt.ro_rt = rtalloc_mpath(&ipforward_rt.ro_dst,
+                   &ip->ip_src.s_addr, ipforward_rt.ro_tableid);
                if (ipforward_rt.ro_rt == 0) {
                        icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
                        return;
Index: netinet/ip_output.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.266
diff -u -p -r1.266 ip_output.c
--- netinet/ip_output.c 22 Jul 2014 11:06:10 -0000      1.266
+++ netinet/ip_output.c 21 Aug 2014 12:08:12 -0000
@@ -196,7 +196,8 @@ ip_output(struct mbuf *m0, struct mbuf *
                        IFP_TO_IA(ifp, ia);
                } else {
                        if (ro->ro_rt == 0)
-                               rtalloc_mpath(ro, NULL);
+                               ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
+                                   NULL, ro->ro_tableid);
 
                        if (ro->ro_rt == 0) {
                                ipstat.ips_noroute++;
@@ -346,7 +347,8 @@ reroute:
                        IFP_TO_IA(ifp, ia);
                } else {
                        if (ro->ro_rt == 0)
-                               rtalloc_mpath(ro, &ip->ip_src.s_addr);
+                               ro->ro_rt = rtalloc_mpath(&ro->ro_dst,
+                                   &ip->ip_src.s_addr, ro->ro_tableid);
 
                        if (ro->ro_rt == 0) {
                                ipstat.ips_noroute++;
Index: netinet/ip_spd.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/ip_spd.c,v
retrieving revision 1.72
diff -u -p -r1.72 ip_spd.c
--- netinet/ip_spd.c    22 Jul 2014 11:06:10 -0000      1.72
+++ netinet/ip_spd.c    21 Aug 2014 12:08:12 -0000
@@ -244,7 +244,8 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
        re->re_tableid = rdomain;
 
        /* Actual SPD lookup. */
-       rtalloc((struct route *) re);
+       re->re_rt = rtalloc1((struct sockaddr *)&re->re_dst, RT_REPORT,
+           re->re_tableid);
        if (re->re_rt == NULL) {
                /*
                 * Return whatever the socket requirements are, there are no
Index: netinet6/frag6.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet6/frag6.c,v
retrieving revision 1.54
diff -u -p -r1.54 frag6.c
--- netinet6/frag6.c    22 Jul 2014 11:06:10 -0000      1.54
+++ netinet6/frag6.c    21 Aug 2014 12:08:12 -0000
@@ -194,7 +194,8 @@ frag6_input(struct mbuf **mp, int *offp,
        dst->sin6_len = sizeof(struct sockaddr_in6);
        dst->sin6_addr = ip6->ip6_dst;
 
-       rtalloc_mpath((struct route *)&ro, &ip6->ip6_src.s6_addr32[0]);
+       ro.ro_rt = rtalloc_mpath(sin6tosa(&ro.ro_dst),
+           &ip6->ip6_src.s6_addr32[0], ro.ro_tableid);
 
        if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL)
                dstifp = ifatoia6(ro.ro_rt->rt_ifa)->ia_ifp;
Index: netinet6/in6_src.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.45
diff -u -p -r1.45 in6_src.c
--- netinet6/in6_src.c  22 Jul 2014 11:06:10 -0000      1.45
+++ netinet6/in6_src.c  21 Aug 2014 12:08:12 -0000
@@ -264,9 +264,11 @@ in6_selectsrc(struct in6_addr **in6src, 
                        sa6->sin6_addr = *dst;
                        sa6->sin6_scope_id = dstsock->sin6_scope_id;
                        if (IN6_IS_ADDR_MULTICAST(dst)) {
-                               rtalloc((struct route *)ro);
+                               ro->ro_rt = rtalloc1(sin6tosa(&ro->ro_dst),
+                                   RT_REPORT, ro->ro_tableid);
                        } else {
-                               rtalloc_mpath((struct route *)ro, NULL);
+                               ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst),
+                                   NULL, ro->ro_tableid);
                        }
                }
 
@@ -380,7 +382,8 @@ selectroute(struct sockaddr_in6 *dstsock
                        ron->ro_tableid = rtableid;
                }
                if (ron->ro_rt == NULL) {
-                       rtalloc((struct route *)ron); /* multi path case? */
+                       ron->ro_rt = rtalloc1(sin6tosa(&ron->ro_dst),
+                           RT_REPORT, ron->ro_tableid); /* multi path case? */
                        if (ron->ro_rt == NULL ||
                            (ron->ro_rt->rt_flags & RTF_GATEWAY)) {
                                if (ron->ro_rt) {
@@ -431,7 +434,8 @@ selectroute(struct sockaddr_in6 *dstsock
                        *sa6 = *dstsock;
                        sa6->sin6_scope_id = 0;
                        ro->ro_tableid = rtableid;
-                       rtalloc_mpath((struct route *)ro, NULL);
+                       ro->ro_rt = rtalloc_mpath(sin6tosa(&ro->ro_dst),
+                           NULL, ro->ro_tableid);
                }
 
                /*
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet6/ip6_forward.c,v
retrieving revision 1.67
diff -u -p -r1.67 ip6_forward.c
--- netinet6/ip6_forward.c      3 Jun 2014 13:32:24 -0000       1.67
+++ netinet6/ip6_forward.c      21 Aug 2014 12:08:12 -0000
@@ -250,8 +250,10 @@ reroute:
                        }
                        /* this probably fails but give it a try again */
                        ip6_forward_rt.ro_tableid = rtableid;
-                       rtalloc_mpath((struct route *)&ip6_forward_rt,
-                           &ip6->ip6_src.s6_addr32[0]);
+                       ip6_forward_rt.ro_rt = rtalloc_mpath(
+                           sin6tosa(&ip6_forward_rt.ro_dst),
+                           &ip6->ip6_src.s6_addr32[0],
+                           ip6_forward_rt.ro_tableid);
                }
 
                if (ip6_forward_rt.ro_rt == 0) {
@@ -277,9 +279,10 @@ reroute:
                dst->sin6_family = AF_INET6;
                dst->sin6_addr = ip6->ip6_dst;
                ip6_forward_rt.ro_tableid = rtableid;
-
-               rtalloc_mpath((struct route *)&ip6_forward_rt,
-                   &ip6->ip6_src.s6_addr32[0]);
+               ip6_forward_rt.ro_rt = rtalloc_mpath(
+                   sin6tosa(&ip6_forward_rt.ro_dst),
+                   &ip6->ip6_src.s6_addr32[0],
+                   ip6_forward_rt.ro_tableid);
 
                if (ip6_forward_rt.ro_rt == 0) {
                        ip6stat.ip6s_noroute++;
Index: netinet6/ip6_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.128
diff -u -p -r1.128 ip6_input.c
--- netinet6/ip6_input.c        22 Jul 2014 11:06:10 -0000      1.128
+++ netinet6/ip6_input.c        21 Aug 2014 12:08:12 -0000
@@ -449,8 +449,10 @@ ip6_input(struct mbuf *m)
                ip6_forward_rt.ro_dst.sin6_addr = ip6->ip6_dst;
                ip6_forward_rt.ro_tableid = rtableid;
 
-               rtalloc_mpath((struct route *)&ip6_forward_rt,
-                   &ip6->ip6_src.s6_addr32[0]);
+               ip6_forward_rt.ro_rt = rtalloc_mpath(
+                   sin6tosa(&ip6_forward_rt.ro_dst),
+                   &ip6->ip6_src.s6_addr32[0],
+                   ip6_forward_rt.ro_tableid);
        }
 
        /*
Index: netinet6/ip6_output.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.158
diff -u -p -r1.158 ip6_output.c
--- netinet6/ip6_output.c       22 Jul 2014 11:06:10 -0000      1.158
+++ netinet6/ip6_output.c       21 Aug 2014 12:08:12 -0000
@@ -1229,7 +1229,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, s
                        sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
                        sa6_dst->sin6_addr = *dst;
 
-                       rtalloc((struct route *)ro_pmtu);
+                       ro_pmtu->ro_rt = rtalloc1(sin6tosa(&ro_pmtu->ro_dst),
+                           RT_REPORT, ro_pmtu->ro_tableid);
                }
        }
        if (ro_pmtu->ro_rt) {
@@ -2482,7 +2483,8 @@ ip6_setmoptions(int optname, struct ip6_
                        dst->sin6_len = sizeof(struct sockaddr_in6);
                        dst->sin6_family = AF_INET6;
                        dst->sin6_addr = mreq->ipv6mr_multiaddr;
-                       rtalloc((struct route *)&ro);
+                       ro.ro_rt = rtalloc1(sin6tosa(&ro.ro_dst),
+                           RT_REPORT, ro.ro_tableid);
                        if (ro.ro_rt == NULL) {
                                error = EADDRNOTAVAIL;
                                break;

Reply via email to