On 12/08/15(Wed) 17:03, Martin Pieuchot wrote:
> I'm currently working on the routing table interface to make is safe
> to use by multiple CPUs at the same time.  The diff below is a big
> step in this direction and I'd really appreciate if people could test
> it with their usual network setup and report back.

Below is an updated version of the diff now that bluhm@ fixed the
potential loop with RTF_GATEWAY routes.

Note that regression tests will fail because we no longer call
rtalloc(9) inside rt_setgate().  In other words we do not cache
a next-hop route before using it.

Ok?

Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.268
diff -u -p -r1.268 route.c
--- net/route.c 4 Nov 2015 10:13:55 -0000       1.268
+++ net/route.c 4 Nov 2015 11:15:00 -0000
@@ -151,6 +151,7 @@ void        rt_timer_init(void);
 int    rtflushclone1(struct rtentry *, void *, u_int);
 void   rtflushclone(unsigned int, struct rtentry *);
 int    rt_if_remove_rtdelete(struct rtentry *, void *, u_int);
+struct rtentry *rt_match(struct sockaddr *, int, unsigned int);
 
 struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *,
                    u_int);
@@ -194,6 +195,12 @@ rtisvalid(struct rtentry *rt)
        if (rt == NULL)
                return (0);
 
+#ifdef DIAGNOSTIC
+       if (ISSET(rt->rt_flags, RTF_GATEWAY) && (rt->rt_gwroute != NULL) &&
+           ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY))
+               panic("next hop must be directly reachable");
+#endif
+
        if ((rt->rt_flags & RTF_UP) == 0)
                return (0);
 
@@ -201,11 +208,27 @@ rtisvalid(struct rtentry *rt)
        if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL)
                return (0);
 
+       if (ISSET(rt->rt_flags, RTF_GATEWAY) && !rtisvalid(rt->rt_gwroute))
+               return (0);
+
        return (1);
 }
 
+/*
+ * Do the actual lookup for rtalloc(9), do not use directly!
+ *
+ * Return the best matching entry for the destination ``dst''.
+ *
+ * "RT_RESOLVE" means that a corresponding L2 entry should
+ *   be added to the routing table and resolved (via ARP or
+ *   NDP), if it does not exist.
+ *
+ * "RT_REPORT" indicates that a message should be sent to
+ *   userland if no matching route has been found or if an
+ *   error occured while adding a L2 entry.
+ */
 struct rtentry *
-rtalloc(struct sockaddr *dst, int flags, unsigned int tableid)
+rt_match(struct sockaddr *dst, int flags, unsigned int tableid)
 {
        struct rtentry          *rt0, *rt = NULL;
        struct rt_addrinfo       info;
@@ -336,6 +359,76 @@ rtalloc_mpath(struct sockaddr *dst, uint
 }
 #endif /* SMALL_KERNEL */
 
+/*
+ * Look in the routing table for the best matching entry for
+ * ``dst''.
+ *
+ * If a route with a gateway is found and its next hop is no
+ * longer valid, try to cache it.
+ */
+struct rtentry *
+rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
+{
+       struct rtentry *rt, *nhrt;
+
+       rt = rt_match(dst, flags, rtableid);
+
+       /* No match or route to host?  We're done. */
+       if (rt == NULL || !ISSET(rt->rt_flags, RTF_GATEWAY))
+               return (rt);
+
+       /* Nothing to do if the next hop is valid. */
+       if (rtisvalid(rt->rt_gwroute))
+               return (rt);
+
+       rtfree(rt->rt_gwroute);
+       rt->rt_gwroute = NULL;
+
+       /*
+        * If we cannot find a valid next hop, return the route
+        * with a gateway.
+        *
+        * XXX Some dragons hiding in the tree certainly depends on
+        * this behavior.  But it is safe since rt_checkgate() wont
+        * allow us to us this route later on.
+        */
+       nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid);
+       if (nhrt == NULL)
+               return (rt);
+
+       /*
+        * Next hop must be reachable, this also prevents rtentry
+        * loops for example when rt->rt_gwroute points to rt.
+        */
+       if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
+               rtfree(nhrt);
+               return (rt);
+       }
+
+       /*
+        * Next hop entry must be UP and on the same interface.
+        */
+       if (!ISSET(nhrt->rt_flags, RTF_UP) || nhrt->rt_ifidx != rt->rt_ifidx) {
+               rtfree(nhrt);
+               return (rt);
+       }
+
+       /*
+        * If the MTU of next hop is 0, this will reset the MTU of the
+        * route to run PMTUD again from scratch.
+        */
+       if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
+               rt->rt_mtu = nhrt->rt_mtu;
+
+       /*
+        * Do not return the cached next-hop route, rt_checkgate() will
+        * do the magic for us.
+        */
+       rt->rt_gwroute = nhrt;
+
+       return (rt);
+}
+
 void
 rtref(struct rtentry *rt)
 {
@@ -499,7 +592,7 @@ create:
                        rt->rt_flags |= RTF_MODIFIED;
                        flags |= RTF_MODIFIED;
                        stat = &rtstat.rts_newgateway;
-                       rt_setgate(rt, gateway, rdomain);
+                       rt_setgate(rt, gateway);
                }
        } else
                error = EHOSTUNREACH;
@@ -931,8 +1024,7 @@ rtrequest(int req, struct rt_addrinfo *i
                 * the routing table because the radix MPATH code use
                 * it to (re)order routes.
                 */
-               if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
-                   tableid))) {
+               if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) {
                        free(ndst, M_RTABLE, dlen);
                        pool_put(&rtentry_pool, rt);
                        return (error);
@@ -985,7 +1077,7 @@ rtrequest(int req, struct rt_addrinfo *i
 }
 
 int
-rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid)
+rt_setgate(struct rtentry *rt, struct sockaddr *gate)
 {
        int glen = ROUNDUP(gate->sa_len);
        struct sockaddr *sa;
@@ -1003,22 +1095,7 @@ rt_setgate(struct rtentry *rt, struct so
                rtfree(rt->rt_gwroute);
                rt->rt_gwroute = NULL;
        }
-       if (rt->rt_flags & RTF_GATEWAY) {
-               /* XXX is this actually valid to cross tables here? */
-               rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid);
-               /*
-                * If we switched gateways, grab the MTU from the new
-                * gateway route if the current MTU is 0 or greater
-                * than the MTU of gateway.
-                * Note that, if the MTU of gateway is 0, we will reset the
-                * MTU of the route to run PMTUD again from scratch. XXX
-                */
-               if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
-                   rt->rt_rmx.rmx_mtu &&
-                   rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
-                       rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
-               }
-       }
+
        return (0);
 }
 
@@ -1033,26 +1110,8 @@ rt_checkgate(struct ifnet *ifp, struct r
        rt0 = rt;
 
        if (rt->rt_flags & RTF_GATEWAY) {
-               if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) {
-                       rtfree(rt->rt_gwroute);
-                       rt->rt_gwroute = NULL;
-               }
-               if (rt->rt_gwroute == NULL) {
-                       rt->rt_gwroute = rtalloc(rt->rt_gateway,
-                           RT_REPORT|RT_RESOLVE, rtableid);
-                       if (rt->rt_gwroute == NULL)
-                               return (EHOSTUNREACH);
-               }
-               /*
-                * Next hop must be reachable, this also prevents rtentry
-                * loops, for example when rt->rt_gwroute points to rt.
-                */
-               if (((rt->rt_gwroute->rt_flags & (RTF_UP|RTF_GATEWAY)) !=
-                   RTF_UP) || (rt->rt_gwroute->rt_ifidx != ifp->if_index)) {
-                       rtfree(rt->rt_gwroute);
-                       rt->rt_gwroute = NULL;
+               if (rt->rt_gwroute == NULL)
                        return (EHOSTUNREACH);
-               }
                rt = rt->rt_gwroute;
        }
 
Index: net/route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.118
diff -u -p -r1.118 route.h
--- net/route.h 30 Oct 2015 09:39:42 -0000      1.118
+++ net/route.h 4 Nov 2015 11:00:15 -0000
@@ -119,6 +119,8 @@ struct rtentry {
 };
 #define        rt_use          rt_rmx.rmx_pksent
 #define        rt_expire       rt_rmx.rmx_expire
+#define        rt_locks        rt_rmx.rmx_locks
+#define        rt_mtu          rt_rmx.rmx_mtu
 
 #define        RTF_UP          0x1             /* route usable */
 #define        RTF_GATEWAY     0x2             /* destination is a gateway */
@@ -358,7 +360,7 @@ void         rt_maskedcopy(struct sockaddr *,
 void    rt_sendmsg(struct rtentry *, int, u_int);
 void    rt_sendaddrmsg(struct rtentry *, int);
 void    rt_missmsg(int, struct rt_addrinfo *, int, u_int, int, u_int);
-int     rt_setgate(struct rtentry *, struct sockaddr *, unsigned int);
+int     rt_setgate(struct rtentry *, struct sockaddr *);
 int     rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *,
            unsigned int, struct rtentry **);
 void    rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *);
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.181
diff -u -p -r1.181 rtsock.c
--- net/rtsock.c        2 Nov 2015 14:40:09 -0000       1.181
+++ net/rtsock.c        4 Nov 2015 11:01:17 -0000
@@ -745,9 +745,8 @@ report:
                                        goto flush;
                                ifa = info.rti_ifa;
                        }
-                       if (info.rti_info[RTAX_GATEWAY] != NULL &&
-                           (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY],
-                            tableid)))
+                       if (info.rti_info[RTAX_GATEWAY] != NULL && (error =
+                           rt_setgate(rt, info.rti_info[RTAX_GATEWAY])))
                                goto flush;
                        if (ifa) {
                                if (rt->rt_ifa != ifa) {
Index: net/if_spppsubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_spppsubr.c,v
retrieving revision 1.144
diff -u -p -r1.144 if_spppsubr.c
--- net/if_spppsubr.c   2 Nov 2015 11:19:30 -0000       1.144
+++ net/if_spppsubr.c   4 Nov 2015 11:03:24 -0000
@@ -4244,10 +4244,9 @@ sppp_update_gw_walker(struct rtentry *rt
        if (rt->rt_ifidx == ifp->if_index) {
                if (rt->rt_ifa->ifa_dstaddr->sa_family !=
                    rt->rt_gateway->sa_family ||
-                   (rt->rt_flags & RTF_GATEWAY) == 0)
+                   !ISSET(rt->rt_flags, RTF_GATEWAY))
                        return (0);     /* do not modify non-gateway routes */
-               bcopy(rt->rt_ifa->ifa_dstaddr, rt->rt_gateway,
-                   rt->rt_ifa->ifa_dstaddr->sa_len);
+               rt_setgate(rt, rt->rt_ifa->ifa_dstaddr);
        }
        return (0);
 }

Reply via email to