Author: kmacy
Date: Tue Apr 14 23:05:36 2009
New Revision: 191080
URL: http://svn.freebsd.org/changeset/base/191080

Log:
  Extend route command:
        - add show as alias for get
        - add weights to allow mpath to do more than equal cost
        - add sticky / nostick to disable / re-enable per-connection load
          balancing
  
  This adds a field to rt_metrics_lite so network bits of world will need to be
  re-built.
  
  Reviewed by:  jeli & qingli

Modified:
  head/UPDATING
  head/sbin/route/keywords
  head/sbin/route/route.c
  head/sys/net/radix_mpath.c
  head/sys/net/route.c
  head/sys/net/route.h
  head/sys/net/rtsock.c
  head/sys/sys/param.h

Modified: head/UPDATING
==============================================================================
--- head/UPDATING       Tue Apr 14 22:53:22 2009        (r191079)
+++ head/UPDATING       Tue Apr 14 23:05:36 2009        (r191080)
@@ -22,6 +22,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
        to maximize performance.  (To disable malloc debugging, run
        ln -s aj /etc/malloc.conf.)
 
+20090414:
+       The size of rt_metrics_lite and by extension rtentry has changed.
+       Networking administration apps will need to be recompiled.
+       The route command now supports show as an alias for get, weighting 
+       of routes, sticky and nostick flags to alter the behavior of stateful
+       load balancing.
+       Bump __FreeBSD_version to 800078.
 20090408:
        Do not use Giant for kbdmux(4) locking. This is wrong and
        apparently causing more problems than it solves. This will

Modified: head/sbin/route/keywords
==============================================================================
--- head/sbin/route/keywords    Tue Apr 14 22:53:22 2009        (r191079)
+++ head/sbin/route/keywords    Tue Apr 14 23:05:36 2009        (r191080)
@@ -33,6 +33,7 @@ mtu
 net
 netmask
 nostatic
+nostick
 osi
 prefixlen
 proto1
@@ -44,8 +45,11 @@ rtt
 rttvar
 sa
 sendpipe
+show
 ssthresh
 static
+sticky
+weight
 x25
 xns
 xresolve

Modified: head/sbin/route/route.c
==============================================================================
--- head/sbin/route/route.c     Tue Apr 14 22:53:22 2009        (r191079)
+++ head/sbin/route/route.c     Tue Apr 14 23:05:36 2009        (r191080)
@@ -169,6 +169,7 @@ main(argc, argv)
        if (*argv)
                switch (keyword(*argv)) {
                case K_GET:
+               case K_SHOW:
                        uid = 0;
                        /* FALLTHROUGH */
 
@@ -548,6 +549,7 @@ set_metric(value, key)
        caseof(K_SSTHRESH, RTV_SSTHRESH, rmx_ssthresh);
        caseof(K_RTT, RTV_RTT, rmx_rtt);
        caseof(K_RTTVAR, RTV_RTTVAR, rmx_rttvar);
+       caseof(K_WEIGHT, RTV_WEIGHT, rmx_weight);
        }
        rtm_inits |= flag;
        if (lockrest || locking)
@@ -571,8 +573,9 @@ newroute(argc, argv)
                errx(EX_NOPERM, "must be root to alter routing table");
        }
        cmd = argv[0];
-       if (*cmd != 'g')
+       if (*cmd != 'g' && *cmd != 's')
                shutdown(s, SHUT_RD); /* Don't want to read back our messages */
+
        while (--argc > 0) {
                if (**(++argv)== '-') {
                        switch (key = keyword(1 + *argv)) {
@@ -635,6 +638,12 @@ newroute(argc, argv)
                        case K_STATIC:
                                flags |= RTF_STATIC;
                                break;
+                       case K_STICKY:
+                               flags |= RTF_STICKY;
+                               break;
+                       case K_NOSTICK:
+                               flags &= ~RTF_STICKY;
+                               break;
                        case K_IFA:
                                if (!--argc)
                                        usage((char *)NULL);
@@ -688,6 +697,7 @@ newroute(argc, argv)
                        case K_SSTHRESH:
                        case K_RTT:
                        case K_RTTVAR:
+                       case K_WEIGHT:
                                if (!--argc)
                                        usage((char *)NULL);
                                set_metric(*++argv, key);
@@ -741,7 +751,7 @@ newroute(argc, argv)
                } else
                        break;
        }
-       if (*cmd == 'g')
+       if (*cmd == 'g' || *cmd == 's')
                exit(ret != 0);
        if (!qflag) {
                oerrno = errno;
@@ -1193,7 +1203,7 @@ rtmsg(cmd, flags)
                cmd = RTM_ADD;
        else if (cmd == 'c')
                cmd = RTM_CHANGE;
-       else if (cmd == 'g') {
+       else if (cmd == 'g' || cmd == 's') {
                cmd = RTM_GET;
                if (so_ifp.sa.sa_family == 0) {
                        so_ifp.sa.sa_family = AF_LINK;
@@ -1297,13 +1307,13 @@ char *msgtypes[] = {
 };
 
 char metricnames[] =
-"\011pksent\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire\2hopcount"
+"\011weight\010rttvar\7rtt\6ssthresh\5sendpipe\4recvpipe\3expire"
 "\1mtu";
 char routeflags[] =
-"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE\010MASK_PRESENT"
-"\011CLONING\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE\016b016"
-"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3\024CHAINDELETE"
-"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST";
+"\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE"
+"\012XRESOLVE\013LLINFO\014STATIC\015BLACKHOLE"
+"\017PROTO2\020PROTO1\021PRCLONING\022WASCLONED\023PROTO3"
+"\025PINNED\026LOCAL\027BROADCAST\030MULTICAST\035STICKY";
 char ifnetflags[] =
 "\1UP\2BROADCAST\3DEBUG\4LOOPBACK\5PTP\6b6\7RUNNING\010NOARP"
 "\011PPROMISC\012ALLMULTI\013OACTIVE\014SIMPLEX\015LINK0\016LINK1"
@@ -1466,14 +1476,13 @@ print_getmsg(rtm, msglen)
 #define msec(u)        (((u) + 500) / 1000)            /* usec to msec */
 
        (void) printf("\n%s\n", "\
- recvpipe  sendpipe  ssthresh  rtt,msec    rttvar  hopcount      mtu     expire");
+ recvpipe  sendpipe  ssthresh  rtt,msec    mtu        weight    expire");
        printf("%8ld%c ", rtm->rtm_rmx.rmx_recvpipe, lock(RPIPE));
        printf("%8ld%c ", rtm->rtm_rmx.rmx_sendpipe, lock(SPIPE));
        printf("%8ld%c ", rtm->rtm_rmx.rmx_ssthresh, lock(SSTHRESH));
        printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rtt), lock(RTT));
-       printf("%8ld%c ", msec(rtm->rtm_rmx.rmx_rttvar), lock(RTTVAR));
-       printf("%8ld%c ", rtm->rtm_rmx.rmx_hopcount, lock(HOPCOUNT));
        printf("%8ld%c ", rtm->rtm_rmx.rmx_mtu, lock(MTU));
+       printf("%8ld%c ", rtm->rtm_rmx.rmx_weight, lock(WEIGHT));
        if (rtm->rtm_rmx.rmx_expire)
                rtm->rtm_rmx.rmx_expire -= time(0);
        printf("%8ld%c\n", rtm->rtm_rmx.rmx_expire, lock(EXPIRE));

Modified: head/sys/net/radix_mpath.c
==============================================================================
--- head/sys/net/radix_mpath.c  Tue Apr 14 22:53:22 2009        (r191079)
+++ head/sys/net/radix_mpath.c  Tue Apr 14 23:05:36 2009        (r191080)
@@ -77,15 +77,18 @@ rn_mpath_next(struct radix_node *rn)
                return NULL;
 }
 
-u_int32_t
+uint32_t
 rn_mpath_count(struct radix_node *rn)
 {
-       u_int32_t i;
-
-       i = 1;
-       while ((rn = rn_mpath_next(rn)) != NULL)
-               i++;
-       return i;
+       uint32_t i = 0;
+       struct rtentry *rt;
+       
+       while (rn != NULL) {
+               rt = (struct rtentry *)rn;
+               i += rt->rt_rmx.rmx_weight;
+               rn = rn_mpath_next(rn);
+       }
+       return (i);
 }
 
 struct rtentry *
@@ -256,10 +259,12 @@ different:
 }
 
 void
-rtalloc_mpath_fib(struct route *ro, u_int32_t hash, u_int fibnum)
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
 {
        struct radix_node *rn0, *rn;
        u_int32_t n;
+       struct rtentry *rt;
+       int64_t weight;
 
        /*
         * XXX we don't attempt to lookup cached route again; what should
@@ -284,25 +289,31 @@ rtalloc_mpath_fib(struct route *ro, u_in
        /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
        hash += hashjitter;
        hash %= n;
-       while (hash-- > 0 && rn) {
+       for (weight = abs((int32_t)hash), rt = ro->ro_rt;
+            weight >= rt->rt_rmx.rmx_weight && rn; 
+            weight -= rt->rt_rmx.rmx_weight) {
+               
                /* stay within the multipath routes */
                if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
                        break;
                rn = rn->rn_dupedkey;
+               rt = (struct rtentry *)rn;
        }
-
        /* XXX try filling rt_gwroute and avoid unreachable gw  */
 
-       /* if gw selection fails, use the first match (default) */
+       /* gw selection has failed - there must be only zero weight routes */
        if (!rn) {
                RT_UNLOCK(ro->ro_rt);
+               ro->ro_rt = NULL;
                return;
        }
-       
-       RTFREE_LOCKED(ro->ro_rt);
-       ro->ro_rt = (struct rtentry *)rn;
-       RT_LOCK(ro->ro_rt);
-       RT_ADDREF(ro->ro_rt);
+       if (ro->ro_rt != rt) {
+               RTFREE_LOCKED(ro->ro_rt);
+               ro->ro_rt = (struct rtentry *)rn;
+               RT_LOCK(ro->ro_rt);
+               RT_ADDREF(ro->ro_rt);
+
+       } 
        RT_UNLOCK(ro->ro_rt);
 }
 

Modified: head/sys/net/route.c
==============================================================================
--- head/sys/net/route.c        Tue Apr 14 22:53:22 2009        (r191079)
+++ head/sys/net/route.c        Tue Apr 14 23:05:36 2009        (r191080)
@@ -826,6 +826,103 @@ bad:
        return (error);
 }
 
+#ifdef RADIX_MPATH
+static int
+rn_mpath_update(int req, struct rt_addrinfo *info,
+    struct radix_node_head *rnh, struct rtentry **ret_nrt)
+{
+       /*
+        * if we got multipath routes, we require users to specify
+        * a matching RTAX_GATEWAY.
+        */
+       struct rtentry *rt, *rto = NULL;
+       register struct radix_node *rn;
+       int error = 0;
+
+       rn = rnh->rnh_matchaddr(dst, rnh);
+       if (rn == NULL)
+               return (ESRCH);
+       rto = rt = RNTORT(rn);
+       rt = rt_mpath_matchgate(rt, gateway);
+       if (rt == NULL)
+               return (ESRCH);
+       /*
+        * this is the first entry in the chain
+        */
+       if (rto == rt) {
+               rn = rn_mpath_next((struct radix_node *)rt);
+               /*
+                * there is another entry, now it's active
+                */
+               if (rn) {
+                       rto = RNTORT(rn);
+                       RT_LOCK(rto);
+                       rto->rt_flags |= RTF_UP;
+                       RT_UNLOCK(rto);
+               } else if (rt->rt_flags & RTF_GATEWAY) {
+                       /*
+                        * For gateway routes, we need to 
+                        * make sure that we we are deleting
+                        * the correct gateway. 
+                        * rt_mpath_matchgate() does not 
+                        * check the case when there is only
+                        * one route in the chain.  
+                        */
+                       if (gateway &&
+                           (rt->rt_gateway->sa_len != gateway->sa_len ||
+                               memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
+                               error = ESRCH;
+                       goto done;
+               }
+               /*
+                * use the normal delete code to remove
+                * the first entry
+                */
+               if (req != RTM_DELETE) 
+                       goto nondelete;
+
+               error = ENOENT;
+               goto done;
+       }
+               
+       /*
+        * if the entry is 2nd and on up
+        */
+       if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
+               panic ("rtrequest1: rt_mpath_deldup");
+       RT_LOCK(rt);
+       RT_ADDREF(rt);
+       if (req == RTM_DELETE) {
+               rt->rt_flags &= ~RTF_UP;
+               /*
+                * One more rtentry floating around that is not
+                * linked to the routing table. rttrash will be decremented
+                * when RTFREE(rt) is eventually called.
+                */
+               V_rttrash++;
+               
+       }
+       
+nondelete:
+       if (req != RTM_DELETE)
+               panic("unrecognized request %d", req);
+       
+
+       /*
+        * If the caller wants it, then it can have it,
+        * but it's up to it to free the rtentry as we won't be
+        * doing it.
+        */
+       if (ret_nrt) {
+               *ret_nrt = rt;
+               RT_UNLOCK(rt);
+       } else
+               RTFREE_LOCKED(rt);
+done:
+       return (error);
+}
+#endif
+
 int
 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
                                u_int fibnum)
@@ -864,65 +961,15 @@ rtrequest1_fib(int req, struct rt_addrin
        switch (req) {
        case RTM_DELETE:
 #ifdef RADIX_MPATH
-               /*
-                * if we got multipath routes, we require users to specify
-                * a matching RTAX_GATEWAY.
-                */
                if (rn_mpath_capable(rnh)) {
-                       struct rtentry *rto = NULL;
-
-                       rn = rnh->rnh_matchaddr(dst, rnh);
-                       if (rn == NULL)
-                               senderr(ESRCH);
-                       rto = rt = RNTORT(rn);
-                       rt = rt_mpath_matchgate(rt, gateway);
-                       if (!rt)
-                               senderr(ESRCH);
-                       /*
-                        * this is the first entry in the chain
-                        */
-                       if (rto == rt) {
-                               rn = rn_mpath_next((struct radix_node *)rt);
-                               /*
-                                * there is another entry, now it's active
-                                */
-                               if (rn) {
-                                       rto = RNTORT(rn);
-                                       RT_LOCK(rto);
-                                       rto->rt_flags |= RTF_UP;
-                                       RT_UNLOCK(rto);
-                               } else if (rt->rt_flags & RTF_GATEWAY) {
-                                       /*
-                                        * For gateway routes, we need to 
-                                        * make sure that we we are deleting
-                                        * the correct gateway. 
-                                        * rt_mpath_matchgate() does not 
-                                        * check the case when there is only
-                                        * one route in the chain.  
-                                        */
-                                       if (gateway &&
-                                           (rt->rt_gateway->sa_len != gateway->sa_len ||
-                                           memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
-                                               senderr(ESRCH);
-                               }
-                               /*
-                                * use the normal delete code to remove
-                                * the first entry
-                                */
-                               goto normal_rtdel;
-                       }
+                       error = rn_mpath_update(req, info, rnh, ret_nrt);
                        /*
-                        * if the entry is 2nd and on up
+                        * "bad" holds true for the success case
+                        * as well
                         */
-                       if (!rt_mpath_deldup(rto, rt))
-                               panic ("rtrequest1: rt_mpath_deldup");
-                       RT_LOCK(rt);
-                       RT_ADDREF(rt);
-                       rt->rt_flags &= ~RTF_UP;
-                       goto deldone;  /* done with the RTM_DELETE command */
+                       if (error != ENOENT)
+                               goto bad;
                }
-
-normal_rtdel:
 #endif
                /*
                 * Remove the item from the tree and return it.
@@ -944,9 +991,6 @@ normal_rtdel:
                if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
                        ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 
-#ifdef RADIX_MPATH
-deldone:
-#endif
                /*
                 * One more rtentry floating around that is not
                 * linked to the routing table. rttrash will be decremented
@@ -1019,6 +1063,7 @@ deldone:
                IFAREF(ifa);
                rt->rt_ifa = ifa;
                rt->rt_ifp = ifa->ifa_ifp;
+               rt->rt_rmx.rmx_weight = 1;
 
 #ifdef RADIX_MPATH
                /* do not permit exactly the same dst/mask/gw pair */

Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h        Tue Apr 14 22:53:22 2009        (r191079)
+++ head/sys/net/route.h        Tue Apr 14 23:05:36 2009        (r191080)
@@ -58,6 +58,7 @@ struct rt_metrics_lite {
        u_long  rmx_mtu;        /* MTU for this path */
        u_long  rmx_expire;     /* lifetime for route, e.g. redirect */
        u_long  rmx_pksent;     /* packets sent using this route */
+       u_long  rmx_weight;     /* absolute weight */ 
 };
 
 struct rt_metrics {
@@ -71,7 +72,8 @@ struct rt_metrics {
        u_long  rmx_rtt;        /* estimated round trip time */
        u_long  rmx_rttvar;     /* estimated rtt variance */
        u_long  rmx_pksent;     /* packets sent using this route */
-       u_long  rmx_filler[4];  /* will be used for T/TCP later */
+       u_long  rmx_weight;     /* route weight */
+       u_long  rmx_filler[3];  /* will be used for T/TCP later */
 };
 
 /*
@@ -193,13 +195,15 @@ struct ortentry {
 #define        RTF_LOCAL       0x200000        /* route represents a local address */
 #define        RTF_BROADCAST   0x400000        /* route represents a bcast address */
 #define        RTF_MULTICAST   0x800000        /* route represents a mcast address */
-                                       /* 0x1000000 and up unassigned */
-#define        RTF_RNH_LOCKED   0x40000000     /* radix node head locked by caller */
+                                       /* 0x8000000 and up unassigned */
+#define        RTF_STICKY       0x10000000     /* always route dst->src */
+
+#define        RTF_RNH_LOCKED   0x40000000     /* radix node head is locked */
 
 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
 #define RTF_FMASK      \
        (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
-        RTF_REJECT | RTF_STATIC)
+        RTF_REJECT | RTF_STATIC | RTF_STICKY)
 
 /*
  * Routing statistics.
@@ -225,12 +229,11 @@ struct rt_msghdr {
        int     rtm_seq;        /* for sender to identify action */
        int     rtm_errno;      /* why failed */
        int     rtm_fmask;      /* bitmask used in RTM_CHANGE message */
-#define        rtm_use rtm_fmask       /* deprecated, use rtm_rmx->rmx_pksent */
        u_long  rtm_inits;      /* which metrics we are initializing */
        struct  rt_metrics rtm_rmx; /* metrics themselves */
 };
 
-#define RTM_VERSION    5       /* Up the ante and ignore older versions */
+#define RTM_VERSION    6       /* Up the ante and ignore older versions */
 
 /*
  * Message types.
@@ -265,6 +268,7 @@ struct rt_msghdr {
 #define RTV_SSTHRESH   0x20    /* init or lock _ssthresh */
 #define RTV_RTT                0x40    /* init or lock _rtt */
 #define RTV_RTTVAR     0x80    /* init or lock _rttvar */
+#define RTV_WEIGHT     0x100   /* init or lock _weight */
 
 /*
  * Bitmask values for rtm_addrs.

Modified: head/sys/net/rtsock.c
==============================================================================
--- head/sys/net/rtsock.c       Tue Apr 14 22:53:22 2009        (r191079)
+++ head/sys/net/rtsock.c       Tue Apr 14 23:05:36 2009        (r191080)
@@ -637,7 +637,6 @@ route_output(struct mbuf *m, struct sock
                        }
                        (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
                        rtm->rtm_flags = rt->rt_flags;
-                       rtm->rtm_use = 0;
                        rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
                        rtm->rtm_addrs = info.rti_addrs;
                        break;
@@ -691,10 +690,8 @@ route_output(struct mbuf *m, struct sock
                                rt->rt_ifp = info.rti_ifp;
                        }
                        /* Allow some flags to be toggled on change. */
-                       if (rtm->rtm_fmask & RTF_FMASK)
-                               rt->rt_flags = (rt->rt_flags &
-                                   ~rtm->rtm_fmask) |
-                                   (rtm->rtm_flags & rtm->rtm_fmask);
+                       rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
+                                   (rtm->rtm_flags & RTF_FMASK);
                        rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
                                        &rt->rt_rmx);
                        rtm->rtm_index = rt->rt_ifp->if_index;
@@ -773,6 +770,7 @@ rt_setmetrics(u_long which, const struct
         * of tcp hostcache. The rest is ignored.
         */
        metric(RTV_MTU, rmx_mtu);
+       metric(RTV_WEIGHT, rmx_weight);
        /* Userland -> kernel timebase conversion. */
        if (which & RTV_EXPIRE)
                out->rmx_expire = in->rmx_expire ?
@@ -786,6 +784,7 @@ rt_getmetrics(const struct rt_metrics_li
 #define metric(e) out->e = in->e;
        bzero(out, sizeof(*out));
        metric(rmx_mtu);
+       metric(rmx_weight);
        /* Kernel -> userland timebase conversion. */
        out->rmx_expire = in->rmx_expire ?
            in->rmx_expire - time_uptime + time_second : 0;
@@ -1257,7 +1256,10 @@ sysctl_dumpentry(struct radix_node *rn, 
                struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
                rtm->rtm_flags = rt->rt_flags;
-               rtm->rtm_use = rt->rt_rmx.rmx_pksent;
+               /*
+                * let's be honest about this being a retarded hack
+                */
+               rtm->rtm_fmask = rt->rt_rmx.rmx_pksent;
                rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
                rtm->rtm_index = rt->rt_ifp->if_index;
                rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;

Modified: head/sys/sys/param.h
==============================================================================
--- head/sys/sys/param.h        Tue Apr 14 22:53:22 2009        (r191079)
+++ head/sys/sys/param.h        Tue Apr 14 23:05:36 2009        (r191080)
@@ -57,7 +57,7 @@
  *             is created, otherwise 1.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 800077       /* Master, propagated to newvers */
+#define __FreeBSD_version 800078       /* Master, propagated to newvers */
 
 #ifndef LOCORE
 #include <sys/types.h>
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to