>Number:         173477
>Category:       kern
>Synopsis:       mpath bugfixes
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Nov 08 16:50:01 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     Ingo Flaschberger
>Release:        9.1 Stable
>Organization:
crossip communications gmbh
>Environment:
9.1-PRERELEASE
>Description:
Several mpath bugfixes:
*) if mpath is enabled, the interface loopback route could not be deleted
   (introduced SVN rev 226241)
*) route selection crashes when 3 mpath routes are installed and deleted:
   1: route to gw1 weight 3
   2: route to gw2 weight 2
   3: interface route metric 1
   and deleted in 2-1 order (already freed rm_leaf returned)
*) added correct mpath selection on interface-routes (in_lltable_rtcheck)
*) added mpath to fastforward
*) do correct equal cost mpath route selection based on weight 
(rtalloc_mpath_fib_flags)

>How-To-Repeat:
Mpath test-script:
em0: interface must be up
em3: up and there must be a pingable host with 10.11.11.1/24

The routing table has to be the same before and after running the script.
#!/bin/sh
#
# Multipath routing test script.
#
# Prerequisites:
#   em0: interface must be up
#   em3: interface must be up, with a pingable host at 10.11.11.1/24
#
# Each step changes the interface/route configuration, sends one ping to
# 10.11.11.1 and prints "testN ok" or "testN failed" depending on whether
# the ping result matches the expectation for that step.  The script
# waits for [Enter] between steps so the routing table can be inspected.

# Print a prompt and wait for the user to press [Enter].
# The typed text is stored in a scratch variable and ignored.
pause()
{
        printf '%s' "Press [Enter] key "
        read -r _reply
}

# Send a single echo request to the test host with a one second timeout.
# Exit status is that of ping: zero if a reply was received.
probe()
{
        ping -t 1 -c 1 10.11.11.1 > /dev/null
}

# $1 = test number.  The step passes when the host answers.
expect_reachable()
{
        if probe; then
                echo "test$1 ok"
        else
                echo "test$1 failed"
        fi
}

# $1 = test number.  The step passes when the host does NOT answer.
expect_unreachable()
{
        if probe; then
                echo "test$1 failed"
        else
                echo "test$1 ok"
        fi
}

ifconfig em0 192.168.2.100/24

pause

# Interface route only.
ifconfig em3 alias 10.11.11.175/24 > /dev/null
expect_reachable 1

pause

# Add gateway routes next to the interface route.
route add 10.11.11.0/24 192.168.2.1 -weight 2 > /dev/null
expect_reachable 2

pause

route add 10.11.11.0/24 192.168.2.3 -weight 3 > /dev/null
expect_reachable 3

pause

# Remove the gateway routes again, in the order they were added.
route delete 10.11.11.0/24 192.168.2.1 > /dev/null
expect_reachable 4

pause

route delete 10.11.11.0/24 192.168.2.3 > /dev/null
expect_reachable 5

pause

# No interface route: the host must be unreachable.
ifconfig em3 -alias 10.11.11.175 > /dev/null
expect_unreachable 6

pause

# Gateway route without interface route: still unreachable.
route add 10.11.11.0/24 192.168.2.1 -weight 2 > /dev/null
expect_unreachable 7

pause

# Interface route added after a gateway route.
ifconfig em3 alias 10.11.11.175/24 > /dev/null
expect_reachable 8

pause

route add 10.11.11.0/24 192.168.2.3 -weight 3 > /dev/null
expect_reachable 9

pause

# Remove the interface route while both gateway routes exist.
ifconfig em3 -alias 10.11.11.175 > /dev/null
expect_unreachable 10

pause

route delete 10.11.11.0/24 192.168.2.1 > /dev/null
expect_unreachable 11

pause

route delete 10.11.11.0/24 192.168.2.3 > /dev/null
expect_unreachable 12

pause

# Both gateway routes first, interface route last.
route add 10.11.11.0/24 192.168.2.1 -weight 2 > /dev/null
expect_unreachable 13

pause

route add 10.11.11.0/24 192.168.2.3 -weight 3 > /dev/null
expect_unreachable 14

pause

ifconfig em3 alias 10.11.11.175/24 > /dev/null
expect_reachable 15

pause

# Delete the gateway routes in reverse order of insertion.
route delete 10.11.11.0/24 192.168.2.3 > /dev/null
expect_reachable 16

pause

route delete 10.11.11.0/24 192.168.2.1 > /dev/null
expect_reachable 17

pause

# Final cleanup of the interface route.
ifconfig em3 -alias 10.11.11.175 > /dev/null
expect_unreachable 18


>Fix:


Patch attached with submission follows:

diff -u -r sys_org/contrib/ipfilter/netinet/ip_pool.c 
/router/usr/src/sys/contrib/ipfilter/netinet/ip_pool.c
--- sys_org/contrib/ipfilter/netinet/ip_pool.c  2012-11-08 15:15:22.000000000 
+0100
+++ /router/usr/src/sys/contrib/ipfilter/netinet/ip_pool.c      2012-10-29 
16:19:05.000000000 +0100
@@ -620,7 +620,7 @@
 
        RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
        ipo->ipo_head->rnh_deladdr(&ipe->ipn_addr, &ipe->ipn_mask,
-                                  ipo->ipo_head);
+                                  ipo->ipo_head, NULL);
        RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
 
        ip_pool_node_deref(ipe);
@@ -751,7 +751,7 @@
        RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
        while ((n = ipo->ipo_list) != NULL) {
                ipo->ipo_head->rnh_deladdr(&n->ipn_addr, &n->ipn_mask,
-                                          ipo->ipo_head);
+                                          ipo->ipo_head, NULL);
 
                *n->ipn_pnext = n->ipn_next;
                if (n->ipn_next)
@@ -963,7 +963,7 @@
        struct radix_node_head *rnh = p;
        struct radix_node *d;
 
-       d = rnh->rnh_deladdr(n->rn_key, NULL, rnh);
+       d = rnh->rnh_deladdr(n->rn_key, NULL, rnh, NULL);
        if (d != NULL) {
                FreeS(d, max_keylen + 2 * sizeof (*d));
        }
diff -u -r sys_org/kern/vfs_export.c /router/usr/src/sys/kern/vfs_export.c
--- sys_org/kern/vfs_export.c   2012-11-08 15:15:13.000000000 +0100
+++ /router/usr/src/sys/kern/vfs_export.c       2012-10-29 16:16:33.000000000 
+0100
@@ -228,7 +228,7 @@
        struct radix_node_head *rnh = (struct radix_node_head *) w;
        struct ucred *cred;
 
-       (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
+       (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh, NULL);
        cred = ((struct netcred *)rn)->netc_anon;
        if (cred != NULL)
                crfree(cred);
diff -u -r sys_org/net/if.c /router/usr/src/sys/net/if.c
--- sys_org/net/if.c    2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/if.c        2012-10-30 00:34:40.000000000 +0100
@@ -70,6 +70,7 @@
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/radix.h>
+#include "opt_mpath.h"
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -1485,6 +1486,9 @@
 {
        int error = 0;
        struct rt_addrinfo info;
+#ifdef RADIX_MPATH
+       struct ifaddr *new_ifa;
+#else
        struct sockaddr_dl null_sdl;
 
        bzero(&null_sdl, sizeof(null_sdl));
@@ -1492,14 +1496,25 @@
        null_sdl.sdl_family = AF_LINK;
        null_sdl.sdl_type = ifa->ifa_ifp->if_type;
        null_sdl.sdl_index = ifa->ifa_ifp->if_index;
+#endif
        bzero(&info, sizeof(info));
        info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
        info.rti_info[RTAX_DST] = ia;
+#ifdef RADIX_MPATH
+       info.rti_ifp = V_loif;
+
+       /* link_rtrequest modifies ifa - do this also */
+       new_ifa = ifaof_ifpforaddr( ia, V_loif);
+
+       /* rt_mpath_matchgate matches ifa_addr and not gateway */               
+       info.rti_info[RTAX_GATEWAY] = new_ifa->ifa_addr;
+#else
        info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
+#endif
        error = rtrequest1_fib(RTM_DELETE, &info, NULL, 0);
 
        if (error != 0)
-               log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+               log(LOG_INFO, "ifa_del_loopback_route: deletion failed err: 
%d\n", error);
 
        return (error);
 }
diff -u -r sys_org/net/radix.c /router/usr/src/sys/net/radix.c
--- sys_org/net/radix.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix.c     2012-11-08 15:20:04.000000000 +0100
@@ -312,7 +312,7 @@
         * lot of confusion.
         */
        if (t->rn_flags & RNF_ROOT)
-               t = t->rn_dupedkey;
+               t = t->rn_dupedkey;
        return t;
 on1:
        test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
@@ -723,12 +723,20 @@
                x = t->rn_right;
        /* Promote general routes from below */
        if (x->rn_bit < 0) {
-           for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
-               if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
-                       *mp = m = rn_new_radix_mask(x, 0);
-                       if (m)
-                               mp = &m->rm_mklist;
-               }
+               struct  radix_node *xx = NULL;
+               for (mp = &t->rn_mklist; x; xx = x, x = x->rn_dupedkey) {
+                       if (xx && xx->rn_mklist && xx->rn_mask == x->rn_mask &&
+                           x->rn_mklist == 0) {
+                               /* multipath route, bump refcount on first 
mklist */
+                               x->rn_mklist = xx->rn_mklist;
+                               x->rn_mklist->rm_refs++;
+                        }
+                        if (x->rn_mask && (x->rn_bit >= b_leaf) && 
x->rn_mklist == 0) {
+                               *mp = m = rn_new_radix_mask(x, 0);
+                               if (m)
+                                       mp = &m->rm_mklist;
+                        }
+                }
        } else if (x->rn_mklist) {
                /*
                 * Skip over masks whose index is > that of new node
@@ -760,11 +768,30 @@
                        break;
                if (m->rm_flags & RNF_NORMAL) {
                        mmask = m->rm_leaf->rn_mask;
-                       if (tt->rn_flags & RNF_NORMAL) {
-#if !defined(RADIX_MPATH)
+                       if (keyduplicated) {
+                               if (m->rm_leaf->rn_parent == tt)
+                                       /* new route is better */
+                                        m->rm_leaf = tt;
+#ifdef DIAGNOSTIC
+                                else {
+                                        for (t = m->rm_leaf; t;
+                                            t = t->rn_dupedkey)
+                                                if (t == tt)
+                                                        break;
+                                        if (t == NULL) {
+                                                log(LOG_ERR, "Non-unique "
+                                                    "normal route on dupedkey, 
"
+                                                    "mask not entered\n");
+                                                return tt;
+                                        }
+                                }
+#endif
+                                m->rm_refs++;
+                                tt->rn_mklist = m;
+                                return tt;
+                        } else if (tt->rn_flags & RNF_NORMAL) {
                            log(LOG_ERR,
                                "Non-unique normal route, mask not entered\n");
-#endif
                                return tt;
                        }
                } else
@@ -783,9 +810,10 @@
 }
 
 struct radix_node *
-rn_delete(v_arg, netmask_arg, head)
+rn_delete(v_arg, netmask_arg, head, rn)
        void *v_arg, *netmask_arg;
        struct radix_node_head *head;
+       struct radix_node *rn;
 {
        register struct radix_node *t, *p, *x, *tt;
        struct radix_mask *m, *saved_m, **mp;
@@ -815,18 +843,41 @@
                        if ((tt = tt->rn_dupedkey) == 0)
                                return (0);
        }
+#ifdef RADIX_MPATH
+        if (rn) {
+                while (tt != rn)
+                        if ((tt = tt->rn_dupedkey) == 0)
+                                return (0);
+        }
+#endif
        if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
                goto on1;
        if (tt->rn_flags & RNF_NORMAL) {
-               if (m->rm_leaf != tt || m->rm_refs > 0) {
-                       log(LOG_ERR, "rn_delete: inconsistent annotation\n");
-                       return 0;  /* dangling ref could cause disaster */
-               }
+               if (m->rm_leaf != tt && m->rm_refs == 0) {
+                       log(LOG_ERR, "rn_delete: inconsistent normal "
+                           "annotation\n");
+                        return (0);
+                }
+                if (m->rm_leaf != tt) {
+                        if (--m->rm_refs >= 0)
+                                goto on1;
+                }
+                /* tt is currently the head of the possible multipath chain */
+                if (m->rm_refs > 0) {
+                        if (tt->rn_dupedkey == NULL ||
+                            tt->rn_dupedkey->rn_mklist != m) {
+                                log(LOG_ERR, "rn_delete: inconsistent "
+                                    "dupedkey list\n");
+                                return (0);
+                        }
+                        m->rm_leaf = tt->rn_dupedkey;
+                        --m->rm_refs;
+                        goto on1;
+                }
+               /* else tt is last and only route */
        } else {
-               if (m->rm_mask != tt->rn_mask) {
-                       log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+               if (m->rm_mask != tt->rn_mask)
                        goto on1;
-               }
                if (--m->rm_refs >= 0)
                        goto on1;
        }
@@ -875,15 +926,10 @@
                        else
                                t->rn_right = x;
                } else {
-                       /* find node in front of tt on the chain */
-                       for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
-                               p = p->rn_dupedkey;
-                       if (p) {
-                               p->rn_dupedkey = tt->rn_dupedkey;
-                               if (tt->rn_dupedkey)            /* parent */
-                                       tt->rn_dupedkey->rn_parent = p;
-                                                               /* parent */
-                       } else log(LOG_ERR, "rn_delete: couldn't find us\n");
+                       x = saved_tt;
+                       t->rn_dupedkey = tt->rn_dupedkey;
+                       if (tt->rn_dupedkey)
+                               tt->rn_dupedkey->rn_parent = t;
                }
                t = tt + 1;
                if  (t->rn_flags & RNF_ACTIVE) {
@@ -931,8 +977,16 @@
                                if (m == x->rn_mklist) {
                                        struct radix_mask *mm = m->rm_mklist;
                                        x->rn_mklist = 0;
-                                       if (--(m->rm_refs) < 0)
+                                       if (--(m->rm_refs) < 0) {
                                                MKFree(m);
+                                        } else if (m->rm_flags & RNF_NORMAL) {
+                                                /*
+                                                 * don't progress because this
+                                                 * a multipath route. Next
+                                                 * route will use the same m.
+                                                 */
+                                                mm = m;
+                                        }
                                        m = mm;
                                }
                        if (m)
@@ -1107,7 +1161,7 @@
                        rn = rn->rn_left;
                next = rn;
                /* Process leaves */
-               while ((rn = base)) {
+               while ((rn = base) != NULL) {
                        base = rn->rn_dupedkey;
                        if (!(rn->rn_flags & RNF_ROOT)
                            && (error = (*f)(rn, w)))
diff -u -r sys_org/net/radix.h /router/usr/src/sys/net/radix.h
--- sys_org/net/radix.h 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix.h     2012-10-29 16:15:23.000000000 +0100
@@ -116,7 +116,8 @@
                (void *v, void *mask,
                     struct radix_node_head *head, struct radix_node nodes[]);
        struct  radix_node *(*rnh_deladdr)      /* remove based on sockaddr */
-               (void *v, void *mask, struct radix_node_head *head);
+               (void *v, void *mask, struct radix_node_head *head,
+                     struct radix_node *rn);
        struct  radix_node *(*rnh_delpkt)       /* remove based on packet hdr */
                (void *v, void *mask, struct radix_node_head *head);
        struct  radix_node *(*rnh_matchaddr)    /* locate based on sockaddr */
@@ -169,7 +170,8 @@
         *rn_addmask(void *, int, int),
         *rn_addroute (void *, void *, struct radix_node_head *,
                        struct radix_node [2]),
-        *rn_delete(void *, void *, struct radix_node_head *),
+        *rn_delete(void *, void *, struct radix_node_head *,
+                        struct radix_node *),
         *rn_lookup (void *v_arg, void *m_arg,
                        struct radix_node_head *head),
         *rn_match(void *, struct radix_node_head *);
diff -u -r sys_org/net/radix_mpath.c /router/usr/src/sys/net/radix_mpath.c
--- sys_org/net/radix_mpath.c   2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix_mpath.c       2012-10-30 01:33:18.000000000 
+0100
@@ -77,20 +77,6 @@
                return NULL;
 }
 
-uint32_t
-rn_mpath_count(struct radix_node *rn)
-{
-       uint32_t i = 0;
-       struct rtentry *rt;
-       
-       while (rn != NULL) {
-               rt = (struct rtentry *)rn;
-               i += rt->rt_rmx.rmx_weight;
-               rn = rn_mpath_next(rn);
-       }
-       return (i);
-}
-
 struct rtentry *
 rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
 {
@@ -122,33 +108,6 @@
        return (struct rtentry *)rn;
 }
 
-/* 
- * go through the chain and unlink "rt" from the list
- * the caller will free "rt"
- */
-int
-rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
-{
-        struct radix_node *t, *tt;
-
-        if (!headrt || !rt)
-            return (0);
-        t = (struct radix_node *)headrt;
-        tt = rn_mpath_next(t);
-        while (tt) {
-            if (tt == (struct radix_node *)rt) {
-                t->rn_dupedkey = tt->rn_dupedkey;
-                tt->rn_dupedkey = NULL;
-               tt->rn_flags &= ~RNF_ACTIVE;
-               tt[1].rn_flags &= ~RNF_ACTIVE;
-                return (1);
-            }
-            t = tt;
-            tt = rn_mpath_next((struct radix_node *)t);
-        }
-        return (0);
-}
-
 /*
  * check if we have the same key/mask/gateway on the table already.
  */
@@ -256,12 +215,21 @@
 }
 
 void
-rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) {
+       rtalloc_mpath_fib_flags( ro, hash, fibnum, 0);
+}
+
+/*
+ * flag RTF_GATEWAY returns only interface routes,
+ * only one interface-route is possible
+ */ 
+void
+rtalloc_mpath_fib_flags(struct route *ro, uint32_t hash, u_int fibnum, int 
flags)
 {
        struct radix_node *rn0, *rn;
-       u_int32_t n;
+       u_int32_t n = 0;
        struct rtentry *rt;
-       int64_t weight;
+       int64_t lowest_weight;
 
        /*
         * XXX we don't attempt to lookup cached route again; what should
@@ -269,29 +237,52 @@
         */
        if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
            && RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
-               return;                          
+               return;
        ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
 
        /* if the route does not exist or it is not multipath, don't care */
        if (ro->ro_rt == NULL)
                return;
        if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
+               if (flags & RTF_GATEWAY)
+                       return;
                RT_UNLOCK(ro->ro_rt);
                return;
        }
 
        /* beyond here, we use rn as the master copy */
        rn0 = rn = (struct radix_node *)ro->ro_rt;
-       n = rn_mpath_count(rn0);
 
-       /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
+       /* find count of lowest weight route */
+       for (rt = ro->ro_rt, lowest_weight = 9223372036854775807; rn != NULL;){
+               if( rt->rt_flags & RTF_UP) {
+                       if ((flags & RTF_GATEWAY) && 
+                           (!(rt->rt_flags & RTF_GATEWAY)) && 
+                           (!(rt->rt_flags & RTF_HOST)) )
+                               goto end;   /* only 1 interface route possible! 
*/
+                       if( lowest_weight > rt->rt_rmx.rmx_weight) {
+                               lowest_weight = rt->rt_rmx.rmx_weight;
+                               n = 1;
+                       } else if( lowest_weight == rt->rt_rmx.rmx_weight)
+                               n++;
+               }
+               if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
+                       break;
+               rn = rn->rn_dupedkey;
+               rt = (struct rtentry *)rn;
+       }
+       /* select now one of the lowest weight routes */
+       /* gw selection by Modulo-N Hash (RFC2991) */
        hash += hashjitter;
        hash %= n;
-       for (weight = abs((int32_t)hash), rt = ro->ro_rt;
-            weight >= rt->rt_rmx.rmx_weight && rn; 
-            weight -= rt->rt_rmx.rmx_weight) {
-               
-               /* stay within the multipath routes */
+       for ( rt = ro->ro_rt, rn = rn0, n = 0; rn != NULL; ) {
+               if( rt->rt_flags & RTF_UP) {
+                       if ( rt->rt_rmx.rmx_weight == lowest_weight) {
+                               if (n == hash)
+                                       break;
+                               n++;
+                       }
+               }
                if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
                        break;
                rn = rn->rn_dupedkey;
@@ -300,19 +291,22 @@
        /* XXX try filling rt_gwroute and avoid unreachable gw  */
 
        /* gw selection has failed - there must be only zero weight routes */
-       if (!rn) {
+       if (!rn || (flags & RTF_GATEWAY)) {
                RT_UNLOCK(ro->ro_rt);
                ro->ro_rt = NULL;
                return;
        }
+
+end:           
        if (ro->ro_rt != rt) {
                RTFREE_LOCKED(ro->ro_rt);
                ro->ro_rt = (struct rtentry *)rn;
                RT_LOCK(ro->ro_rt);
                RT_ADDREF(ro->ro_rt);
 
-       } 
-       RT_UNLOCK(ro->ro_rt);
+       }
+       if (!(flags & RTF_GATEWAY))
+               RT_UNLOCK(ro->ro_rt);
 }
 
 extern int     in6_inithead(void **head, int off);
diff -u -r sys_org/net/radix_mpath.h /router/usr/src/sys/net/radix_mpath.h
--- sys_org/net/radix_mpath.h   2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix_mpath.h       2012-10-30 01:33:28.000000000 
+0100
@@ -46,12 +46,12 @@
 struct sockaddr;
 int    rn_mpath_capable(struct radix_node_head *);
 struct radix_node *rn_mpath_next(struct radix_node *);
-u_int32_t rn_mpath_count(struct radix_node *);
 struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
 int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
     struct sockaddr *);
 void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
 #define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
+void rtalloc_mpath_fib_flags(struct route *, u_int32_t, u_int, int);
 struct radix_node *rn_mpath_lookup(void *, void *,
     struct radix_node_head *);
 int rt_mpath_deldup(struct rtentry *, struct rtentry *);
diff -u -r sys_org/net/route.c /router/usr/src/sys/net/route.c
--- sys_org/net/route.c 2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/route.c     2012-11-08 15:24:13.000000000 +0100
@@ -904,7 +904,7 @@
         * Remove the item from the tree; it should be there,
         * but when callers invoke us blindly it may not (sigh).
         */
-       rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
+       rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh, NULL);
        if (rn == NULL) {
                error = ESRCH;
                goto bad;
@@ -942,112 +942,6 @@
        return (error);
 }
 
-#ifdef RADIX_MPATH
-static int
-rn_mpath_update(int req, struct rt_addrinfo *info,
-    struct radix_node_head *rnh, struct rtentry **ret_nrt)
-{
-       /*
-        * if we got multipath routes, we require users to specify
-        * a matching RTAX_GATEWAY.
-        */
-       struct rtentry *rt, *rto = NULL;
-       register struct radix_node *rn;
-       int error = 0;
-
-       rn = rnh->rnh_matchaddr(dst, rnh);
-       if (rn == NULL)
-               return (ESRCH);
-       rto = rt = RNTORT(rn);
-       rt = rt_mpath_matchgate(rt, gateway);
-       if (rt == NULL)
-               return (ESRCH);
-       /*
-        * this is the first entry in the chain
-        */
-       if (rto == rt) {
-               rn = rn_mpath_next((struct radix_node *)rt);
-               /*
-                * there is another entry, now it's active
-                */
-               if (rn) {
-                       rto = RNTORT(rn);
-                       RT_LOCK(rto);
-                       rto->rt_flags |= RTF_UP;
-                       RT_UNLOCK(rto);
-               } else if (rt->rt_flags & RTF_GATEWAY) {
-                       /*
-                        * For gateway routes, we need to 
-                        * make sure that we we are deleting
-                        * the correct gateway. 
-                        * rt_mpath_matchgate() does not 
-                        * check the case when there is only
-                        * one route in the chain.  
-                        */
-                       if (gateway &&
-                           (rt->rt_gateway->sa_len != gateway->sa_len ||
-                               memcmp(rt->rt_gateway, gateway, 
gateway->sa_len)))
-                               error = ESRCH;
-                       else {
-                               /*
-                                * remove from tree before returning it
-                                * to the caller
-                                */
-                               rn = rnh->rnh_deladdr(dst, netmask, rnh);
-                               KASSERT(rt == RNTORT(rn), ("radix node 
disappeared"));
-                               goto gwdelete;
-                       }
-                       
-               }
-               /*
-                * use the normal delete code to remove
-                * the first entry
-                */
-               if (req != RTM_DELETE) 
-                       goto nondelete;
-
-               error = ENOENT;
-               goto done;
-       }
-               
-       /*
-        * if the entry is 2nd and on up
-        */
-       if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
-               panic ("rtrequest1: rt_mpath_deldup");
-gwdelete:
-       RT_LOCK(rt);
-       RT_ADDREF(rt);
-       if (req == RTM_DELETE) {
-               rt->rt_flags &= ~RTF_UP;
-               /*
-                * One more rtentry floating around that is not
-                * linked to the routing table. rttrash will be decremented
-                * when RTFREE(rt) is eventually called.
-                */
-               V_rttrash++;
-       }
-       
-nondelete:
-       if (req != RTM_DELETE)
-               panic("unrecognized request %d", req);
-       
-
-       /*
-        * If the caller wants it, then it can have it,
-        * but it's up to it to free the rtentry as we won't be
-        * doing it.
-        */
-       if (ret_nrt) {
-               *ret_nrt = rt;
-               RT_UNLOCK(rt);
-       } else
-               RTFREE_LOCKED(rt);
-done:
-       return (error);
-}
-#endif
-
 int
 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
                                u_int fibnum)
@@ -1100,23 +994,26 @@
                        rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
                        dst = (struct sockaddr *)&mdst;
                }
+               if ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL)
+                       senderr(ESRCH);
+                rt = RNTORT(rn);
 #ifdef RADIX_MPATH
+                /*
+                 * if we got multipath routes, we require users to specify
+                 * a matching RTAX_GATEWAY.
+                 */
                if (rn_mpath_capable(rnh)) {
-                       error = rn_mpath_update(req, info, rnh, ret_nrt);
-                       /*
-                        * "bad" holds true for the success case
-                        * as well
-                        */
-                       if (error != ENOENT)
-                               goto bad;
-                       error = 0;
+                       rt = rt_mpath_matchgate( rt, gateway);
+                       rn = (struct radix_node *)rt;
+                       if (!rt)
+                               senderr(ESRCH);
                }
 #endif
                /*
                 * Remove the item from the tree and return it.
                 * Complain if it is not there and do no more processing.
                 */
-               rn = rnh->rnh_deladdr(dst, netmask, rnh);
+               rn = rnh->rnh_deladdr(dst, netmask, rnh, rn);
                if (rn == NULL)
                        senderr(ESRCH);
                if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
@@ -1212,7 +1109,7 @@
                rt->rt_ifa = ifa;
                rt->rt_ifp = ifa->ifa_ifp;
                rt->rt_rmx.rmx_weight = 1;
-
+               
 #ifdef RADIX_MPATH
                /* do not permit exactly the same dst/mask/gw pair */
                if (rn_mpath_capable(rnh) &&
@@ -1373,7 +1270,7 @@
         */
        if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
                caddr_t new;
-
+               
                R_Malloc(new, caddr_t, dlen + glen);
                if (new == NULL)
                        return ENOBUFS;
@@ -1506,9 +1403,8 @@
                        RADIX_NODE_HEAD_LOCK(rnh);
 #ifdef RADIX_MPATH
                        if (rn_mpath_capable(rnh)) {
-
-                               rn = rnh->rnh_matchaddr(dst, rnh);
-                               if (rn == NULL) 
+                               rn = rnh->rnh_lookup(dst, netmask, rnh);
+                               if (rn == NULL)
                                        error = ESRCH;
                                else {
                                        rt = RNTORT(rn);
@@ -1523,6 +1419,7 @@
                                            ifa->ifa_addr);
                                        if (!rt) 
                                                error = ESRCH;
+                                        rn = (struct radix_node *)rt;
                                }
                        }
                        else
diff -u -r sys_org/netatalk/at_rmx.c /router/usr/src/sys/netatalk/at_rmx.c
--- sys_org/netatalk/at_rmx.c   2012-11-08 15:15:09.000000000 +0100
+++ /router/usr/src/sys/netatalk/at_rmx.c       2012-10-29 16:20:11.000000000 
+0100
@@ -91,10 +91,10 @@
 }
 
 static struct radix_node *
-at_delroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
+at_delroute(void *v_arg, void *netmask_arg, struct radix_node_head *head, struct radix_node *rn)
 {
 
-       return (rn_delete(v_arg, netmask_arg, head));
+       return (rn_delete(v_arg, netmask_arg, head, rn));
 }
 
 /*
diff -u -r sys_org/netinet/in.c /router/usr/src/sys/netinet/in.c
--- sys_org/netinet/in.c        2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/in.c    2012-10-30 03:09:39.000000000 +0100
@@ -1397,13 +1397,23 @@
 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
 {
        struct rtentry *rt;
+#ifdef RADIX_MPATH
+       struct route ro;
+#endif
 
        KASSERT(l3addr->sa_family == AF_INET,
            ("sin_family %d", l3addr->sa_family));
 
+#ifdef RADIX_MPATH
+       /* ensure to select a interface route */
+       bzero( &ro, sizeof(ro));
+       bcopy( __DECONST(struct sockaddr *, l3addr), &ro.ro_dst, sizeof(struct sockaddr));
+       rtalloc_mpath_fib_flags( (struct route *)&ro, 0, RT_DEFAULT_FIB, RTF_GATEWAY);
+       rt = ro.ro_rt;
+#else
        /* XXX rtalloc1 should take a const param */
        rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
-
+#endif
        if (rt == NULL)
                return (EINVAL);
 
diff -u -r sys_org/netinet/ip_fastfwd.c /router/usr/src/sys/netinet/ip_fastfwd.c
--- sys_org/netinet/ip_fastfwd.c        2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/ip_fastfwd.c    2012-11-08 15:32:49.000000000 +0100
@@ -78,6 +78,7 @@
 
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
+#include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -113,7 +115,11 @@
     &VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding");
 
 static struct sockaddr_in *
+#ifdef RADIX_MPATH
+ip_findroute(struct route *ro, uint32_t hash, struct in_addr dest, struct mbuf *m)
+#else
 ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
+#endif
 {
        struct sockaddr_in *dst;
        struct rtentry *rt;
@@ -126,7 +132,11 @@
        dst->sin_family = AF_INET;
        dst->sin_len = sizeof(*dst);
        dst->sin_addr.s_addr = dest.s_addr;
+#ifdef RADIX_MPATH
+       rtalloc_mpath_fib(ro, hash, M_GETFIB(m));
+#else
        in_rtalloc_ign(ro, 0, M_GETFIB(m));
+#endif
 
        /*
         * Route there and interface still up?
@@ -420,7 +440,12 @@
        /*
         * Find route to destination.
         */
+#ifdef RADIX_MPATH
+       if ((dst = ip_findroute(&ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+           dest, m)) == NULL)
+#else
        if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+#endif
                return NULL;    /* icmp unreach already sent */
        ifp = ro.ro_rt->rt_ifp;
 
@@ -491,7 +516,13 @@
                }
 #endif /* IPFIREWALL_FORWARD */
                RTFREE(ro.ro_rt);
+#ifdef RADIX_MPATH
+               if ((dst = ip_findroute(&ro,
+                   ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), dest, m))
+                   == NULL)
+#else
                if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+#endif
                        return NULL;    /* icmp unreach already sent */
                ifp = ro.ro_rt->rt_ifp;
        }
diff -u -r sys_org/netinet/ipfw/ip_fw_table.c /router/usr/src/sys/netinet/ipfw/ip_fw_table.c
--- sys_org/netinet/ipfw/ip_fw_table.c  2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/ipfw/ip_fw_table.c      2012-10-29 16:07:26.000000000 +0100
@@ -379,7 +379,7 @@
                return (EINVAL);
        }
 
-       ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
+       ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh, NULL);
        IPFW_WUNLOCK(ch);
 
        if (ent == NULL)
@@ -396,7 +396,7 @@
        struct table_entry *ent;
 
        ent = (struct table_entry *)
-           rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
+           rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh, NULL);
        if (ent != NULL)
                free(ent, M_IPFW_TBL);
        return (0);


>Release-Note:
>Audit-Trail:
>Unformatted:
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-bugs
To unsubscribe, send any mail to "[email protected]"

Reply via email to