With IPv6 routing subtrees we need to take into account that the
source address is typically not specified at the time of the route
lookup.

There are two separate cases where this can happen. In the typical
case the source address hasn't been selected before the route lookup.
Skipping a source prefix policy rule because of this will lead to
inconsistent routing behavior between, for example, bound and unbound
sockets.

We avoid this by passing the policy rule source prefix to the lookup
and source address selection functions. For source prefix rules the
source address is selected before the route lookup, otherwise we do it
the other way around. The source address selection algorithm remains
virtually unchanged; the source prefix is just used to verify the
selected address is compatible with the rule. If the source address
doesn't match, the route lookup with the current rule is aborted and
is started again with the next rule in the policy chain.

The less common case is where the unspecified address is actually
used as a valid source address. When the kernel uses the unspecified
address it doesn't touch the routing table. We need to make sure a
userland application using a raw socket can do the same. If the user
includes the IPv6 header we therefore have to bypass the source
address selection even when the source address is unspecified. In
addition, we don't insert any routing cache entry created by such a
lookup.

Signed-off-by: Ville Nuorvala <[EMAIL PROTECTED]>
---
 include/net/addrconf.h |    4 +++-
 include/net/ip6_fib.h  |   16 +++++++++++++++-
 net/ipv6/addrconf.c    |   13 +++++++++++--
 net/ipv6/fib6_rules.c  |   16 ++--------------
 net/ipv6/ip6_fib.c     |    2 +-
 net/ipv6/ndisc.c       |    2 +-
 net/ipv6/route.c       |   41 +++++++++++++++++++++++++++++++++--------
 7 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index d075693..7066362 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -67,8 +67,10 @@ #endif
 extern struct inet6_ifaddr *   ipv6_get_ifaddr(struct in6_addr *addr,
                                                struct net_device *dev,
                                                int strict);
-extern int                     ipv6_get_saddr(int pref_if,
+struct rt6key;
+extern int                     ipv6_get_saddr(int pref_if,
                                               struct in6_addr *daddr,
+                                              struct rt6key *sconstr,
                                               struct in6_addr *saddr);
 extern int                     ipv6_get_lladdr(struct net_device *dev, struct in6_addr *);
 extern int                     ipv6_rcv_saddr_equal(const struct sock *sk,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index e4438de..8887b5c 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -21,6 +21,7 @@ #include <linux/spinlock.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/netlink.h>
+#include <net/fib_rules.h>

 struct rt6_info;

@@ -77,6 +78,18 @@ struct rt6key
        int             plen;
 };

+struct fib6_rule
+{
+       struct fib_rule         common;
+       struct rt6key           src;
+       struct rt6key           dst;
+#ifdef CONFIG_IPV6_ROUTE_FWMARK
+       u32                     fwmark;
+       u32                     fwmask;
+#endif
+       u8                      tclass;
+};
+
 struct fib6_table;

 struct rt6_info
@@ -174,7 +187,8 @@ #define RT6_TABLE_LOCAL             RT6_TABLE_MAIN
 #endif

 typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *,
-                                        struct flowi *, int);
+                                        struct flowi *, int,
+                                        struct fib6_rule *);

 /*
  *     exported functions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 09a22c8..486af76 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -904,7 +904,8 @@ static int inline ipv6_saddr_label(const
        return 1;
 }

-int ipv6_get_saddr(int pref_if, struct in6_addr *daddr, struct in6_addr *saddr)
+int ipv6_get_saddr(int pref_if, struct in6_addr *daddr,
+                  struct rt6key *sconstr, struct in6_addr *saddr)
 {
        struct ipv6_saddr_score hiscore;
        struct inet6_ifaddr *ifa_result = NULL;
@@ -1151,7 +1152,15 @@ record_it:

        if (!ifa_result)
                return -EADDRNOTAVAIL;
-       
+#ifdef CONFIG_IPV6_SUBTREES
+       /* Don't let source address based routing interfere with the
+          address selection, just make sure the selected address
+          matches the routing policy constraints */
+
+       if (sconstr && sconstr->plen > 0 &&
+           !ipv6_prefix_equal(saddr, &sconstr->addr, sconstr->plen))
+               return -EADDRNOTAVAIL;
+#endif
        ipv6_addr_copy(saddr, &ifa_result->addr);
        in6_ifa_put(ifa_result);
        return 0;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index fc56a19..a5b7803 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -21,18 +21,6 @@ #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/netlink.h>

-struct fib6_rule
-{
-       struct fib_rule         common;
-       struct rt6key           src;
-       struct rt6key           dst;
-#ifdef CONFIG_IPV6_ROUTE_FWMARK
-       u32                     fwmark;
-       u32                     fwmask;
-#endif
-       u8                      tclass;
-};
-
 static struct fib_rules_ops fib6_rules_ops;

 static struct fib6_rule main_rule = {
@@ -98,7 +86,7 @@ static int fib6_rule_action(struct fib_r

        table = fib6_get_table(rule->table);
        if (table)
-               rt = lookup(table, flp, flags);
+               rt = lookup(table, flp, flags, (struct fib6_rule *) rule);

        if (rt != &ip6_null_entry)
                goto out;
@@ -123,7 +111,7 @@ static int fib6_rule_match(struct fib_ru
                return 0;

        if (r->src.plen) {
-               if (!(flags & RT6_LOOKUP_F_HAS_SADDR) ||
+               if (flags & RT6_LOOKUP_F_HAS_SADDR &&
                    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
                        return 0;
        }
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8fcae7a..c77e44b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -280,7 +280,7 @@ struct fib6_table *fib6_get_table(u32 id
 struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
                                   pol_lookup_t lookup)
 {
-       return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
+       return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags, NULL);
 }

 static void __init fib6_tables_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3ac4e12..809132f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -449,7 +449,7 @@ static void ndisc_send_na(struct net_dev
                src_addr = solicited_addr;
                in6_ifa_put(ifp);
        } else {
-               if (ipv6_get_saddr(dev->ifindex, daddr, &tmpaddr))
+               if (ipv6_get_saddr(dev->ifindex, daddr, NULL, &tmpaddr))
                        return;
                src_addr = &tmpaddr;
        }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7c3438e..e0bfd30 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -499,7 +499,8 @@ do { \
 } while(0)

 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
-                                            struct flowi *fl, int flags)
+                                            struct flowi *fl, int flags,
+                                            struct fib6_rule *rule)
 {
        struct fib6_node *fn;
        struct rt6_info *rt;
@@ -624,7 +625,8 @@ #endif
 }

 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
-                                           struct flowi *fl, int flags)
+                                           struct flowi *fl, int flags,
+                                           struct fib6_rule *rule)
 {
        struct fib6_node *fn;
        struct rt6_info *rt, *nrt;
@@ -719,7 +721,8 @@ #endif
 }

 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
-                                            struct flowi *fl, int flags)
+                                            struct flowi *fl, int flags,
+                                            struct fib6_rule *rule)
 {
        struct fib6_node *fn;
        struct rt6_info *rt, *nrt;
@@ -730,6 +733,16 @@ static struct rt6_info *ip6_pol_route_ou
        struct in6_addr saddr;

        ipv6_addr_copy(&saddr, &fl->fl6_src);
+#ifdef CONFIG_IPV6_SUBTREES
+       if (!has_saddr && rule->src.plen > 0) {
+               /* a source prefix specific policy rule has to override the
+                  normal source address selection process */
+               if (ipv6_get_saddr(fl->oif, &fl->fl6_dst, &rule->src, &saddr))
+                       goto no_saddr;
+               has_saddr = RT6_LOOKUP_F_HAS_SADDR;
+               ipv6_addr_copy(&fl->fl6_src, &saddr);
+       }
+#endif
 relookup:
        read_lock_bh(&table->tb6_lock);

@@ -749,7 +762,7 @@ restart:
        if (!has_saddr) {
                int oif = rt->rt6i_dev->ifindex;
                /* policy rule doesn't restrict source address */
-               if (ipv6_get_saddr(oif, &fl->fl6_dst, &saddr))
+               if (ipv6_get_saddr(oif, &fl->fl6_dst, NULL, &saddr))
                        goto no_saddr;
                has_saddr = RT6_LOOKUP_F_HAS_SADDR;
                ipv6_addr_copy(&fl->fl6_src, &saddr);
@@ -769,7 +782,12 @@ #endif

        dst_hold(&rt->u.dst);
        if (nrt) {
-               if (!ip6_ins_rt(nrt))
+               /*
+                * Kernel generated messages with an unspecified source
+                * address don't change the routing table; make sure user
+                * generated messages behave the same.
+                */
+               if (ipv6_addr_any(&rt->rt6i_src.addr) || !ip6_ins_rt(nrt))
                        goto out2;
        }

@@ -806,7 +824,12 @@ struct dst_entry * ip6_route_output(stru
        if (rt6_need_strict(&fl->fl6_dst))
                flags |= RT6_LOOKUP_F_IFACE;

-       if (!ipv6_addr_any(&fl->fl6_src))
+       /*
+        * In some rare cases (RS, DAD, etc.) the unspecified address
+        * may be used as a source address. Such packets can only be sent
+        * from userspace if the user passes the complete IPv6 header.
+        */
+       if (!ipv6_addr_any(&fl->fl6_src) || (sk && inet_sk(sk)->hdrincl))
                flags |= RT6_LOOKUP_F_HAS_SADDR;

        return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
@@ -1304,7 +1327,8 @@ struct ip6rd_flowi {

 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
                                             struct flowi *fl,
-                                            int flags)
+                                            int flags,
+                                            struct fib6_rule *rule)
 {
        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
        struct rt6_info *rt;
@@ -2055,7 +2079,8 @@ #endif
                NLA_PUT_U32(skb, RTA_IIF, iif);
        else if (dst) {
                struct in6_addr saddr_buf;
-               if (!ipv6_get_saddr(rt->rt6i_dev->ifindex, dst, &saddr_buf))
+               if (!ipv6_get_saddr(rt->rt6i_dev->ifindex, dst,
+                                   &rt->rt6i_src, &saddr_buf))
                        NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
        }

-- 
1.4.2.3

-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to