ipv6_chk_addr_and_flags determines if an address is a local address. It
is called by ip6_route_info_create to validate a gateway address is not a
local address. It currently does not consider L3 domains and as a result
does not allow a route to be added in one VRF if the nexthop points to
an address in a second VRF. e.g.,

    $ ip route add 2001:db8:1::/64 vrf r2 via 2001:db8:102::23
    Error: Invalid gateway address.

where 2001:db8:102::23 is an address on an interface in vrf r1.

Resolve by comparing the l3mdev for the passed in device and requiring an
l3mdev match with the device containing an address. The intent of checking
for an address on the specified device versus any device in the domain is
mantained by a new argument to skip the check between the passed in device
and the device with the address.

Update the handful of users of ipv6_chk_addr with a NULL dev argument:
- anycast to call ipv6_chk_addr_and_flags. If the device is given by the
  user, look for the given address across the L3 domain. If the index is
  not given, the default table is presumed so only addresses on devices
  not enslaved are considered.

- ip6_tnl_rcv_ctl - local address must exist on device, remote address
  can not exist in L3 domain; only remote check needs to be updated but
  do both for consistency.

ip6_validate_gw needs to handle 2 cases - one where the device is given
as part of the nexthop spec and the other where the device is resolved.
There is at least 1 VRF case where deferring the check to only after
the route lookup has resolved the device fails with an unintuitive error
"RTNETLINK answers: No route to host" as opposed to the preferred
"Error: Gateway can not be a local address." The 'no route to host'
error is because of the fallback to a full lookup.

Signed-off-by: David Ahern <dsah...@gmail.com>
---
 include/net/addrconf.h |  4 ++--
 net/ipv6/addrconf.c    | 26 ++++++++++++++++++++++----
 net/ipv6/anycast.c     |  9 ++++++---
 net/ipv6/datagram.c    |  5 +++--
 net/ipv6/ip6_tunnel.c  | 12 ++++++++----
 net/ipv6/ndisc.c       |  2 +-
 net/ipv6/route.c       | 37 ++++++++++++++++++++++++++++---------
 7 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index c4185a7b0e90..132e5b95167a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
                  const struct net_device *dev, int strict);
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-                           const struct net_device *dev, int strict,
-                           u32 banned_flags);
+                           const struct net_device *dev, bool skip_dev_check,
+                           int strict, u32 banned_flags);
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b5fd116c046a..17d5d3f42d21 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1851,22 +1851,40 @@ static int ipv6_count_addresses(const struct inet6_dev 
*idev)
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
                  const struct net_device *dev, int strict)
 {
-       return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+       return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+                                      strict, IFA_F_TENTATIVE);
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ *   1. does the address exist in the L3 domain that dev is part of
+ *      (skip_dev_check = true), or
+ *
+ *   2. does the address exist on the specific device
+ *      (skip_dev_check = false)
+ */
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-                           const struct net_device *dev, int strict,
-                           u32 banned_flags)
+                           const struct net_device *dev, bool skip_dev_check,
+                           int strict, u32 banned_flags)
 {
        unsigned int hash = inet6_addr_hash(net, addr);
+       const struct net_device *l3mdev;
        struct inet6_ifaddr *ifp;
        u32 ifp_flags;
 
        rcu_read_lock();
+
+       l3mdev = l3mdev_master_dev_rcu(dev);
+
        hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
                if (!net_eq(dev_net(ifp->idev->dev), net))
                        continue;
+
+               if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+                       continue;
+
                /* Decouple optimistic from tentative for evaluation here.
                 * Ban optimistic addresses explicitly, when required.
                 */
@@ -1875,7 +1893,7 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct 
in6_addr *addr,
                            : ifp->flags;
                if (ipv6_addr_equal(&ifp->addr, addr) &&
                    !(ifp_flags&banned_flags) &&
-                   (!dev || ifp->idev->dev == dev ||
+                   (skip_dev_check || ifp->idev->dev == dev ||
                     !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
                        rcu_read_unlock();
                        return 1;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index c61718dba2e6..d580d4d456a5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const 
struct in6_addr *addr)
                return -EPERM;
        if (ipv6_addr_is_multicast(addr))
                return -EINVAL;
-       if (ipv6_chk_addr(net, addr, NULL, 0))
+
+       if (ifindex)
+               dev = __dev_get_by_index(net, ifindex);
+
+       if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
                return -EINVAL;
 
        pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const 
struct in6_addr *addr)
                        dev = __dev_get_by_flags(net, IFF_UP,
                                                 IFF_UP | IFF_LOOPBACK);
                }
-       } else
-               dev = __dev_get_by_index(net, ifindex);
+       }
 
        if (!dev) {
                err = -ENODEV;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..b27333d7b099 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
                        if (addr_type != IPV6_ADDR_ANY) {
                                int strict = __ipv6_addr_src_scope(addr_type) 
<= IPV6_ADDR_SCOPE_LINKLOCAL;
                                if (!(inet_sk(sk)->freebind || 
inet_sk(sk)->transparent) &&
-                                   !ipv6_chk_addr(net, &src_info->ipi6_addr,
-                                                  strict ? dev : NULL, 0) &&
+                                   !ipv6_chk_addr_and_flags(net, 
&src_info->ipi6_addr,
+                                                            dev, !strict, 0,
+                                                            IFA_F_TENTATIVE) &&
                                    !ipv6_chk_acast_addr_src(net, dev,
                                                             
&src_info->ipi6_addr))
                                        err = -EINVAL;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 56c4967f1868..1ce8244e8aee 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
                        ldev = dev_get_by_index_rcu(net, p->link);
 
                if ((ipv6_addr_is_multicast(laddr) ||
-                    likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+                    likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+                                                   0, IFA_F_TENTATIVE))) &&
                    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
-                    likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+                    likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+                                                    0, IFA_F_TENTATIVE))))
                        ret = 1;
        }
        return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
                if (p->link)
                        ldev = dev_get_by_index_rcu(net, p->link);
 
-               if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+               if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+                                                     0, IFA_F_TENTATIVE)))
                        pr_warn("%s xmit: Local address not yet configured!\n",
                                p->name);
                else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
                         !ipv6_addr_is_multicast(raddr) &&
-                        unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+                        unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+                                                         true, 0, 
IFA_F_TENTATIVE)))
                        pr_warn("%s xmit: Routing loop! Remote address found on 
this node!\n",
                                p->name);
                else
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0a19ce3a6f7f..13bf775c7f1a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct 
sk_buff *skb)
        int probes = atomic_read(&neigh->probes);
 
        if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
-                                          dev, 1,
+                                          dev, false, 1,
                                           IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
                saddr = &ipv6_hdr(skb)->saddr;
        probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3851c3ccfd7a..bbc62799eb3b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2633,18 +2633,25 @@ static int ip6_validate_gw(struct net *net, struct 
fib6_config *cfg,
        const struct in6_addr *gw_addr = &cfg->fc_gateway;
        int gwa_type = ipv6_addr_type(gw_addr);
        const struct net_device *dev = *_dev;
+       bool need_local_addr_check = !dev;
        int err = -EINVAL;
 
-       /* if gw_addr is local we will fail to detect this in case
-        * address is still TENTATIVE (DAD in progress). rt6_lookup()
-        * will return already-added prefix route via interface that
-        * prefix route was assigned to, which might be non-loopback.
+       /* if route spec contains the device, check if gateway address
+        * is a local address in the same L3 domain
         */
-       if (ipv6_chk_addr_and_flags(net, gw_addr,
-                                   gwa_type & IPV6_ADDR_LINKLOCAL ?
-                                   dev : NULL, 0, 0)) {
-               NL_SET_ERR_MSG(extack, "Invalid gateway address");
-               goto out;
+       if (dev) {
+               /* if gw_addr is local we will fail to detect this in case
+                * address is still TENTATIVE (DAD in progress). rt6_lookup()
+                * will return already-added prefix route via interface that
+                * prefix route was assigned to, which might be non-loopback.
+                */
+               if (ipv6_chk_addr_and_flags(net, gw_addr, dev,
+                                           gwa_type & IPV6_ADDR_LINKLOCAL ?
+                                           false : true, 0, 0)) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Gateway can not be a local address");
+                       goto out;
+               }
        }
 
        if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
@@ -2683,6 +2690,18 @@ static int ip6_validate_gw(struct net *net, struct 
fib6_config *cfg,
                               "Egress device can not be loopback device for 
this route");
                goto out;
        }
+
+       /* if we did not check gw_addr above, do so now that the
+        * egress device has been resolved.
+        */
+       if (need_local_addr_check &&
+           ipv6_chk_addr_and_flags(net, gw_addr, dev,
+                                   gwa_type & IPV6_ADDR_LINKLOCAL ?
+                                   false : true, 0, 0)) {
+               NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+               goto out;
+       }
+
        err = 0;
 out:
        return err;
-- 
2.11.0

Reply via email to