On Fri, Feb 23, 2018 at 07:23:55PM +0100, Greg Kroah-Hartman wrote:
> 4.4-stable review patch.  If anyone has any objections, please let me know.
> 
> ------------------
> 
> From: Paolo Abeni <pab...@redhat.com>
> 
> commit 607f725f6f7d5ec3759fbc16224afb60e2152a5b upstream.
> 
> This also fixes a potential race in the existing tunnel code, which
> could lead to the wrong dst being permanently cached:
> 
> CPU1:                                 CPU2:
>   <xmit on ip6_tunnel>
>   <cache lookup fails>
>   dst = ip6_route_output(...)
>                                       <tunnel params are changed via nl>
>                                       dst_cache_reset() // no effect,
>                                                       // the cache is empty
>   dst_cache_set() // the wrong dst
>       // is permanently stored
>       // into the cache
> 
> With the new dst implementation the above race is not possible
> since the first cache lookup after dst_cache_reset will fail due
> to the timestamp check
> 
> Signed-off-by: Paolo Abeni <pab...@redhat.com>
> Suggested-and-acked-by: Hannes Frederic Sowa <han...@stressinduktion.org>
> Signed-off-by: David S. Miller <da...@davemloft.net>
> Signed-off-by: Manoj Boopathi Raj <manojboopa...@google.com>
> Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
> 
> ---
>  include/net/ip6_tunnel.h |   15 ----
>  net/ipv6/Kconfig         |    1 
>  net/ipv6/ip6_gre.c       |   12 +--
>  net/ipv6/ip6_tunnel.c    |  151 +++++++----------------------------------------
>  net/ipv6/ip6_vti.c       |    2 
>  5 files changed, 35 insertions(+), 146 deletions(-)
> 
> --- a/include/net/ip6_tunnel.h
> +++ b/include/net/ip6_tunnel.h
> @@ -5,6 +5,8 @@
>  #include <linux/netdevice.h>
>  #include <linux/if_tunnel.h>
>  #include <linux/ip6_tunnel.h>
> +#include <net/ip_tunnels.h>
> +#include <net/dst_cache.h>
>  
>  #define IP6TUNNEL_ERR_TIMEO (30*HZ)
>  
> @@ -32,12 +34,6 @@ struct __ip6_tnl_parm {
>       __be32                  o_key;
>  };
>  
> -struct ip6_tnl_dst {
> -     seqlock_t lock;
> -     struct dst_entry __rcu *dst;
> -     u32 cookie;
> -};
> -
>  /* IPv6 tunnel */
>  struct ip6_tnl {
>       struct ip6_tnl __rcu *next;     /* next tunnel in list */
> @@ -45,7 +41,7 @@ struct ip6_tnl {
>       struct net *net;        /* netns for packet i/o */
>       struct __ip6_tnl_parm parms;    /* tunnel configuration parameters */
>       struct flowi fl;        /* flowi template for xmit */
> -     struct ip6_tnl_dst __percpu *dst_cache; /* cached dst */
> +     struct dst_cache dst_cache;     /* cached dst */
>  
>       int err_count;
>       unsigned long err_time;
> @@ -65,11 +61,6 @@ struct ipv6_tlv_tnl_enc_lim {
>       __u8 encap_limit;       /* tunnel encapsulation limit   */
>  } __packed;
>  
> -struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
> -int ip6_tnl_dst_init(struct ip6_tnl *t);
> -void ip6_tnl_dst_destroy(struct ip6_tnl *t);
> -void ip6_tnl_dst_reset(struct ip6_tnl *t);
> -void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
>  int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
>               const struct in6_addr *raddr);
>  int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
> --- a/net/ipv6/Kconfig
> +++ b/net/ipv6/Kconfig
> @@ -205,6 +205,7 @@ config IPV6_NDISC_NODETYPE
>  config IPV6_TUNNEL
>       tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
>       select INET6_TUNNEL
> +     select DST_CACHE
>       ---help---
>         Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
>         RFC 2473.
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -362,7 +362,7 @@ static void ip6gre_tunnel_uninit(struct
>       struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
>  
>       ip6gre_tunnel_unlink(ign, t);
> -     ip6_tnl_dst_reset(t);
> +     dst_cache_reset(&t->dst_cache);
>       dev_put(dev);
>  }
>  
> @@ -640,7 +640,7 @@ static netdev_tx_t ip6gre_xmit2(struct s
>       }
>  
>       if (!fl6->flowi6_mark)
> -             dst = ip6_tnl_dst_get(tunnel);
> +             dst = dst_cache_get(&tunnel->dst_cache);
>  
>       if (!dst) {
>               dst = ip6_route_output(net, NULL, fl6);
> @@ -709,7 +709,7 @@ static netdev_tx_t ip6gre_xmit2(struct s
>       }
>  
>       if (!fl6->flowi6_mark && ndst)
> -             ip6_tnl_dst_set(tunnel, ndst);
> +             dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
>       skb_dst_set(skb, dst);
>  
>       proto = NEXTHDR_GRE;
> @@ -1017,7 +1017,7 @@ static int ip6gre_tnl_change(struct ip6_
>       t->parms.o_key = p->o_key;
>       t->parms.i_flags = p->i_flags;
>       t->parms.o_flags = p->o_flags;
> -     ip6_tnl_dst_reset(t);
> +     dst_cache_reset(&t->dst_cache);
>       ip6gre_tnl_link_config(t, set_mtu);
>       return 0;
>  }
> @@ -1228,7 +1228,7 @@ static void ip6gre_dev_free(struct net_d
>  {
>       struct ip6_tnl *t = netdev_priv(dev);
>  
> -     ip6_tnl_dst_destroy(t);
> +     dst_cache_destroy(&t->dst_cache);
>       free_percpu(dev->tstats);
>       free_netdev(dev);
>  }
> @@ -1266,7 +1266,7 @@ static int ip6gre_tunnel_init_common(str
>       if (!dev->tstats)
>               return -ENOMEM;
>  
> -     ret = ip6_tnl_dst_init(tunnel);
> +     ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
>       if (ret) {
>               free_percpu(dev->tstats);
>               dev->tstats = NULL;
> --- a/net/ipv6/ip6_tunnel.c
> +++ b/net/ipv6/ip6_tunnel.c
> @@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_
>       return &dev->stats;
>  }
>  
> -/*
> - * Locking : hash tables are protected by RCU and RTNL
> - */
> -
> -static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
> -                                 struct dst_entry *dst)
> -{
> -     write_seqlock_bh(&idst->lock);
> -     dst_release(rcu_dereference_protected(
> -                         idst->dst,
> -                         lockdep_is_held(&idst->lock.lock)));
> -     if (dst) {
> -             dst_hold(dst);
> -             idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
> -     } else {
> -             idst->cookie = 0;
> -     }
> -     rcu_assign_pointer(idst->dst, dst);
> -     write_sequnlock_bh(&idst->lock);
> -}
> -
> -struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
> -{
> -     struct ip6_tnl_dst *idst;
> -     struct dst_entry *dst;
> -     unsigned int seq;
> -     u32 cookie;
> -
> -     idst = raw_cpu_ptr(t->dst_cache);
> -
> -     rcu_read_lock();
> -     do {
> -             seq = read_seqbegin(&idst->lock);
> -             dst = rcu_dereference(idst->dst);
> -             cookie = idst->cookie;
> -     } while (read_seqretry(&idst->lock, seq));
> -
> -     if (dst && !atomic_inc_not_zero(&dst->__refcnt))
> -             dst = NULL;
> -     rcu_read_unlock();
> -
> -     if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
> -             ip6_tnl_per_cpu_dst_set(idst, NULL);
> -             dst_release(dst);
> -             dst = NULL;
> -     }
> -     return dst;
> -}
> -EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
> -
> -void ip6_tnl_dst_reset(struct ip6_tnl *t)
> -{
> -     int i;
> -
> -     for_each_possible_cpu(i)
> -             ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
> -}
> -EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
> -
> -void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
> -{
> -     ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
> -
> -}
> -EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
> -
> -void ip6_tnl_dst_destroy(struct ip6_tnl *t)
> -{
> -     if (!t->dst_cache)
> -             return;
> -
> -     ip6_tnl_dst_reset(t);
> -     free_percpu(t->dst_cache);
> -}
> -EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
> -
> -int ip6_tnl_dst_init(struct ip6_tnl *t)
> -{
> -     int i;
> -
> -     t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
> -     if (!t->dst_cache)
> -             return -ENOMEM;
> -
> -     for_each_possible_cpu(i)
> -             seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
> -
> -     return 0;
> -}
> -EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
> -
>  /**
>   * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
>   *   @remote: the address of the tunnel exit-point
> @@ -331,7 +240,7 @@ static void ip6_dev_free(struct net_devi
>  {
>       struct ip6_tnl *t = netdev_priv(dev);
>  
> -     ip6_tnl_dst_destroy(t);
> +     dst_cache_destroy(&t->dst_cache);
>       free_percpu(dev->tstats);
>       free_netdev(dev);
>  }
> @@ -464,7 +373,7 @@ ip6_tnl_dev_uninit(struct net_device *de
>               RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
>       else
>               ip6_tnl_unlink(ip6n, t);
> -     ip6_tnl_dst_reset(t);
> +     dst_cache_reset(&t->dst_cache);
>       dev_put(dev);
>  }
>  
> @@ -1053,7 +962,6 @@ static int ip6_tnl_xmit2(struct sk_buff
>       struct ipv6_tel_txoption opt;
>       struct dst_entry *dst = NULL, *ndst = NULL;
>       struct net_device *tdev;
> -     bool use_cache = false;
>       int mtu;
>       unsigned int max_headroom = sizeof(struct ipv6hdr);
>       u8 proto;
> @@ -1061,39 +969,28 @@ static int ip6_tnl_xmit2(struct sk_buff
>  
>       /* NBMA tunnel */
>       if (ipv6_addr_any(&t->parms.raddr)) {
> -             if (skb->protocol == htons(ETH_P_IPV6)) {
> -                     struct in6_addr *addr6;
> -                     struct neighbour *neigh;
> -                     int addr_type;
> -
> -                     if (!skb_dst(skb))
> -                             goto tx_err_link_failure;
> -
> -                     neigh = dst_neigh_lookup(skb_dst(skb),
> -                                              &ipv6_hdr(skb)->daddr);
> -                     if (!neigh)
> -                             goto tx_err_link_failure;
> +             struct in6_addr *addr6;
> +             struct neighbour *neigh;
> +             int addr_type;
> +
> +             if (!skb_dst(skb))
> +                     goto tx_err_link_failure;
>  
> -                     addr6 = (struct in6_addr *)&neigh->primary_key;
> -                     addr_type = ipv6_addr_type(addr6);
> +             neigh = dst_neigh_lookup(skb_dst(skb),
> +                                      &ipv6_hdr(skb)->daddr);
> +             if (!neigh)
> +                     goto tx_err_link_failure;
>  
> -                     if (addr_type == IPV6_ADDR_ANY)
> -                             addr6 = &ipv6_hdr(skb)->daddr;
> +             addr6 = (struct in6_addr *)&neigh->primary_key;
> +             addr_type = ipv6_addr_type(addr6);
>  
> -                     memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
> -                     neigh_release(neigh);
> -             }
> -     } else if (t->parms.proto != 0 && !(t->parms.flags &
> -                                         (IP6_TNL_F_USE_ORIG_TCLASS |
> -                                          IP6_TNL_F_USE_ORIG_FWMARK))) {
> -             /* enable the cache only if neither the outer protocol nor the
> -              * routing decision depends on the current inner header value
> -              */
> -             use_cache = true;
> -     }
> +             if (addr_type == IPV6_ADDR_ANY)
> +                     addr6 = &ipv6_hdr(skb)->daddr;
>  
> -     if (use_cache)
> -             dst = ip6_tnl_dst_get(t);
> +             memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
> +             neigh_release(neigh);
> +     } else if (!fl6->flowi6_mark)
> +             dst = dst_cache_get(&t->dst_cache);
>  
>       if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
>               goto tx_err_link_failure;
> @@ -1156,8 +1053,8 @@ static int ip6_tnl_xmit2(struct sk_buff
>               skb = new_skb;
>       }
>  
> -     if (use_cache && ndst)
> -             ip6_tnl_dst_set(t, ndst);
> +     if (!fl6->flowi6_mark && ndst)
> +             dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
>       skb_dst_set(skb, dst);
>  
>       skb->transport_header = skb->network_header;
> @@ -1392,7 +1289,7 @@ ip6_tnl_change(struct ip6_tnl *t, const
>       t->parms.flowinfo = p->flowinfo;
>       t->parms.link = p->link;
>       t->parms.proto = p->proto;
> -     ip6_tnl_dst_reset(t);
> +     dst_cache_reset(&t->dst_cache);
>       ip6_tnl_link_config(t);
>       return 0;
>  }
> @@ -1663,7 +1560,7 @@ ip6_tnl_dev_init_gen(struct net_device *
>       if (!dev->tstats)
>               return -ENOMEM;
>  
> -     ret = ip6_tnl_dst_init(t);
> +     ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
>       if (ret) {
>               free_percpu(dev->tstats);
>               dev->tstats = NULL;
> --- a/net/ipv6/ip6_vti.c
> +++ b/net/ipv6/ip6_vti.c
> @@ -645,7 +645,7 @@ vti6_tnl_change(struct ip6_tnl *t, const
>       t->parms.i_key = p->i_key;
>       t->parms.o_key = p->o_key;
>       t->parms.proto = p->proto;
> -     ip6_tnl_dst_reset(t);
> +     dst_cache_reset(&t->dst_cache);
>       vti6_link_config(t);
>       return 0;
>  }
> 
>

It may also be wise to take these two commits from mainline, as they
are along the same lines as this one:

09acddf873b ("ip_tunnel: replace dst_cache with generic implementation")
27337e16f2d ("ip_tunnel: fix preempt warning in ip tunnel creation/updating")

There is a minor conflict with the first one due to stable commit
6f99825e7632 ("sit: fix a double free on error path").

I'll most likely carry them anyway, since that fixes a build error with
some out-of-tree code, but the less out-of-tree code I have, the better!

Thanks!
Nathan

Reply via email to