Currently, the VTI input path works by first looking up the VTI by its IP addresses, then storing the tunnel pointer in the XFRM_TUNNEL_SKB_CB, and finally having xfrm_input override the mark with the i_key of that tunnel.
This patch changes the order so that the tunnel is found by a callback from xfrm_input. Each tunnel type (currently only ip_vti and ip6_vti) implements a lookup function pointer that finds the tunnel and sets it in the CB, and also does a state lookup. This has the advantage that much more information is available to the tunnel lookup function, including the looked-up XFRM state. This will be used in a future change to allow finding the tunnel not just from the IP addresses, but also from the xfrm lookup. The lookup function pointer occupies the same space in the XFRM_TUNNEL_SKB_CB as the IPv4/IPv6 tunnel pointer. The semantics of the field are: - When not running a handler that uses tunnels: always null. - At the beginning of xfrm_input: lookup function pointer. - After xfrm_input calls the lookup function: tunnel if found, else null. Signed-off-by: Lorenzo Colitti <[email protected]> --- include/net/xfrm.h | 2 ++ net/ipv4/ip_vti.c | 43 ++++++++++++++++++++++++++++++++++++---- net/ipv6/ip6_vti.c | 53 +++++++++++++++++++++++++++++++++++++++++++++----- net/ipv6/xfrm6_input.c | 1 - net/xfrm/xfrm_input.c | 34 +++++++++++++++++++------------- 5 files changed, 109 insertions(+), 24 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9d3b7c0ac6e2..3d245f2f6f6c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -653,6 +653,8 @@ struct xfrm_tunnel_skb_cb { } header; union { + int (*lookup)(struct sk_buff *skb, int nexthdr, __be32 spi, + __be32 seq, struct xfrm_state **x); struct ip_tunnel *ip4; struct ip6_tnl *ip6; } tunnel; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 949f432a5f04..850625598187 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -49,8 +49,8 @@ static struct rtnl_link_ops vti_link_ops __read_mostly; static unsigned int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); -static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) +static struct ip_tunnel * 
+vti4_find_tunnel(struct sk_buff *skb, __be32 spi, struct xfrm_state **x) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); @@ -59,19 +59,52 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); + if (tunnel) { + *x = xfrm_state_lookup(net, be32_to_cpu(tunnel->parms.i_key), + (xfrm_address_t *)&iph->daddr, + spi, iph->protocol, AF_INET); + } + + return tunnel; +} + +static int vti_lookup(struct sk_buff *skb, int nexthdr, __be32 spi, __be32 seq, + struct xfrm_state **x) +{ + struct net *net = dev_net(skb->dev); + struct ip_tunnel *tunnel; + + tunnel = vti4_find_tunnel(skb, spi, x); if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + if (!*x) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); + xfrm_audit_state_notfound(skb, AF_INET, spi, seq); + tunnel->dev->stats.rx_errors++; + tunnel->dev->stats.rx_dropped++; + goto drop; + } + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; - return xfrm_input(skb, nexthdr, spi, encap_type); + return 0; } return -EINVAL; drop: + if (*x) + xfrm_state_put(*x); kfree_skb(skb); - return 0; + return -ESRCH; +} + +static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup = vti_lookup; + return xfrm_input(skb, nexthdr, spi, encap_type); } static int vti_rcv(struct sk_buff *skb) @@ -93,6 +126,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) u32 orig_mark = skb->mark; int ret; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + if (!tunnel) return 1; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index dbb74f3c57a7..d0676f2f99eb 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -297,13 +297,33 @@ static void vti6_dev_uninit(struct net_device *dev) dev_put(dev); } -static int vti6_rcv(struct sk_buff *skb) +static struct ip6_tnl * +vti6_find_tunnel(struct sk_buff *skb, __be32 spi, struct xfrm_state **x) 
{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); struct ip6_tnl *t; + + t = vti6_tnl_lookup(net, &ipv6h->saddr, &ipv6h->daddr); + if (t) { + *x = xfrm_state_lookup(net, be32_to_cpu(t->parms.i_key), + (xfrm_address_t *)&ipv6h->daddr, + spi, ipv6h->nexthdr, AF_INET6); + } + + return t; +} + +int +vti6_lookup(struct sk_buff *skb, int nexthdr, __be32 spi, __be32 seq, + struct xfrm_state **x) +{ const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip6_tnl *t; rcu_read_lock(); - t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + t = vti6_find_tunnel(skb, spi, x); if (t) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); @@ -312,7 +332,7 @@ static int vti6_rcv(struct sk_buff *skb) if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { rcu_read_unlock(); - return 0; + goto discard; } if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { @@ -321,15 +341,36 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } + if (!*x) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); + xfrm_audit_state_notfound(skb, AF_INET6, spi, seq); + t->dev->stats.rx_errors++; + t->dev->stats.rx_dropped++; + rcu_read_unlock(); + goto discard; + } + rcu_read_unlock(); - return xfrm6_rcv_tnl(skb, t); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + + return 0; } rcu_read_unlock(); return -EINVAL; discard: + if (*x) + xfrm_state_put(*x); kfree_skb(skb); - return 0; + return -ESRCH; +} + +static int vti6_rcv(struct sk_buff *skb) +{ + int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff]; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup = vti6_lookup; + return xfrm6_rcv_spi(skb, nexthdr, 0, NULL); } static int vti6_rcv_cb(struct sk_buff *skb, int err) @@ -343,6 +384,8 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) u32 orig_mark = skb->mark; int ret; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + if (!t) return 1; diff --git a/net/ipv6/xfrm6_input.c 
b/net/ipv6/xfrm6_input.c index fe04e23af986..6d1b734fef8d 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -25,7 +25,6 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) { - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ac277b97e0d7..7b54f58454ee 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -267,18 +267,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) family = XFRM_SPI_SKB_CB(skb)->family; - /* if tunnel is present override skb->mark value with tunnel i_key */ - switch (family) { - case AF_INET: - if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) - mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key); - break; - case AF_INET6: - if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6) - mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key); - break; - } - err = secpath_set(skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); @@ -293,14 +281,29 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) daddr = (xfrm_address_t *)(skb_network_header(skb) + XFRM_SPI_SKB_CB(skb)->daddroff); + + if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup) { + err = XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup(skb, nexthdr, + spi, seq, &x); + if (err) { + XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup = NULL; + return err; + } + } + do { if (skb->sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + if (x) + xfrm_state_put(x); goto drop; } - x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family); - if (x == NULL) { + if (!x) + x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, + family); + + if (!x) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); 
xfrm_audit_state_notfound(skb, family, spi, seq); goto drop; @@ -420,6 +423,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } + + if (!err) + x = NULL; } while (!err); err = xfrm_rcv_cb(skb, family, x->type->proto, 0); -- 2.15.1.504.g5279b80103-goog
