From: William Tu <[email protected]>

    commit 1a66a836da630cd70f3639208da549b549ce576b
    Author: William Tu <[email protected]>
    Date:   Fri Aug 25 09:21:28 2017 -0700

    gre: add collect_md mode to ERSPAN tunnel

    Similar to gre, vxlan, geneve, ipip tunnels, allow ERSPAN tunnels to
    operate in 'collect metadata' mode.  bpf_skb_[gs]et_tunnel_key() helpers
    can make use of it right away.  OVS can use it as well in the future.

    Signed-off-by: William Tu <[email protected]>
    Signed-off-by: David S. Miller <[email protected]>

With some adjustments for the compatibility layer.

Cc: William Tu <[email protected]>
Signed-off-by: Greg Rose <[email protected]>
---
 datapath/linux/compat/include/net/ip_tunnels.h |  68 ++++++++------
 datapath/linux/compat/ip_gre.c                 | 117 ++++++++++++++++++++++---
 2 files changed, 146 insertions(+), 39 deletions(-)

diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index e3fb13b..159eb48 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -74,14 +74,25 @@ void rpl_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 
 
 #ifndef TUNNEL_CSUM
-#define TUNNEL_CSUM    __cpu_to_be16(0x01)
-#define TUNNEL_ROUTING __cpu_to_be16(0x02)
-#define TUNNEL_KEY     __cpu_to_be16(0x04)
-#define TUNNEL_SEQ     __cpu_to_be16(0x08)
-#define TUNNEL_STRICT  __cpu_to_be16(0x10)
-#define TUNNEL_REC     __cpu_to_be16(0x20)
-#define TUNNEL_VERSION __cpu_to_be16(0x40)
-#define TUNNEL_NO_KEY  __cpu_to_be16(0x80)
+#define TUNNEL_CSUM            __cpu_to_be16(0x01)
+#define TUNNEL_ROUTING         __cpu_to_be16(0x02)
+#define TUNNEL_KEY             __cpu_to_be16(0x04)
+#define TUNNEL_SEQ             __cpu_to_be16(0x08)
+#define TUNNEL_STRICT          __cpu_to_be16(0x10)
+#define TUNNEL_REC             __cpu_to_be16(0x20)
+#define TUNNEL_VERSION         __cpu_to_be16(0x40)
+#define TUNNEL_NO_KEY          __cpu_to_be16(0x80)
+#define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
+#define TUNNEL_OAM             __cpu_to_be16(0x0200)
+#define TUNNEL_CRIT_OPT                __cpu_to_be16(0x0400)
+#define TUNNEL_GENEVE_OPT      __cpu_to_be16(0x0800)
+#define TUNNEL_VXLAN_OPT       __cpu_to_be16(0x1000)
+#define TUNNEL_NOCACHE         __cpu_to_be16(0x2000)
+#define TUNNEL_ERSPAN_OPT      __cpu_to_be16(0x4000)
+
+#undef TUNNEL_OPTIONS_PRESENT
+#define TUNNEL_OPTIONS_PRESENT \
+               (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
 
 struct tnl_ptk_info {
        __be16 flags;
@@ -100,27 +111,6 @@ struct tnl_ptk_info {
 #define IP_TNL_HASH_BITS   7
 #define IP_TNL_HASH_SIZE   (1 << IP_TNL_HASH_BITS)
 
-#ifndef TUNNEL_DONT_FRAGMENT
-#define TUNNEL_DONT_FRAGMENT   __cpu_to_be16(0x0100)
-#endif
-
-#ifndef TUNNEL_OAM
-#define TUNNEL_OAM     __cpu_to_be16(0x0200)
-#define TUNNEL_CRIT_OPT        __cpu_to_be16(0x0400)
-#endif
-
-#ifndef TUNNEL_GENEVE_OPT
-#define TUNNEL_GENEVE_OPT      __cpu_to_be16(0x0800)
-#endif
-
-#ifndef TUNNEL_VXLAN_OPT
-#define TUNNEL_VXLAN_OPT       __cpu_to_be16(0x1000)
-#endif
-
-/* Older kernels defined TUNNEL_OPTIONS_PRESENT to GENEVE only */
-#undef TUNNEL_OPTIONS_PRESENT
-#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
-
 /* Keep error state on tunnel for 30 sec */
 #define IPTUNNEL_ERR_TIMEO     (30*HZ)
 
@@ -243,6 +233,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
 #define ip_tunnel_collect_metadata() true
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+#undef TUNNEL_NOCACHE
 #define TUNNEL_NOCACHE 0
 
 static inline bool
@@ -420,6 +411,25 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
        }
 }
 
+static inline __be64 key32_to_tunnel_id(__be32 key)
+{
+       u64 id = (__force u64)key;      /* zero-extend the 32-bit key */
+#ifndef __BIG_ENDIAN
+       id <<= 32;      /* LE host: place key in the last four bytes in memory */
+#endif
+       return (__force __be64)id;
+}
+
+/* Extract the 32-bit key held in the last four in-memory bytes of tun_id. */
+static inline __be32 tunnel_id_to_key32(__be64 tun_id)
+{
+       u64 id = (__force u64)tun_id;
+#ifndef __BIG_ENDIAN
+       id >>= 32;      /* LE host: those bytes are the upper half of the word */
+#endif
+       return (__force __be32)id;
+}
+
 #define ip_tunnel_init rpl_ip_tunnel_init
 int rpl_ip_tunnel_init(struct net_device *dev);
 
diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c
index d25d2df..d0f112d 100644
--- a/datapath/linux/compat/ip_gre.c
+++ b/datapath/linux/compat/ip_gre.c
@@ -122,6 +122,9 @@ static __be32 tunnel_id_to_key(__be64 x)
 #endif
 }
 
+static void erspan_build_header(struct sk_buff *skb,
+                               __be32 id, u32 index, bool truncate);
+
 /* Called with rcu_read_lock and BH disabled. */
 static int gre_err(struct sk_buff *skb, u32 info,
                   const struct tnl_ptk_info *tpi)
@@ -133,7 +136,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                      int gre_hdr_len)
 {
        struct net *net = dev_net(skb->dev);
-       struct metadata_dst *tun_dst = NULL;
+       struct metadata_dst tun_dst;
        struct ip_tunnel_net *itn;
        struct ip_tunnel *tunnel;
        struct erspanhdr *ershdr;
@@ -169,9 +172,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                                           false, false) < 0)
                        goto drop;
 
-               tunnel->index = ntohl(index);
+               if (tunnel->collect_md) {
+                       struct ip_tunnel_info *info;
+                       struct erspan_metadata *md;
+                       __be64 tun_id;
+                       __be16 flags;
+
+                       tpi->flags |= TUNNEL_KEY;
+                       flags = tpi->flags;
+                       tun_id = key32_to_tunnel_id(tpi->key);
+
+                       ovs_ip_tun_rx_dst(&tun_dst, skb, flags,
+                                         tun_id, sizeof(*md));
+
+                       md = ip_tunnel_info_opts(&tun_dst.u.tun_info);
+                       if (!md)
+                               return PACKET_REJECT;
+
+                       md->index = index;
+                       info = &tun_dst.u.tun_info;
+                       info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+                       info->options_len = sizeof(*md);
+               } else {
+                       tunnel->index = ntohl(index);
+               }
+
                skb_reset_mac_header(skb);
-               ovs_ip_tunnel_rcv(tunnel->dev, skb, tun_dst);
+               ovs_ip_tunnel_rcv(tunnel->dev, skb, &tun_dst);
                return PACKET_RCVD;
        }
 drop:
@@ -484,6 +511,72 @@ err_free_skb:
 }
 EXPORT_SYMBOL(rpl_gre_fb_xmit);
 
+/* Send skb out a collect_md ERSPAN device using the tunnel metadata (IPv4 TX
+ * info plus ERSPAN index option) attached to it; proto is currently unused. */
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+                          __be16 proto)
+{
+       struct ip_tunnel *tunnel = netdev_priv(dev);
+       struct ip_tunnel_info *tun_info;
+       const struct ip_tunnel_key *key;
+       struct erspan_metadata *md;
+       struct rtable *rt = NULL;
+       struct tnl_ptk_info tpi;
+       bool truncate = false;
+       struct flowi4 fl;
+       int tunnel_hlen;
+       __be16 df;
+
+       tun_info = skb_tunnel_info(skb);
+       if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+                    ip_tunnel_info_af(tun_info) != AF_INET))
+               goto err_free_skb;
+
+       key = &tun_info->key;
+
+       /* ERSPAN has fixed 8 byte GRE header */
+       tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+       rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+       if (!rt)
+               return; /* TODO confirm: skb released by callee? */
+
+       if (gre_handle_offloads(skb, false))
+               goto err_free_rt;
+
+       if (skb->len > dev->mtu) {
+               pskb_trim(skb, dev->mtu);
+               truncate = true;
+       }
+
+       md = ip_tunnel_info_opts(tun_info);
+       if (!md)
+               goto err_free_rt;
+
+       erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+                           ntohl(md->index), truncate);
+       /* 8-byte header = 4-byte base + 4-byte sequence, so set SEQ only. */
+       tpi.flags = TUNNEL_SEQ;
+       tpi.proto = htons(ETH_P_ERSPAN);
+       tpi.key = 0;    /* session ID was handed to erspan_build_header() */
+       tpi.seq = htonl(tunnel->o_seqno++);
+       tpi.hdr_len = 8;
+
+       gre_build_header(skb, &tpi, 8);
+
+       df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+
+       iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+                     key->tos, key->ttl, df, false);
+       return;
+
+err_free_rt:
+       ip_rt_put(rt);
+err_free_skb:
+       kfree_skb(skb);
+       dev->stats.tx_dropped++;
+}
+
 #define GRE_FEATURES   (NETIF_F_SG |           \
                         NETIF_F_FRAGLIST |     \
                         NETIF_F_HIGHDMA |      \
@@ -608,18 +701,17 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[])
                return ret;
 
        /* ERSPAN should only have GRE sequence and key flag */
-       flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
-       flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
-       if (flags != (GRE_SEQ | GRE_KEY))
+       if (data[IFLA_GRE_OFLAGS])
+               flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+       if (data[IFLA_GRE_IFLAGS])
+               flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+       if (!data[IFLA_GRE_COLLECT_METADATA] &&
+           flags != (GRE_SEQ | GRE_KEY))
                return -EINVAL;
 
        /* ERSPAN Session ID only has 10-bit. Since we reuse
         * 32-bit key field as ID, check it's range.
         */
-       if (data[IFLA_GRE_IKEY] &&
-           (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
-               return -EINVAL;
-
        if (data[IFLA_GRE_OKEY] &&
            (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
                return -EINVAL;
@@ -708,6 +800,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
        struct ip_tunnel *tunnel = netdev_priv(dev);
        bool truncate = false;
 
+       if (tunnel->collect_md) {
+               erspan_fb_xmit(skb, dev, skb->protocol);
+               return NETDEV_TX_OK;
+       }
+
        if (gre_handle_offloads(skb, false))
                goto free_skb;
 
-- 
1.8.3.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to