This commit backports the following upstream commits to fix MPLS GSO in ovs
datapath. It has been tested on kernel 4.4 and 4.9.

Upstream commit:
    commit 48d2ab609b6bbecb7698487c8579bc40de9d6dfa
    Author: David Ahern <d...@cumulusnetworks.com>
    Date:   Wed Aug 24 20:10:44 2016 -0700

    net: mpls: Fixups for GSO

    As reported by Lennert the MPLS GSO code is failing to properly segment
    large packets. There are a couple of problems:

    1. the inner protocol is not set so the gso segment functions for inner
       protocol layers are not getting run, and

    2  MPLS labels for packets that use the "native" (non-OVS) MPLS code
       are not properly accounted for in mpls_gso_segment.

    The MPLS GSO code was added for OVS. It is re-using skb_mac_gso_segment
    to call the gso segment functions for the higher layer protocols. That
    means skb_mac_gso_segment is called twice -- once with the network
    protocol set to MPLS and again with the network protocol set to the
    inner protocol.

    This patch sets the inner skb protocol addressing item 1 above and sets
    the network_header and inner_network_header to mark where the MPLS labels
    start and end. The MPLS code in OVS is also updated to set the two
    network markers.

    >From there the MPLS GSO code uses the difference between the network
    header and the inner network header to know the size of the MPLS header
    that was pushed. It then pulls the MPLS header, resets the mac_len and
    protocol for the inner protocol and then calls skb_mac_gso_segment
    to segment the skb.

    Afterward the inner protocol segmentation is done the skb protocol
    is set to mpls for each segment and the network and mac headers
    restored.

    Reported-by: Lennert Buytenhek <buyt...@wantstofly.org>
    Signed-off-by: David Ahern <d...@cumulusnetworks.com>
    Signed-off-by: David S. Miller <da...@davemloft.net>

Upstream commit:
    commit f7d49bce8e741e1e6aa14ce4db1b6cea7e4be4e8
    Author: Jiri Benc <jb...@redhat.com>
    Date:   Fri Sep 30 19:08:05 2016 +0200

    openvswitch: mpls: set network header correctly on key extract

    After the 48d2ab609b6b ("net: mpls: Fixups for GSO"), MPLS handling in
    openvswitch was changed to have network header pointing to the start of the
    MPLS headers and inner_network_header pointing after the MPLS headers.

    However, key_extract was missed by the mentioned commit, causing incorrect
    headers to be set when a MPLS packet just enters the bridge or after it is
    recirculated.

    Fixes: 48d2ab609b6b ("net: mpls: Fixups for GSO")
    Signed-off-by: Jiri Benc <jb...@redhat.com>
    Acked-by: Pravin B Shelar <pshe...@ovn.org>
    Signed-off-by: David S. Miller <da...@davemloft.net>

Upstream commit:
    commit 85de4a2101acb85c3b1dde465e84596ccca99f2c
    Author: Jiri Benc <jb...@redhat.com>
    Date:   Fri Sep 30 19:08:07 2016 +0200

    openvswitch: use mpls_hdr

    skb_mpls_header is equivalent to mpls_hdr now. Use the existing helper
    instead.

    Signed-off-by: Jiri Benc <jb...@redhat.com>
    Acked-by: Pravin B Shelar <pshe...@ovn.org>
    Signed-off-by: David S. Miller <da...@davemloft.net>

Upstream commit:
    commit c66549ffd666605831abf6cf19ce0571ad868e39
    Author: Jiri Benc <jb...@redhat.com>
    Date:   Wed Oct 5 15:01:57 2016 +0200

    openvswitch: correctly fragment packet with mpls headers

    If mpls headers were pushed to a defragmented packet, the refragmentation no
    longer works correctly after 48d2ab609b6b ("net: mpls: Fixups for GSO"). The
    network header has to be shifted after the mpls headers for the
    fragmentation and restored afterwards.

    Fixes: 48d2ab609b6b ("net: mpls: Fixups for GSO")
    Signed-off-by: Jiri Benc <jb...@redhat.com>
    Signed-off-by: David S. Miller <da...@davemloft.net>

Signed-off-by: Yi-Hung Wei <yihung....@gmail.com>
---
 acinclude.m4                             |  1 +
 datapath/actions.c                       | 57 ++++++++++++++++++++++----------
 datapath/flow.c                          | 11 ++----
 datapath/linux/compat/include/net/mpls.h | 18 ++++++++--
 4 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index 744d8f89525c..82ca4a28015c 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -479,6 +479,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
                         [OVS_GREP_IFELSE([$KSRC/include/net/dst_cache.h], 
[dst_cache],
                                          
[OVS_DEFINE([USE_UPSTREAM_TUNNEL])])])])
 
+  OVS_GREP_IFELSE([$KSRC/include/linux/mpls.h], [mpls_hdr])
   OVS_GREP_IFELSE([$KSRC/include/linux/net.h], [sock_create_kern.*net],
                   [OVS_DEFINE([HAVE_SOCK_CREATE_KERN_NET])])
   OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [ndo_fill_metadata_dst])
diff --git a/datapath/actions.c b/datapath/actions.c
index 06080b24ea5a..ecc5136a3529 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -61,7 +61,8 @@ struct ovs_frag_data {
        struct vport *vport;
        struct ovs_gso_cb cb;
        __be16 inner_protocol;
-       __u16 vlan_tci;
+       u16 network_offset;     /* valid only for MPLS */
+       u16 vlan_tci;
        __be16 vlan_proto;
        unsigned int l2_len;
        u8 mac_proto;
@@ -160,7 +161,7 @@ static void update_ethertype(struct sk_buff *skb, struct 
ethhdr *hdr,
 static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
                     const struct ovs_action_push_mpls *mpls)
 {
-       __be32 *new_mpls_lse;
+       struct mpls_shim_hdr *new_mpls_lse;
 
        /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
        if (skb->encapsulation)
@@ -169,20 +170,26 @@ static int push_mpls(struct sk_buff *skb, struct 
sw_flow_key *key,
        if (skb_cow_head(skb, MPLS_HLEN) < 0)
                return -ENOMEM;
 
+       if (!ovs_skb_get_inner_protocol(skb)) {
+               skb_set_inner_network_header(skb, skb->mac_len);
+               ovs_skb_set_inner_protocol(skb, skb->protocol);
+       }
+
        skb_push(skb, MPLS_HLEN);
        memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
                skb->mac_len);
        skb_reset_mac_header(skb);
+#ifdef HAVE_MPLS_HDR
+       skb_set_network_header(skb, skb->mac_len);
+#endif
 
-       new_mpls_lse = (__be32 *)skb_mpls_header(skb);
-       *new_mpls_lse = mpls->mpls_lse;
+       new_mpls_lse = mpls_hdr(skb);
+       new_mpls_lse->label_stack_entry = mpls->mpls_lse;
 
        skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
 
        if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
                update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
-       if (!ovs_skb_get_inner_protocol(skb))
-               ovs_skb_set_inner_protocol(skb, skb->protocol);
        skb->protocol = mpls->mpls_ethertype;
 
        invalidate_flow_key(key);
@@ -198,21 +205,22 @@ static int pop_mpls(struct sk_buff *skb, struct 
sw_flow_key *key,
        if (unlikely(err))
                return err;
 
-       skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);
+       skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
 
        memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
                skb->mac_len);
 
        __skb_pull(skb, MPLS_HLEN);
        skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb->mac_len);
 
        if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
                struct ethhdr *hdr;
 
-               /* skb_mpls_header() is used to locate the ethertype
+               /* mpls_hdr() is used to locate the ethertype
                 * field correctly in the presence of VLAN tags.
                 */
-               hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
+               hdr = (struct ethhdr *)((void*)mpls_hdr(skb) - ETH_HLEN);
                update_ethertype(skb, hdr, ethertype);
         }
        if (eth_p_mpls(skb->protocol))
@@ -225,7 +233,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key 
*key,
 static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
                    const __be32 *mpls_lse, const __be32 *mask)
 {
-       __be32 *stack;
+       struct mpls_shim_hdr *stack;
        __be32 lse;
        int err;
 
@@ -233,16 +241,16 @@ static int set_mpls(struct sk_buff *skb, struct 
sw_flow_key *flow_key,
        if (unlikely(err))
                return err;
 
-       stack = (__be32 *)skb_mpls_header(skb);
-       lse = OVS_MASKED(*stack, *mpls_lse, *mask);
+       stack = mpls_hdr(skb);
+       lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
        if (skb->ip_summed == CHECKSUM_COMPLETE) {
-               __be32 diff[] = { ~(*stack), lse };
+               __be32 diff[] = { ~(stack->label_stack_entry), lse };
 
                skb->csum = ~csum_partial((char *)diff, sizeof(diff),
                                          ~skb->csum);
        }
 
-       *stack = lse;
+       stack->label_stack_entry = lse;
        flow_key->mpls.top_lse = lse;
        return 0;
 }
@@ -707,6 +715,12 @@ static int ovs_vport_output(OVS_VPORT_OUTPUT_PARAMS)
        skb_postpush_rcsum(skb, skb->data, data->l2_len);
        skb_reset_mac_header(skb);
 
+       if (eth_p_mpls(skb->protocol)) {
+               skb->inner_network_header = skb->network_header;
+               skb_set_network_header(skb, data->network_offset);
+               skb_reset_mac_len(skb);
+       }
+
        ovs_vport_send(vport, skb, data->mac_proto);
        return 0;
 }
@@ -726,7 +740,7 @@ static struct dst_ops ovs_dst_ops = {
  * ovs_vport_output(), which is called once per fragmented packet.
  */
 static void prepare_frag(struct vport *vport, struct sk_buff *skb,
-                       u8 mac_proto)
+                        u16 orig_network_offset, u8 mac_proto)
 {
        unsigned int hlen = skb_network_offset(skb);
        struct ovs_frag_data *data;
@@ -736,6 +750,7 @@ static void prepare_frag(struct vport *vport, struct 
sk_buff *skb,
        data->vport = vport;
        data->cb = *OVS_GSO_CB(skb);
        data->inner_protocol = ovs_skb_get_inner_protocol(skb);
+       data->network_offset = orig_network_offset;
        data->vlan_tci = skb->vlan_tci;
        data->vlan_proto = skb->vlan_proto;
        data->mac_proto = mac_proto;
@@ -750,6 +765,13 @@ static void ovs_fragment(struct net *net, struct vport 
*vport,
                         struct sk_buff *skb, u16 mru,
                         struct sw_flow_key *key)
 {
+       u16 orig_network_offset = 0;
+
+       if (eth_p_mpls(skb->protocol)) {
+               orig_network_offset = skb_network_offset(skb);
+               skb->network_header = skb->inner_network_header;
+       }
+
        if (skb_network_offset(skb) > MAX_L2_LEN) {
                OVS_NLERR(1, "L2 header too long to fragment");
                goto err;
@@ -759,7 +781,8 @@ static void ovs_fragment(struct net *net, struct vport 
*vport,
                struct dst_entry ovs_dst;
                unsigned long orig_dst;
 
-               prepare_frag(vport, skb, ovs_key_mac_proto(key));
+               prepare_frag(vport, skb, orig_network_offset,
+                             ovs_key_mac_proto(key));
                dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
                         DST_OBSOLETE_NONE, DST_NOCOUNT);
                ovs_dst.dev = vport->dev;
@@ -779,7 +802,7 @@ static void ovs_fragment(struct net *net, struct vport 
*vport,
                        goto err;
                }
 
-               prepare_frag(vport, skb,
+               prepare_frag(vport, skb, orig_network_offset,
                             ovs_key_mac_proto(key));
                memset(&ovs_rt, 0, sizeof(ovs_rt));
                dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
diff --git a/datapath/flow.c b/datapath/flow.c
index 2bc1ad0d5304..1d40b2400406 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -666,12 +666,7 @@ static int key_extract(struct sk_buff *skb, struct 
sw_flow_key *key)
        } else if (eth_p_mpls(key->eth.type)) {
                size_t stack_len = MPLS_HLEN;
 
-               /* In the presence of an MPLS label stack the end of the L2
-                * header and the beginning of the L3 header differ.
-                *
-                * Advance network_header to the beginning of the L3
-                * header. mac_len corresponds to the end of the L2 header.
-                */
+               skb_set_inner_network_header(skb, skb->mac_len);
                while (1) {
                        __be32 lse;
 
@@ -679,12 +674,12 @@ static int key_extract(struct sk_buff *skb, struct 
sw_flow_key *key)
                        if (unlikely(error))
                                return 0;
 
-                       memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+                       memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
 
                        if (stack_len == MPLS_HLEN)
                                memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
 
-                       skb_set_network_header(skb, skb->mac_len + stack_len);
+                       skb_set_inner_network_header(skb, skb->mac_len + 
stack_len);
                        if (lse & htonl(MPLS_LS_S_MASK))
                                break;
 
diff --git a/datapath/linux/compat/include/net/mpls.h 
b/datapath/linux/compat/include/net/mpls.h
index 73e48e37eb55..9b5039283a56 100644
--- a/datapath/linux/compat/include/net/mpls.h
+++ b/datapath/linux/compat/include/net/mpls.h
@@ -19,12 +19,24 @@
 
 #define MPLS_HLEN 4
 
+struct mpls_shim_hdr {
+       __be32 label_stack_entry;
+};
+
 static inline bool eth_p_mpls(__be16 eth_type)
 {
        return eth_type == htons(ETH_P_MPLS_UC) ||
                eth_type == htons(ETH_P_MPLS_MC);
 }
 
+/* Starting from kernel 4.9, commit 48d2ab609b6b ("net: mpls: Fixups for GSO")
+ * and commit 85de4a2101ac ("openvswitch: use mpls_hdr") introduced
+ * behavioural changes to MPLS GSO. We shall backport the following function
+ * to ensure MPLS GSO works properly for kernels older than the one which
+ * contains these commits.
+ */
+#ifndef HAVE_MPLS_HDR
+#define mpls_hdr rpl_mpls_hdr
 /*
  * For non-MPLS skbs this will correspond to the network header.
  * For MPLS skbs it will be before the network_header as the MPLS
@@ -32,8 +44,10 @@ static inline bool eth_p_mpls(__be16 eth_type)
  * header. That is, for MPLS skbs the end of the mac header
  * is the top of the MPLS label stack.
  */
-static inline unsigned char *skb_mpls_header(struct sk_buff *skb)
+static inline struct mpls_shim_hdr *rpl_mpls_hdr(const struct sk_buff *skb)
 {
-       return skb_mac_header(skb) + skb->mac_len;
+       return (struct mpls_shim_hdr *) (skb_mac_header(skb) + skb->mac_len);
 }
+#endif
+
 #endif /* _NET_MPLS_WRAPPER_H */
-- 
2.7.4

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to