On Thu, May 23, 2013 at 11:12 PM, Rajahalme, Jarno (NSN - FI/Espoo)
<[email protected]> wrote:
> Pravin,
>
> Please find some review comments below,
Thanks for comments.
>
> Jarno
>
> On May 23, 2013, at 23:01 , ext Pravin B Shelar wrote:
> ...
>> diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c
>> new file mode 100644
>> index 0000000..c352ff8
>> --- /dev/null
>> +++ b/datapath/linux/compat/gre.c
>> @@ -0,0 +1,352 @@
>> +/*
>> + * Copyright (c) 2007-2013 Nicira, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of version 2 of the GNU General Public
>> + * License as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> + * 02110-1301, USA
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/if.h>
>> +#include <linux/if_tunnel.h>
>> +#include <linux/icmp.h>
>> +#include <linux/in.h>
>> +#include <linux/ip.h>
>> +#include <linux/kernel.h>
>> +#include <linux/kmod.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/spinlock.h>
>> +
>> +#include <net/gre.h>
>> +#include <net/icmp.h>
>> +#include <net/protocol.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>> +#include "gso.h"
>> +
>> +static struct gre_cisco_protocol __rcu *gre_cisco_proto;
>> +
>> +static void gre_csum_fix(struct sk_buff *skb)
>> +{
>> + struct gre_base_hdr *greh;
>> + __be32 *options;
>> + int gre_offset = skb_transport_offset(skb);
>> +
>> + greh = (struct gre_base_hdr *)skb_transport_header(skb);
>> + options = ((__be32 *)greh + 1);
>> +
>> + *options = 0;
>> + *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset,
>> + skb->len - gre_offset,
>> 0));
>> +}
>> +
>> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
>> +{
>> + int err;
>> +
>> + skb_reset_inner_headers(skb);
>> +
>> + if (skb_is_gso(skb)) {
>> + if (gre_csum)
>> + OVS_GSO_CB(skb)->fix_segment = gre_csum_fix;
>> + } else {
>> + if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
>> + err = skb_checksum_help(skb);
>> + if (err)
>> + goto error;
>> +
>> + } else if (skb->ip_summed != CHECKSUM_PARTIAL)
>> + skb->ip_summed = CHECKSUM_NONE;
>> + }
>> + return skb;
>> +error:
>> + kfree_skb(skb);
>> + return ERR_PTR(err);
>> +}
>> +
>> +static bool is_gre_gso(struct sk_buff *skb)
>> +{
>> + return skb_is_gso(skb);
>> +}
>
> What is the value of this? You already use skb_is_gso() above.
>
right, I will get rid of it.
>> +
>> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
>> + int hdr_len)
>> +{
>> + struct gre_base_hdr *greh;
>> +
>> + __skb_push(skb, hdr_len);
>> +
>> + greh = (struct gre_base_hdr *)skb->data;
>> + greh->flags = tnl_flags_to_gre_flags(tpi->flags);
>> + greh->protocol = tpi->proto;
>> +
>> + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
>
> Add some spaces? Also below.
>
ok.
>> + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
>> +
>> + if (tpi->flags&TUNNEL_SEQ) {
>> + *ptr = tpi->seq;
>> + ptr--;
>> + }
>> + if (tpi->flags&TUNNEL_KEY) {
>> + *ptr = tpi->key;
>> + ptr--;
>> + }
>> + if (tpi->flags&TUNNEL_CSUM && !is_gre_gso(skb)) {
>> + *ptr = 0;
>> + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
>> + skb->len, 0));
>> + }
>> + }
>> +}
>> +
>> +static __sum16 check_checksum(struct sk_buff *skb)
>> +{
>> + __sum16 csum = 0;
>> +
>> + switch (skb->ip_summed) {
>> + case CHECKSUM_COMPLETE:
>> + csum = csum_fold(skb->csum);
>> +
>> + if (!csum)
>> + break;
>> + /* Fall through. */
>> +
>> + case CHECKSUM_NONE:
>> + skb->csum = 0;
>> + csum = __skb_checksum_complete(skb);
>> + skb->ip_summed = CHECKSUM_COMPLETE;
>> + break;
>> + }
>> +
>> + return csum;
>> +}
>> +
>> +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
>> + bool *csum_err)
>
> It seems the csum_err returned via the parameter pointer is ignored by the
> caller, hence it could be removed.
>
This function will be defied in upstream gre module, so I will keep it
same as that.
>> +{
>> + unsigned int ip_hlen = ip_hdrlen(skb);
>> + struct gre_base_hdr *greh;
>> + __be32 *options;
>> + int hdr_len;
>> +
>> + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
>> + return -EINVAL;
>> +
>> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
>> + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
>> + return -EINVAL;
>> +
>> + tpi->flags = gre_flags_to_tnl_flags(greh->flags);
>> + hdr_len = ip_gre_calc_hlen(tpi->flags);
>> +
>> + if (!pskb_may_pull(skb, hdr_len))
>> + return -EINVAL;
>> +
>> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
>> + tpi->proto = greh->protocol;
>> +
>> + options = (__be32 *)(greh + 1);
>> + if (greh->flags & GRE_CSUM) {
>> + if (check_checksum(skb)) {
>> + *csum_err = true;
>> + return -EINVAL;
>> + }
>> + options++;
>> + }
>> +
>> + if (greh->flags & GRE_KEY) {
>> + tpi->key = *options;
>> + options++;
>> + } else
>> + tpi->key = 0;
>> +
>> + if (unlikely(greh->flags & GRE_SEQ)) {
>> + tpi->seq = *options;
>> + options++;
>> + } else
>> + tpi->seq = 0;
>> +
>> + /* WCCP version 1 and 2 protocol decoding.
>> + * - Change protocol to IP
>> + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
>> + */
>> + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
>> + tpi->proto = htons(ETH_P_IP);
>> + if ((*(u8 *)options & 0xF0) != 0x40) {
>> + hdr_len += 4;
>> + if (!pskb_may_pull(skb, hdr_len))
>> + return -EINVAL;
>> + }
>> + }
>> +
>> + return iptunnel_pull_header(skb, hdr_len, tpi->proto);
>> +}
>> +
>> +static int gre_cisco_rcv(struct sk_buff *skb)
>> +{
>> + struct tnl_ptk_info tpi;
>> + bool csum_err = false;
>> + struct gre_cisco_protocol *proto;
>> +
>> + rcu_read_lock();
>> + proto = rcu_dereference(gre_cisco_proto);
>> + if (!proto)
>> + goto drop;
>> +
>> + if (parse_gre_header(skb, &tpi, &csum_err) < 0)
>> + goto drop;
>> + proto->handler(skb, &tpi);
>> + rcu_read_unlock();
>> + return 0;
>> +
>> +drop:
>> + rcu_read_unlock();
>> + kfree_skb(skb);
>> + return 0;
>> +}
>> +
>> +static const struct gre_protocol ipgre_protocol = {
>> + .handler = gre_cisco_rcv,
>> +};
>> +
>> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
>> +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX]
>> __read_mostly;
>> +
>> +int gre_add_protocol(const struct gre_protocol *proto, u8 version)
>> +{
>> + if (version >= GREPROTO_MAX)
>> + return -EINVAL;
>> +
>> + return (cmpxchg((const struct gre_protocol **)&gre_proto[version],
>> NULL, proto) == NULL) ?
>> + 0 : -EBUSY;
>> +}
>> +
>> +int gre_del_protocol(const struct gre_protocol *proto, u8 version)
>> +{
>> + int ret;
>> +
>> + if (version >= GREPROTO_MAX)
>> + return -EINVAL;
>> +
>> + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version],
>> proto, NULL) == proto) ?
>> + 0 : -EBUSY;
>> +
>> + if (ret)
>> + return ret;
>> +
>> + synchronize_net();
>> + return 0;
>> +}
>> +
>> +static int gre_rcv(struct sk_buff *skb)
>> +{
>> + const struct gre_protocol *proto;
>> + u8 ver;
>> + int ret;
>> +
>> + if (!pskb_may_pull(skb, 12))
>> + goto drop;
>
> Is there a symbol you could use here instead the literal 12?
>
same as above, this function is copied from gre module.
>> +
>> + ver = skb->data[1]&0x7f;
>
> Spaces...
>
ok.
>> + if (ver >= GREPROTO_MAX)
>> + goto drop;
>> +
>> + rcu_read_lock();
>> + proto = rcu_dereference(gre_proto[ver]);
>> + if (!proto || !proto->handler)
>> + goto drop_unlock;
>> + ret = proto->handler(skb);
>> + rcu_read_unlock();
>> + return ret;
>> +
>> +drop_unlock:
>> + rcu_read_unlock();
>> +drop:
>> + kfree_skb(skb);
>> + return NET_RX_DROP;
>> +}
>> +
> ...
>> diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
>> new file mode 100644
>> index 0000000..f6a4ad3
>> --- /dev/null
>> +++ b/datapath/linux/compat/gso.c
>> @@ -0,0 +1,166 @@
>> +/*
>> + * Copyright (c) 2007-2013 Nicira, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of version 2 of the GNU General Public
>> + * License as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> + * 02110-1301, USA
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/if.h>
>> +#include <linux/if_tunnel.h>
>> +#include <linux/icmp.h>
>> +#include <linux/in.h>
>> +#include <linux/ip.h>
>> +#include <linux/kernel.h>
>> +#include <linux/kmod.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/spinlock.h>
>> +
>> +#include <net/gre.h>
>> +#include <net/icmp.h>
>> +#include <net/protocol.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>> +#include "gso.h"
>> +
>> +static __be16 skb_network_protocol(struct sk_buff *skb)
>> +{
>> + __be16 type = skb->protocol;
>> + int vlan_depth = ETH_HLEN;
>
> Shouldn't this be 2*ETH_ALEN instead, since that is where VLAN header starts
> at if there is one?
>
Linux vlan_hdr start at ETH_HLEN, ref: struct vlan_hd and struct
vlan_ethhdr in if_vlan.h.
>> +
>> + while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
>> + struct vlan_hdr *vh;
>> +
>> + if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
>> + return 0;
>> +
>> + vh = (struct vlan_hdr *)(skb->data + vlan_depth);
>> + type = vh->h_vlan_encapsulated_proto;
>> + vlan_depth += VLAN_HLEN;
>> + }
>> +
>> + return type;
>> +}
>> +
>> +static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
>> + netdev_features_t features,
>> + bool tx_path)
>> +{
>> + struct iphdr *iph = ip_hdr(skb);
>> + int tnl_hlen = skb_inner_network_offset(skb);
>> + struct sk_buff *skb1 = skb;
>> + struct sk_buff *segs;
>> +
>> + __skb_pull(skb, tnl_hlen);
>> + skb_reset_mac_header(skb);
>> + skb_reset_network_header(skb);
>> + skb_reset_transport_header(skb);
>> + skb->protocol = skb_network_protocol(skb);
>> +
>> + segs = __skb_gso_segment(skb, 0, true);
>
> Are you sure you want to ignore the tx_path and insist on 'true' here? Using
> 'false' selects a looser check of skb->ip_summed at __skb_gso_segment. We
> have seen the check implied by 'true' having the kernel fill the disk with
> non-rate-limited warnings, so this may be a significant choice you are making
> here.
>
right, I will change it.
>> + if (!segs || IS_ERR(segs))
>> + goto free;
>> +
>> + skb = segs;
>> + while (skb) {
>> + __skb_push(skb, tnl_hlen);
>> + skb_reset_mac_header(skb);
>> + skb_reset_network_header(skb);
>> + skb_set_transport_header(skb, sizeof(struct iphdr));
>> + skb->mac_len = 0;
>> +
>> + memcpy(ip_hdr(skb), iph, tnl_hlen);
>
> Could you educate me a bit here and explain shortly why it is necessary to
> overwrite the IP headers of the segmented skbs?
>
we have segmented inner skb, now we need to copy outer header to each
segment from first segment.
>> + if (OVS_GSO_CB(skb)->fix_segment)
>> + OVS_GSO_CB(skb)->fix_segment(skb);
>> + skb = skb->next;
>> + }
>> +free:
>> + consume_skb(skb1);
>> + return segs;
>> +}
>> +
>> +static bool need_linearize(const struct sk_buff *skb)
>> +{
>> + int i;
>> +
>> + if (unlikely(skb_shinfo(skb)->frag_list))
>> + return true;
>> +
>> + /*
>> + * Generally speaking we should linearize if there are paged frags.
>> + * However, if all of the refcounts are 1 we know nobody else can
>> + * change them from underneath us and we can skip the linearization.
>> + */
>> + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
>> + if
>> (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1))
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> +int ip_local_out(struct sk_buff *skb)
>
> IMO it would be less surprising if you used 'rpl_ip_local_out' here. Don't
> know if that would go against a convention though.
>
ok.
>> +{
>> + struct dst_entry *dst = skb_dst(skb);
>> + int ret = NETDEV_TX_OK;
>> +
>> + if (skb_is_gso(skb)) {
>> + skb = tnl_skb_gso_segment(skb, 0, true);
>
> Maybe should use 'false' as the tx_path argument (Jesse?).
>
ok.
>> + if (!skb || IS_ERR(skb))
>> + return 0;
>> + } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
>> + int err;
>> +
>> + if (unlikely(need_linearize(skb))) {
>> + err = __skb_linearize(skb);
>> + if (unlikely(err))
>> + return 0;
>> + }
>> +
>> + err = skb_checksum_help(skb);
>> + if (unlikely(err))
>> + return 0;
>> + }
>> +
>> + while (skb) {
>> + struct sk_buff *next_skb = skb->next;
>> + int err;
>> +
>> + if (next_skb)
>> + skb_dst_set(skb, dst_clone(dst));
>> + else
>> + skb_dst_set(skb, dst);
>> +
>> + skb->next = NULL;
>> +
>> + /*
>> + * Allow our local IP stack to fragment the outer packet even
>> + * if the DF bit is set as a last resort. We also need to
>> + * force selection of an IP ID here because Linux will
>> + * otherwise leave it at 0 if the packet originally had DF set.
>> + */
>
> This comment needs editing as the IP ID is not set here.
>
right, I missed it.
>> +
>> + skb->local_df = 1;
>> + memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
>> +
>
> Is clearing of the skb control buffer necessary here?
>
yes, we have seen issues due to garbage IPCB used by networking stack.
>> +#undef ip_local_out
>> + err = ip_local_out(skb);
>> + if (unlikely(net_xmit_eval(err)))
>> + ret = err;
>> +
>> + skb = next_skb;
>> + }
>> + return ret;
>> +}
> ...
>> diff --git a/datapath/linux/compat/include/net/gre.h
>> b/datapath/linux/compat/include/net/gre.h
>> new file mode 100644
>> index 0000000..16dc54e
>> --- /dev/null
>> +++ b/datapath/linux/compat/include/net/gre.h
>> @@ -0,0 +1,109 @@
>> +#ifndef __LINUX_GRE_WRAPPER_H
>> +#define __LINUX_GRE_WRAPPER_H
>> +
>> +#include <linux/skbuff.h>
>> +#include <net/ip_tunnels.h>
>> +
>> +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,37)
>> +#include_next <net/gre.h>
>> +
>> +#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,37) */
>> +
>> +#define GREPROTO_CISCO 0
>> +#define GREPROTO_PPTP 1
>> +#define GREPROTO_MAX 2
>> +#define GRE_IP_PROTO_MAX 2
>> +
>> +struct gre_protocol {
>> + int (*handler)(struct sk_buff *skb);
>> + void (*err_handler)(struct sk_buff *skb, u32 info);
>> +};
>> +
>> +int gre_add_protocol(const struct gre_protocol *proto, u8 version);
>> +int gre_del_protocol(const struct gre_protocol *proto, u8 version);
>> +
>> +#endif
>> +
>> +#define GRE_IP_PROTO_MAX 2
>> +
>> +struct gre_base_hdr {
>> + __be16 flags;
>> + __be16 protocol;
>> +};
>> +#define GRE_HEADER_SECTION 4
>> +
>> +#define MAX_GRE_PROTO_PRIORITY 255
>> +struct gre_cisco_protocol {
>> + int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
>> + int (*err_handler)(struct sk_buff *skb, u32 info,
>> + const struct tnl_ptk_info *tpi);
>> + u8 priority;
>> +};
>> +
>> +#define gre_build_header rpl_gre_build_header
>> +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
>> + int hdr_len);
>> +
>> +#define gre_handle_offloads rpl_gre_handle_offloads
>> +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum);
>> +
>> +int gre_cisco_register(struct gre_cisco_protocol *proto);
>> +int gre_cisco_unregister(struct gre_cisco_protocol *proto);
>> +
>> +static inline int ip_gre_calc_hlen(__be16 o_flags)
>> +{
>> + int addend = 4;
>> +
>> + if (o_flags&TUNNEL_CSUM)
>
> Spaces...
>
ok.
>> + addend += 4;
>> + if (o_flags&TUNNEL_KEY)
>> + addend += 4;
>> + if (o_flags&TUNNEL_SEQ)
>> + addend += 4;
>> + return addend;
>> +}
>> +
> ...
>> diff --git a/datapath/linux/compat/include/net/ip_tunnels.h
>> b/datapath/linux/compat/include/net/ip_tunnels.h
>> new file mode 100644
>> index 0000000..ad17c9d
>> --- /dev/null
>> +++ b/datapath/linux/compat/include/net/ip_tunnels.h
>> @@ -0,0 +1,54 @@
>> +#ifndef __NET_IP_TUNNELS_WRAPPER_H
>> +#define __NET_IP_TUNNELS_WRAPPER_H 1
>> +
>> +#include <linux/if_tunnel.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/types.h>
>> +#include <net/dsfield.h>
>> +#include <net/flow.h>
>> +#include <net/inet_ecn.h>
>> +#include <net/ip.h>
>> +#include <net/rtnetlink.h>
>> +
>> +#define TUNNEL_CSUM __cpu_to_be16(0x01)
>> +#define TUNNEL_ROUTING __cpu_to_be16(0x02)
>> +#define TUNNEL_KEY __cpu_to_be16(0x04)
>> +#define TUNNEL_SEQ __cpu_to_be16(0x08)
>> +#define TUNNEL_STRICT __cpu_to_be16(0x10)
>> +#define TUNNEL_REC __cpu_to_be16(0x20)
>> +#define TUNNEL_VERSION __cpu_to_be16(0x40)
>> +#define TUNNEL_NO_KEY __cpu_to_be16(0x80)
>> +#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
>> +
>> +struct tnl_ptk_info {
>> + __be16 flags;
>> + __be16 proto;
>> + __be32 key;
>> + __be32 seq;
>> +};
>> +
>> +#define PACKET_RCVD 0
>> +#define PACKET_REJECT 1
>> +
>> +static inline void tunnel_ip_select_ident(struct sk_buff *skb,
>> + const struct iphdr *old_iph,
>> + struct dst_entry *dst)
>> +{
>> + struct iphdr *iph = ip_hdr(skb);
>> +
>> + /* Use inner packet iph-id if possible. */
>> + if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
>> + iph->id = old_iph->id;
>
> Was it concluded that it is safe to use the inner packet ip id? I get that
> this is a copy of upstream code, but would like to know.
>
I still need to think about it and propose fix for upstream kernel too.
>> + else
>> + __ip_select_ident(iph, dst,
>> + (skb_shinfo(skb)->gso_segs ?: 1) - 1);
>> +}
>> +
>> +int iptunnel_xmit(struct net *net, struct rtable *rt,
>> + struct sk_buff *skb,
>> + __be32 src, __be32 dst, __u8 proto,
>> + __u8 tos, __u8 ttl, __be16 df);
>> +
>> +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16
>> inner_proto);
>> +#endif /* __NET_IP_TUNNELS_H */
>> diff --git a/datapath/linux/compat/ip_tunnels_core.c
>> b/datapath/linux/compat/ip_tunnels_core.c
>> new file mode 100644
>> index 0000000..f8c8488
>> --- /dev/null
>> +++ b/datapath/linux/compat/ip_tunnels_core.c
>> @@ -0,0 +1,115 @@
>> +/*
>> + * Copyright (c) 2007-2013 Nicira, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of version 2 of the GNU General Public
>> + * License as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> + * 02110-1301, USA
>> + */
>> +
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/in.h>
>> +#include <linux/in_route.h>
>> +#include <linux/inetdevice.h>
>> +#include <linux/jhash.h>
>> +#include <linux/list.h>
>> +#include <linux/kernel.h>
>> +#include <linux/version.h>
>> +#include <linux/workqueue.h>
>> +#include <linux/rculist.h>
>> +#include <net/ip_tunnels.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>> +#include "gso.h"
>> +#include "compat.h"
>> +
>> +int iptunnel_xmit(struct net *net, struct rtable *rt,
>> + struct sk_buff *skb,
>> + __be32 src, __be32 dst, __u8 proto,
>> + __u8 tos, __u8 ttl, __be16 df)
>> +{
>> + int pkt_len = skb->len;
>> + struct iphdr *iph;
>> + int err;
>> +
>> + nf_reset(skb);
>> + secpath_reset(skb);
>> + skb_clear_rxhash(skb);
>> + skb_dst_drop(skb);
>> + skb_dst_set(skb, &rt_dst(rt));
>> +#if 0
>> + /* Do not clear ovs-callback. It will be done in gso code. */
>> + memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
>> +#endif
>
> Why does the skb control buffer need to be cleared in the first place?
>
it has inner packet offset that compat-gso code needs.
>> +
>> + /* Push down and install the IP header. */
>> + __skb_push(skb, sizeof(struct iphdr));
>> + skb_reset_network_header(skb);
>> +
>> + iph = ip_hdr(skb);
>> +
>> + iph->version = 4;
>> + iph->ihl = sizeof(struct iphdr) >> 2;
>> + iph->frag_off = df;
>> + iph->protocol = proto;
>> + iph->tos = tos;
>> + iph->daddr = dst;
>> + iph->saddr = src;
>> + iph->ttl = ttl;
>> + tunnel_ip_select_ident(skb,
>> + (const struct iphdr
>> *)skb_inner_network_header(skb),
>> + &rt_dst(rt));
>> +
>> + err = ip_local_out(skb);
>> + if (unlikely(net_xmit_eval(err)))
>> + pkt_len = 0;
>> + return pkt_len;
>> +}
>> +
> ...
>> diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c
>> index add17d9..1707eab 100644
>> --- a/datapath/vport-gre.c
>> +++ b/datapath/vport-gre.c
>> @@ -24,50 +24,29 @@
>> #include <linux/if_tunnel.h>
>> #include <linux/if_vlan.h>
>> #include <linux/in.h>
>> +#include <linux/if_vlan.h>
>> +#include <linux/in.h>
>> +#include <linux/in_route.h>
>> +#include <linux/inetdevice.h>
>> +#include <linux/jhash.h>
>> +#include <linux/list.h>
>> +#include <linux/kernel.h>
>> +#include <linux/workqueue.h>
>> +#include <linux/rculist.h>
>> +#include <net/route.h>
>> +#include <net/xfrm.h>
>> +
>>
>> #include <net/icmp.h>
>> #include <net/ip.h>
>> +#include <net/ip_tunnels.h>
>> +#include <net/gre.h>
>> #include <net/protocol.h>
>>
>> #include "datapath.h"
>> #include "tunnel.h"
>> #include "vport.h"
>>
>> -/*
>> - * The GRE header is composed of a series of sections: a base and then a
>> variable
>> - * number of options.
>> - */
>> -#define GRE_HEADER_SECTION 4
>> -
>> -struct gre_base_hdr {
>> - __be16 flags;
>> - __be16 protocol;
>> -};
>> -
>> -static int gre_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
>> -{
>> - int len = GRE_HEADER_SECTION;
>> -
>> - if (tun_key->tun_flags & OVS_TNL_F_KEY)
>> - len += GRE_HEADER_SECTION;
>> - if (tun_key->tun_flags & OVS_TNL_F_CSUM)
>> - len += GRE_HEADER_SECTION;
>> - return len;
>> -}
>> -
>> -static int gre64_hdr_len(const struct ovs_key_ipv4_tunnel *tun_key)
>> -{
>> - /* Set key for GRE64 tunnels, even when key if is zero. */
>> - int len = GRE_HEADER_SECTION + /* GRE Hdr */
>> - GRE_HEADER_SECTION + /* GRE Key */
>> - GRE_HEADER_SECTION; /* GRE SEQ */
>> -
>> - if (tun_key->tun_flags & OVS_TNL_F_CSUM)
>> - len += GRE_HEADER_SECTION;
>> -
>> - return len;
>> -}
>> -
>> /* Returns the least-significant 32 bits of a __be64. */
>> static __be32 be64_get_low32(__be64 x)
>> {
>> @@ -78,61 +57,31 @@ static __be32 be64_get_low32(__be64 x)
>> #endif
>> }
>>
>> -static __be32 be64_get_high32(__be64 x)
>> +static __be16 filter_tnl_flags(__be16 flags)
>> {
>> -#ifdef __BIG_ENDIAN
>> - return (__force __be32)((__force u64)x >> 32);
>> -#else
>> - return (__force __be32)x;
>> -#endif
>> + return (flags & (TUNNEL_CSUM | TUNNEL_KEY));
>> }
>>
>> -static void __gre_build_header(struct sk_buff *skb,
>> - int tunnel_hlen,
>> - bool is_gre64)
>> +static struct sk_buff *__build_header(const struct vport *vport,
>
> 'vport' is not used, so it could be removed here.
ok.
>
>> + struct sk_buff *skb,
>> + int tunnel_hlen,
>> + __be32 seq, __be16 gre64_flag)
>> {
>> const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
>> - __be32 *options = (__be32 *)(skb_network_header(skb) + tunnel_hlen
>> - - GRE_HEADER_SECTION);
>> - struct gre_base_hdr *greh = (struct gre_base_hdr *)
>> skb_transport_header(skb);
>> - greh->protocol = htons(ETH_P_TEB);
>> - greh->flags = 0;
>> -
>> - /* Work backwards over the options so the checksum is last. */
>> - if (tun_key->tun_flags & OVS_TNL_F_KEY || is_gre64) {
>> - greh->flags |= GRE_KEY;
>> - if (is_gre64) {
>> - /* Set higher 32 bits to seq. */
>> - *options = be64_get_high32(tun_key->tun_id);
>> - options--;
>> - greh->flags |= GRE_SEQ;
>> - }
>> - *options = be64_get_low32(tun_key->tun_id);
>> - options--;
>> - }
>> + struct tnl_ptk_info tpi;
>>
>> - if (tun_key->tun_flags & OVS_TNL_F_CSUM) {
>> - greh->flags |= GRE_CSUM;
>> - *options = 0;
>> - *(__sum16 *)options = csum_fold(skb_checksum(skb,
>> - skb_transport_offset(skb),
>> - skb->len -
>> skb_transport_offset(skb),
>> - 0));
>> - }
>> -}
>> + skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
>> + if (IS_ERR(skb))
>> + return NULL;
>>
>> -static void gre_build_header(const struct vport *vport,
>> - struct sk_buff *skb,
>> - int tunnel_hlen)
>> -{
>> - __gre_build_header(skb, tunnel_hlen, false);
>> -}
>> + tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag;
>>
>> -static void gre64_build_header(const struct vport *vport,
>> - struct sk_buff *skb,
>> - int tunnel_hlen)
>> -{
>> - __gre_build_header(skb, tunnel_hlen, true);
>> + tpi.proto = htons(ETH_P_TEB);
>> + tpi.key = be64_get_low32(tun_key->tun_id);
>> + tpi.seq = seq;
>> + gre_build_header(skb, &tpi, tunnel_hlen);
>> +
>> + return skb;
>> }
>>
>
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev