Thanks Han for backporting it for lower kernels. I tested this patch with
some running workloads with old  3.13* kernels (ovs 2.11*) and it works
fine. In addition I am also considering the discussion [1],  and for ecmp
workloads I see all packets are hitting the different bucket instead of the
same bucket (using ssh with single connection). Not a blocker as will plan
to upgrade these nodes with 3.13*  to newer 4*+ kernels moving further as
it's tough to leverage all ovs features for old kernels.

Tested-by: Aliasgar Ginwala <aginw...@ebay.com>

[1] -
https://mail.openvswitch.org/pipermail/ovs-discuss/2020-April/049896.html

On Mon, May 25, 2020 at 11:28 PM Han Zhou <hz...@ovn.org> wrote:

> This patch backports below upstream patches, and add __skb_set_hash
> to compat for older kernels.
>
> commit b5ab1f1be6180a2e975eede18731804b5164a05d
> Author: Jakub Kicinski <k...@kernel.org>
> Date:   Mon Mar 2 21:05:18 2020 -0800
>
>     openvswitch: add missing attribute validation for hash
>
>     Add missing attribute validation for OVS_PACKET_ATTR_HASH
>     to the netlink policy.
>
>     Fixes: bd1903b7c459 ("net: openvswitch: add hash info to upcall")
>     Signed-off-by: Jakub Kicinski <k...@kernel.org>
>     Reviewed-by: Greg Rose <gvrose8...@gmail.com>
>     Signed-off-by: David S. Miller <da...@davemloft.net>
>
> commit bd1903b7c4596ba6f7677d0dfefd05ba5876707d
> Author: Tonghao Zhang <xiangxia.m....@gmail.com>
> Date:   Wed Nov 13 23:04:49 2019 +0800
>
>     net: openvswitch: add hash info to upcall
>
>     When using the kernel datapath, the upcall don't
>     include skb hash info relatived. That will introduce
>     some problem, because the hash of skb is important
>     in kernel stack. For example, VXLAN module uses
>     it to select UDP src port. The tx queue selection
>     may also use the hash in stack.
>
>     Hash is computed in different ways. Hash is random
>     for a TCP socket, and hash may be computed in hardware,
>     or software stack. Recalculation hash is not easy.
>
>     Hash of TCP socket is computed:
>     tcp_v4_connect
>         -> sk_set_txhash (is random)
>
>     __tcp_transmit_skb
>         -> skb_set_hash_from_sk
>
>     There will be one upcall, without information of skb
>     hash, to ovs-vswitchd, for the first packet of a TCP
>     session. The rest packets will be processed in Open vSwitch
>     modules, hash kept. If this tcp session is forward to
>     VXLAN module, then the UDP src port of first tcp packet
>     is different from rest packets.
>
>     TCP packets may come from the host or dockers, to Open vSwitch.
>     To fix it, we store the hash info to upcall, and restore hash
>     when packets sent back.
>
>     +---------------+          +-------------------------+
>     |   Docker/VMs  |          |     ovs-vswitchd        |
>     +----+----------+          +-+--------------------+--+
>          |                       ^                    |
>          |                       |                    |
>          |                       |  upcall            v restore packet hash
> (not recalculate)
>          |                     +-+--------------------+--+
>          |  tap netdev         |                         |   vxlan module
>          +--------------->     +-->  Open vSwitch ko     +-->
>            or internal type    |                         |
>                                +-------------------------+
>
>     Reported-at:
> https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
>     Signed-off-by: Tonghao Zhang <xiangxia.m....@gmail.com>
>     Acked-by: Pravin B Shelar <pshe...@ovn.org>
>     Signed-off-by: David S. Miller <da...@davemloft.net>
>
> Acked-by: Tonghao Zhang <xiangxia.m....@gmail.com>
> Signed-off-by: Han Zhou <hz...@ovn.org>
> ---
> v1->v2: Fix build on kernel 3.16
>
>  acinclude.m4                                 |  4 ++++
>  datapath/datapath.c                          | 33
> +++++++++++++++++++++++++++-
>  datapath/datapath.h                          | 12 ++++++++++
>  datapath/linux/compat/include/linux/skbuff.h | 31
> ++++++++++++++++++++++++++
>  4 files changed, 79 insertions(+), 1 deletion(-)
>
> diff --git a/acinclude.m4 b/acinclude.m4
> index dabbffd..6bb7983 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -1101,6 +1101,10 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
>    OVS_FIND_OP_PARAM_IFELSE([$KSRC/include/net/rtnetlink.h],
>                             [validate], [extack],
>
> [OVS_DEFINE([HAVE_RTNLOP_VALIDATE_WITH_EXTACK])])
> +  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
> +                  [__skb_set_hash])
> +  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [sw_hash])
> +  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_get_hash_raw])
>
>    if cmp -s datapath/linux/kcompat.h.new \
>              datapath/linux/kcompat.h >/dev/null 2>&1; then
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index a7af784..05c1e42 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> @@ -371,7 +371,8 @@ static size_t upcall_msg_size(const struct
> dp_upcall_info *upcall_info,
>         size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
>                 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
>                 + nla_total_size(ovs_key_attr_size()) /*
> OVS_PACKET_ATTR_KEY */
> -               + nla_total_size(sizeof(unsigned int)); /*
> OVS_PACKET_ATTR_LEN */
> +               + nla_total_size(sizeof(unsigned int)) /*
> OVS_PACKET_ATTR_LEN */
> +               + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
>
>         /* OVS_PACKET_ATTR_USERDATA */
>         if (upcall_info->userdata)
> @@ -414,6 +415,7 @@ static int queue_userspace_packet(struct datapath *dp,
> struct sk_buff *skb,
>         size_t len;
>         unsigned int hlen;
>         int err, dp_ifindex;
> +       u64 hash;
>
>         dp_ifindex = get_dpifindex(dp);
>         if (!dp_ifindex)
> @@ -523,6 +525,25 @@ static int queue_userspace_packet(struct datapath
> *dp, struct sk_buff *skb,
>                 pad_packet(dp, user_skb);
>         }
>
> +       /* Add OVS_PACKET_ATTR_HASH */
> +       hash = skb_get_hash_raw(skb);
> +#ifdef HAVE_SW_HASH
> +       if (skb->sw_hash)
> +               hash |= OVS_PACKET_HASH_SW_BIT;
> +#endif
> +
> +#ifdef HAVE_L4_RXHASH
> +       if (skb->l4_rxhash)
> +#else
> +       if (skb->l4_hash)
> +#endif
> +               hash |= OVS_PACKET_HASH_L4_BIT;
> +
> +       if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
> +               err = -ENOBUFS;
> +               goto out;
> +       }
> +
>         /* Only reserve room for attribute header, packet data is added
>          * in skb_zerocopy()
>          */
> @@ -563,6 +584,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb,
> struct genl_info *info)
>         struct datapath *dp;
>         struct vport *input_vport;
>         u16 mru = 0;
> +       u64 hash;
>         int len;
>         int err;
>         bool log = !a[OVS_PACKET_ATTR_PROBE];
> @@ -588,6 +610,14 @@ static int ovs_packet_cmd_execute(struct sk_buff
> *skb, struct genl_info *info)
>         }
>         OVS_CB(packet)->mru = mru;
>
> +       if (a[OVS_PACKET_ATTR_HASH]) {
> +               hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
> +
> +               __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
> +                              !!(hash & OVS_PACKET_HASH_SW_BIT),
> +                              !!(hash & OVS_PACKET_HASH_L4_BIT));
> +       }
> +
>         /* Build an sw_flow for sending this packet. */
>         flow = ovs_flow_alloc();
>         err = PTR_ERR(flow);
> @@ -649,6 +679,7 @@ static const struct nla_policy
> packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
>         [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
>         [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
>         [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
> +       [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
>  };
>
>  static struct genl_ops dp_packet_genl_ops[] = {
> diff --git a/datapath/datapath.h b/datapath/datapath.h
> index 3bffa1d..f99db1f 100644
> --- a/datapath/datapath.h
> +++ b/datapath/datapath.h
> @@ -159,6 +159,18 @@ struct ovs_net {
>  #endif
>  };
>
> +/**
> + * enum ovs_pkt_hash_types - hash info to include with a packet
> + * to send to userspace.
> + * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
> + * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
> + * over transport ports.
> + */
> +enum ovs_pkt_hash_types {
> +       OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
> +       OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
> +};
> +
>  extern unsigned int ovs_net_id;
>  void ovs_lock(void);
>  void ovs_unlock(void);
> diff --git a/datapath/linux/compat/include/linux/skbuff.h
> b/datapath/linux/compat/include/linux/skbuff.h
> index 6397289..6d248b3 100644
> --- a/datapath/linux/compat/include/linux/skbuff.h
> +++ b/datapath/linux/compat/include/linux/skbuff.h
> @@ -456,4 +456,35 @@ static inline void skb_set_inner_ipproto(struct
> sk_buff *skb,
>  #define nf_reset_ct nf_reset
>  #endif
>
> +#ifndef HAVE___SKB_SET_HASH
> +static inline void
> +__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
> +{
> +#ifdef HAVE_RXHASH
> +       skb->rxhash = hash;
> +#else
> +       skb->hash = hash;
> +#endif
> +#if defined(HAVE_L4_RXHASH)
> +       skb->l4_rxhash = is_l4;
> +#else
> +       skb->l4_hash = is_l4;
> +#endif
> +#ifdef HAVE_SW_HASH
> +       skb->sw_hash = is_sw;
> +#endif
> +}
> +#endif
> +
> +#ifndef HAVE_SKB_GET_HASH_RAW
> +static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
> +{
> +#ifdef HAVE_RXHASH
> +       return skb->rxhash;
> +#else
> +       return skb->hash;
> +#endif
> +}
> +#endif
> +
>  #endif
> --
> 2.1.0
>
> _______________________________________________
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to