On 5/4/2020 11:03 AM, Han Zhou wrote:
On Mon, May 4, 2020 at 10:55 AM Gregory Rose <[email protected]> wrote:
On 5/3/2020 6:02 PM, Han Zhou wrote:
This patch backports below upstream patches, and add __skb_set_hash
to compat for older kernels.
commit b5ab1f1be6180a2e975eede18731804b5164a05d
Author: Jakub Kicinski <[email protected]>
Date: Mon Mar 2 21:05:18 2020 -0800
openvswitch: add missing attribute validation for hash
Add missing attribute validation for OVS_PACKET_ATTR_HASH
to the netlink policy.
Fixes: bd1903b7c459 ("net: openvswitch: add hash info to upcall")
Signed-off-by: Jakub Kicinski <[email protected]>
Reviewed-by: Greg Rose <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
commit bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Author: Tonghao Zhang <[email protected]>
Date: Wed Nov 13 23:04:49 2019 +0800
net: openvswitch: add hash info to upcall
When using the kernel datapath, the upcall don't
include skb hash info relatived. That will introduce
some problem, because the hash of skb is important
in kernel stack. For example, VXLAN module uses
it to select UDP src port. The tx queue selection
may also use the hash in stack.
Hash is computed in different ways. Hash is random
for a TCP socket, and hash may be computed in hardware,
or software stack. Recalculation hash is not easy.
Hash of TCP socket is computed:
tcp_v4_connect
-> sk_set_txhash (is random)
__tcp_transmit_skb
-> skb_set_hash_from_sk
There will be one upcall, without information of skb
hash, to ovs-vswitchd, for the first packet of a TCP
session. The rest packets will be processed in Open vSwitch
modules, hash kept. If this tcp session is forward to
VXLAN module, then the UDP src port of first tcp packet
is different from rest packets.
TCP packets may come from the host or dockers, to Open vSwitch.
To fix it, we store the hash info to upcall, and restore hash
when packets sent back.
+---------------+ +-------------------------+
| Docker/VMs | | ovs-vswitchd |
+----+----------+ +-+--------------------+--+
| ^ |
| | |
| | upcall v restore packet
hash
(not recalculate)
| +-+--------------------+--+
| tap netdev | | vxlan
module
+---------------> +--> Open vSwitch ko +-->
or internal type | |
+-------------------------+
Reported-at:
https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Signed-off-by: Tonghao Zhang <[email protected]>
Acked-by: Pravin B Shelar <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
Cc: Tonghao Zhang <[email protected]>
Signed-off-by: Han Zhou <[email protected]>
Hi Han,
thank you for backporting these patches. They look good but I'm not
really in favor of squashing multiple upstream patches into a single
patch. IMO it helps with history, bisecting and debugging to have a
separate backported patch for each respective upstream patch.
The maintainers may be fine with it so let's see what they say.
Thanks,
- Greg
Thanks for the suggestion. I wasn't so sure because I saw many other
backport has multiple upstream patches in one backport patch as well, and
since in this case the follow-up patch was a bug fix so I wasn't expecting
only one of the patch being merged without the other, so I thought it might
make more sense of submitting as a single patch. I can separate them if it
is preferred, but I will wait for more comment and I can make that change
together in v2, if needed.
Sure, I can see your reasoning. Makes sense.
Thanks,
- Greg
---
acinclude.m4 | 2 ++
datapath/datapath.c | 27
++++++++++++++++++++++++++-
datapath/datapath.h | 12 ++++++++++++
datapath/linux/compat/include/linux/skbuff.h | 10 ++++++++++
4 files changed, 50 insertions(+), 1 deletion(-)
diff --git a/acinclude.m4 b/acinclude.m4
index dabbffd..5e5ca39 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -1101,6 +1101,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_FIND_OP_PARAM_IFELSE([$KSRC/include/net/rtnetlink.h],
[validate], [extack],
[OVS_DEFINE([HAVE_RTNLOP_VALIDATE_WITH_EXTACK])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+ [__skb_set_hash])
if cmp -s datapath/linux/kcompat.h.new \
datapath/linux/kcompat.h >/dev/null 2>&1; then
diff --git a/datapath/datapath.c b/datapath/datapath.c
index a7af784..82f5688 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -371,7 +371,8 @@ static size_t upcall_msg_size(const struct
dp_upcall_info *upcall_info,
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /*
OVS_PACKET_ATTR_KEY */
- + nla_total_size(sizeof(unsigned int)); /*
OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(unsigned int)) /*
OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
@@ -414,6 +415,7 @@ static int queue_userspace_packet(struct datapath
*dp, struct sk_buff *skb,
size_t len;
unsigned int hlen;
int err, dp_ifindex;
+ u64 hash;
dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex)
@@ -523,6 +525,19 @@ static int queue_userspace_packet(struct datapath
*dp, struct sk_buff *skb,
pad_packet(dp, user_skb);
}
+ /* Add OVS_PACKET_ATTR_HASH */
+ hash = skb_get_hash_raw(skb);
+ if (skb->sw_hash)
+ hash |= OVS_PACKET_HASH_SW_BIT;
+
+ if (skb->l4_hash)
+ hash |= OVS_PACKET_HASH_L4_BIT;
+
+ if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash))
{
+ err = -ENOBUFS;
+ goto out;
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy()
*/
@@ -563,6 +578,7 @@ static int ovs_packet_cmd_execute(struct sk_buff
*skb, struct genl_info *info)
struct datapath *dp;
struct vport *input_vport;
u16 mru = 0;
+ u64 hash;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -588,6 +604,14 @@ static int ovs_packet_cmd_execute(struct sk_buff
*skb, struct genl_info *info)
}
OVS_CB(packet)->mru = mru;
+ if (a[OVS_PACKET_ATTR_HASH]) {
+ hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+ __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+ !!(hash & OVS_PACKET_HASH_SW_BIT),
+ !!(hash & OVS_PACKET_HASH_L4_BIT));
+ }
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
@@ -649,6 +673,7 @@ static const struct nla_policy
packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+ [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
static struct genl_ops dp_packet_genl_ops[] = {
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 3bffa1d..f99db1f 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -159,6 +159,18 @@ struct ovs_net {
#endif
};
+/**
+ * enum ovs_pkt_hash_types - hash info to include with a packet
+ * to send to userspace.
+ * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software
stack.
+ * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
+ * over transport ports.
+ */
+enum ovs_pkt_hash_types {
+ OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
+ OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
+};
+
extern unsigned int ovs_net_id;
void ovs_lock(void);
void ovs_unlock(void);
diff --git a/datapath/linux/compat/include/linux/skbuff.h
b/datapath/linux/compat/include/linux/skbuff.h
index 6397289..930544e 100644
--- a/datapath/linux/compat/include/linux/skbuff.h
+++ b/datapath/linux/compat/include/linux/skbuff.h
@@ -456,4 +456,14 @@ static inline void skb_set_inner_ipproto(struct
sk_buff *skb,
#define nf_reset_ct nf_reset
#endif
+#ifndef HAVE___SKB_SET_HASH
+static inline void
+__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
+{
+ skb->l4_hash = is_l4;
+ skb->sw_hash = is_sw;
+ skb->hash = hash;
+}
+#endif
+
#endif
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev