ping
On Fri, Nov 15, 2019 at 10:59 AM <[email protected]> wrote: > > From: Tonghao Zhang <[email protected]> > > The kernel datapath may sent upcall with hash info, > ovs-vswitchd should get it from upcall and then send > it back. > > The reason is that: > | When using the kernel datapath, the upcall don't > | include skb hash info relatived. That will introduce > | some problem, because the hash of skb is important > | in kernel stack. For example, VXLAN module uses > | it to select UDP src port. The tx queue selection > | may also use the hash in stack. > | > | Hash is computed in different ways. Hash is random > | for a TCP socket, and hash may be computed in hardware, > | or software stack. Recalculation hash is not easy. > | > | There will be one upcall, without information of skb > | hash, to ovs-vswitchd, for the first packet of a TCP > | session. The rest packets will be processed in Open vSwitch > | modules, hash kept. If this tcp session is forward to > | VXLAN module, then the UDP src port of first tcp packet > | is different from rest packets. > | > | TCP packets may come from the host or dockers, to Open vSwitch. > | To fix it, we store the hash info to upcall, and restore hash > | when packets sent back. > > Reported-at: > https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html > Link: > https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d > Signed-off-by: Tonghao Zhang <[email protected]> > --- > datapath/linux/compat/include/linux/openvswitch.h | 1 + > lib/dpif-netlink.c | 8 +++++++- > lib/dpif.h | 2 ++ > ofproto/ofproto-dpif-upcall.c | 10 +++++++--- > 4 files changed, 17 insertions(+), 4 deletions(-) > > diff --git a/datapath/linux/compat/include/linux/openvswitch.h > b/datapath/linux/compat/include/linux/openvswitch.h > index 7b16b1d5bfe0..778827f8b5a2 100644 > --- a/datapath/linux/compat/include/linux/openvswitch.h > +++ b/datapath/linux/compat/include/linux/openvswitch.h > @@ -210,6 +210,7 @@ enum ovs_packet_attr { > error logging should be suppressed. */ > OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ > OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ > + OVS_PACKET_ATTR_HASH, /* Packet hash. */ > __OVS_PACKET_ATTR_MAX > }; > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index d1f9b81db84f..e9a6887f7af2 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -1777,6 +1777,10 @@ dpif_netlink_encode_execute(int dp_ifindex, const > struct dpif_execute *d_exec, > if (d_exec->mtu) { > nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu); > } > + > + if (d_exec->hash) { > + nl_msg_put_u64(buf, OVS_PACKET_ATTR_HASH, d_exec->hash); > + } > } > > /* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'. > @@ -2461,7 +2465,8 @@ parse_odp_packet(const struct dpif_netlink *dpif, > struct ofpbuf *buf, > [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true > }, > [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional > = true }, > [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true > }, > - [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true } > + [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }, > + [OVS_PACKET_ATTR_HASH] = { .type = NL_A_U64, .optional = true } > }; > > struct ofpbuf b = ofpbuf_const_initializer(buf->data, buf->size); > @@ -2494,6 +2499,7 @@ parse_odp_packet(const struct dpif_netlink *dpif, > struct ofpbuf *buf, > upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY]; > upcall->actions = a[OVS_PACKET_ATTR_ACTIONS]; > upcall->mru = a[OVS_PACKET_ATTR_MRU]; > + upcall->hash = a[OVS_PACKET_ATTR_HASH]; > > /* Allow overwriting the netlink attribute header without reallocating. > */ > dp_packet_use_stub(&upcall->packet, > diff --git a/lib/dpif.h b/lib/dpif.h > index 289d574a01f8..c98513e48cc7 100644 > --- a/lib/dpif.h > +++ b/lib/dpif.h > @@ -712,6 +712,7 @@ struct dpif_execute { > bool probe; /* Suppress error messages. */ > unsigned int mtu; /* Maximum transmission unit to fragment. > 0 if not a fragmented packet */ > + uint64_t hash; > const struct flow *flow; /* Flow extracted from 'packet'. */ > > /* Input, but possibly modified as a side effect of execution. */ > @@ -802,6 +803,7 @@ struct dpif_upcall { > size_t key_len; /* Length of 'key' in bytes. */ > ovs_u128 ufid; /* Unique flow identifier for 'key'. */ > struct nlattr *mru; /* Maximum receive unit. */ > + struct nlattr *hash; /* Packet hash. */ > struct nlattr *cutlen; /* Number of bytes shrink from the end. */ > > /* DPIF_UC_ACTION only. */ > diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c > index 657aa7f79208..e6da23964385 100644 > --- a/ofproto/ofproto-dpif-upcall.c > +++ b/ofproto/ofproto-dpif-upcall.c > @@ -217,6 +217,7 @@ struct upcall { > ofp_port_t ofp_in_port; /* OpenFlow in port, or OFPP_NONE. */ > uint16_t mru; /* If !0, Maximum receive unit of > fragmented IP packet */ > + uint64_t hash; > > enum upcall_type type; /* Type of the upcall. */ > const struct nlattr *actions; /* Flow actions in DPIF_UC_ACTION > Upcalls. */ > @@ -784,7 +785,7 @@ recv_upcalls(struct handler *handler) > struct dpif_upcall *dupcall = &dupcalls[n_upcalls]; > struct upcall *upcall = &upcalls[n_upcalls]; > struct flow *flow = &flows[n_upcalls]; > - unsigned int mru; > + unsigned int mru = 0; > int error; > > ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls], > @@ -802,8 +803,10 @@ recv_upcalls(struct handler *handler) > > if (dupcall->mru) { > mru = nl_attr_get_u16(dupcall->mru); > - } else { > - mru = 0; > + } > + > + if (dupcall->hash) { > + upcall->hash = nl_attr_get_u64(dupcall->hash); > } > > error = upcall_receive(upcall, udpif->backer, &dupcall->packet, > @@ -1600,6 +1603,7 @@ handle_upcalls(struct udpif *udpif, struct upcall > *upcalls, > op->dop.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) > != 0; > op->dop.execute.probe = false; > op->dop.execute.mtu = upcall->mru; > + op->dop.execute.hash = upcall->hash; > } > } > > -- > 2.23.0 > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
