Issue: The src-port for UDP is based on RSS hash in the packet metadata. In case of packets coming from VM it will be 5-tuple, if available, otherwise just IP addresses.If the VM fragments a large IP packet and sends the fragments to ovs, only the first fragment will contain the L4 header. Therefore, the first fragment and subsequent fragments get different UDP src ports in the outgoing VXLAN header.This can lead to fragment re-ordering in the fabric as packet will take different paths.
Fix: Intention of this is to avoid fragment packets taking different paths. For example, due to presence of firewalls, fragment packets will take different paths and will get dropped.To avoid this we ignore the L4 header during hash calculation only in the case of fragmented packets. Signed-off-by: Hemanth Aramadaka <[email protected]> --- lib/flow.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/flow.c b/lib/flow.c index c3a3aa3ce..20cca5937 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1018,7 +1018,9 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) miniflow_push_be16(mf, ct_tp_src, ct_tp_src); miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); if (dl_type == htons(ETH_TYPE_IP)) { - dp_packet_update_rss_hash_ipv4_tcp_udp(packet); + if (!(nw_frag & FLOW_NW_FRAG_MASK)) { + dp_packet_update_rss_hash_ipv4_tcp_udp(packet); + } } else if (dl_type == htons(ETH_TYPE_IPV6)) { dp_packet_update_rss_hash_ipv6_tcp_udp(packet); } @@ -1033,7 +1035,9 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) miniflow_push_be16(mf, ct_tp_src, ct_tp_src); miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); if (dl_type == htons(ETH_TYPE_IP)) { - dp_packet_update_rss_hash_ipv4_tcp_udp(packet); + if (!(nw_frag & FLOW_NW_FRAG_MASK)) { + dp_packet_update_rss_hash_ipv4_tcp_udp(packet); + } } else if (dl_type == htons(ETH_TYPE_IPV6)) { dp_packet_update_rss_hash_ipv6_tcp_udp(packet); } @@ -2248,7 +2252,7 @@ miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis) if (flow) { ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type); - uint8_t nw_proto; + uint8_t nw_proto, nw_frag; if (dl_type == htons(ETH_TYPE_IPV6)) { struct flowmap map = FLOWMAP_EMPTY_INITIALIZER; @@ -2270,6 +2274,11 @@ miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis) nw_proto = MINIFLOW_GET_U8(flow, nw_proto); hash = hash_add(hash, nw_proto); + + nw_frag = MINIFLOW_GET_U8(flow, nw_frag); + if (nw_frag & FLOW_NW_FRAG_MASK) { + goto out; + } if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP && nw_proto != IPPROTO_ICMPV6) { @@ -2292,6 +2301,7 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis) { BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); uint32_t hash = basis; + uint8_t nw_frag; if (flow) { @@ -2312,6 +2322,10 @@ flow_hash_5tuple(const struct flow *flow, uint32_t basis) } hash = hash_add(hash, flow->nw_proto); + nw_frag = MINIFLOW_GET_U8(flow, nw_frag); + if (nw_frag & FLOW_NW_FRAG_MASK) { + goto out; + } if (flow->nw_proto != IPPROTO_TCP && flow->nw_proto != IPPROTO_UDP && flow->nw_proto != IPPROTO_SCTP && flow->nw_proto != IPPROTO_ICMP && flow->nw_proto != IPPROTO_ICMPV6) { -- 2.34.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
