Issue:

The src-port for UDP is based on RSS hash in the packet metadata.
In case of packets coming from VM it will be 5-tuple, if available,
otherwise just IP addresses.If the VM fragments a large IP packet
and sends the fragments to ovs, only the first fragment will contain
the L4 header. Therefore, the first fragment and subsequent fragments
get different UDP src ports in the outgoing VXLAN header.This can
lead to fragment re-ordering in the fabric as packet will take
different paths.

Fix:

Intention of this is to avoid fragment packets taking different paths.
For example, due to presence of firewalls, fragment packets will take
different paths and will get dropped.To avoid this we ignore the L4
header during hash calculation only in the case of fragmented packets.

Signed-off-by: Hemanth Aramadaka <[email protected]>
---
 lib/flow.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/lib/flow.c b/lib/flow.c
index c3a3aa3ce..4f4197b1c 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1018,7 +1018,9 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
                     miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                     miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                     if (dl_type == htons(ETH_TYPE_IP)) {
-                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                        if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+                            dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                        }
                     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                         dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                     }
@@ -1033,7 +1035,9 @@ miniflow_extract(struct dp_packet *packet, struct 
miniflow *dst)
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
                 if (dl_type == htons(ETH_TYPE_IP)) {
-                    dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                    if (!(nw_frag & FLOW_NW_FRAG_MASK)) {
+                        dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
+                    }
                 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                     dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                 }
@@ -2248,7 +2252,7 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
uint32_t basis)
 
     if (flow) {
         ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type);
-        uint8_t nw_proto;
+        uint8_t nw_proto, nw_frag;
 
         if (dl_type == htons(ETH_TYPE_IPV6)) {
             struct flowmap map = FLOWMAP_EMPTY_INITIALIZER;
@@ -2270,6 +2274,14 @@ miniflow_hash_5tuple(const struct miniflow *flow, 
uint32_t basis)
 
         nw_proto = MINIFLOW_GET_U8(flow, nw_proto);
         hash = hash_add(hash, nw_proto);
+
+        /* Skip l4 header fields if IP packet is fragmented since
+         * only first fragment will carry l4 header.
+         */
+        nw_frag = MINIFLOW_GET_U8(flow, nw_frag);
+        if (nw_frag & FLOW_NW_FRAG_MASK) {
+            goto out;
+        }
         if (nw_proto != IPPROTO_TCP && nw_proto != IPPROTO_UDP
             && nw_proto != IPPROTO_SCTP && nw_proto != IPPROTO_ICMP
             && nw_proto != IPPROTO_ICMPV6) {
-- 
2.34.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to