When ovn-controller returns DNS responses with multiple IP addresses,
clients typically use the first IP in the list. Without randomization,
all clients direct traffic to the same backend, leading to uneven load
distribution.

This patch implements Fisher-Yates shuffle on IPv4 and IPv6 address
arrays before building DNS answers, ensuring each query returns IPs
in a randomized order for better traffic distribution across backends.

Signed-off-by: Mehrdad Moradi <[email protected]>
---
 controller/pinctrl.c | 49 ++++++++++++++++++++++++++++++++++++--
 tests/ovn.at         | 56 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 97 insertions(+), 8 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index ab8a0a37c..25a4de6a2 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -3365,6 +3365,44 @@ dns_build_ptr_answer(
     free(encoded);
 }
 
+/* Shuffle an array in-place using Fisher-Yates algorithm. */
+static void
+array_shuffle(void *array, size_t elem_size, size_t n)
+{
+    if (n <= 1) {
+        return;
+    }
+
+    uint8_t *arr = array;
+    for (size_t i = n - 1; i > 0; i--) {
+        size_t j = random_range(i + 1);
+        uint8_t *a = arr + i * elem_size;
+        uint8_t *b = arr + j * elem_size;
+        for (size_t k = 0; k < elem_size; k++) {
+            uint8_t tmp = a[k];
+            a[k] = b[k];
+            b[k] = tmp;
+        }
+    }
+}
+
+/* Return an array of shuffled indices [0, n-1] for DNS round-robin.
+ * Caller must free the returned array. Returns NULL if n == 0. */
+static size_t *
+shuffle_indices(size_t n)
+{
+    if (!n) {
+        return NULL;
+    }
+
+    size_t *indices = xmalloc(n * sizeof *indices);
+    for (size_t i = 0; i < n; i++) {
+        indices[i] = i;
+    }
+    array_shuffle(indices, sizeof *indices, n);
+    return indices;
+}
+
 #define DNS_RCODE_SERVER_REFUSE 0x5
 #define DNS_QUERY_TYPE_CLASS_LEN (2 * sizeof(ovs_be16))
 
@@ -3530,11 +3568,15 @@ pinctrl_handle_dns_lookup(
             goto exit;
         }
 
+        /* Shuffle indices for round-robin load balancing. */
+        size_t *ipv4_order = shuffle_indices(ip_addrs.n_ipv4_addrs);
+        size_t *ipv6_order = shuffle_indices(ip_addrs.n_ipv6_addrs);
+
         if (query_type == DNS_QUERY_TYPE_A ||
             query_type == DNS_QUERY_TYPE_ANY) {
             for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
                 dns_build_a_answer(&dns_answer, in_queryname, idx,
-                                   ip_addrs.ipv4_addrs[i].addr);
+                                   ip_addrs.ipv4_addrs[ipv4_order[i]].addr);
                 ancount++;
             }
         }
@@ -3543,11 +3585,14 @@ pinctrl_handle_dns_lookup(
             query_type == DNS_QUERY_TYPE_ANY) {
             for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
                 dns_build_aaaa_answer(&dns_answer, in_queryname, idx,
-                                      &ip_addrs.ipv6_addrs[i].addr);
+                                      
&ip_addrs.ipv6_addrs[ipv6_order[i]].addr);
                 ancount++;
             }
         }
 
+        free(ipv4_order);
+        free(ipv6_order);
+
         /* DNS is configured with a record for this domain with
          * an IPv4/IPV6 only, so instead of ignoring this A/AAAA query,
          * we can reply with  RCODE = 5 (server refuses) and that
diff --git a/tests/ovn.at b/tests/ovn.at
index 4d15d4a62..d5e51c60b 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -11636,6 +11636,7 @@ set_dns_params() {
     local ttl=00000e10
     an_count=0001
     type=0001
+    expected_dns_answer_alt=""
     case $hname in
     vm1)
         # vm1.ovn.org
@@ -11652,10 +11653,13 @@ set_dns_params() {
     vm2)
         # vm2.ovn.org
         query_name=03766d32036f766e036f726700
+        # IPv4 addresses may be returned in any order due to DNS round-robin.
         # IPv4 address - 10.0.0.6
-        expected_dns_answer=${query_name}00010001${ttl}00040a000006
+        local ip1=${query_name}00010001${ttl}00040a000006
         # IPv4 address - 20.0.0.4
-        
expected_dns_answer=${expected_dns_answer}${query_name}00010001${ttl}000414000004
+        local ip2=${query_name}00010001${ttl}000414000004
+        expected_dns_answer=${ip1}${ip2}
+        expected_dns_answer_alt=${ip2}${ip1}
         an_count=0002
         ;;
     vm3)
@@ -11712,6 +11716,12 @@ set_dns_params() {
     local dns_resp_header=010281200001${an_count}00000000
     dns_req_data=${dns_req_header}${query_name}${type}0001
     
dns_resp_data=${dns_resp_header}${query_name}${type}0001${expected_dns_answer}
+    # Alternative response for cases with multiple IPs (DNS round-robin).
+    if test -n "$expected_dns_answer_alt"; then
+        
dns_resp_data_alt=${dns_resp_header}${query_name}${type}0001${expected_dns_answer_alt}
+    else
+        dns_resp_data_alt=""
+    fi
 }
 
 # This shell function sends a DNS request packet
@@ -11789,17 +11799,51 @@ src_ip=`ip_to_hex 10 0 0 4`
 dst_ip=`ip_to_hex 10 0 0 1`
 dns_reply=1
 test_dns 1 f00000000001 f000000000f0 $src_ip $dst_ip $dns_reply $dns_req_data 
$dns_resp_data
+# Generate alternative expected response for DNS round-robin (without sending 
packet).
+if test -n "$dns_resp_data_alt"; then
+    # Build expected reply packet with alternative IP ordering.
+    ip_len_val=`expr 28 + ${#dns_resp_data_alt} / 2`
+    udp_len_val=`expr $ip_len_val - 20`
+    ip_len_hex=$(printf "%x" $ip_len_val)
+    udp_len_hex=$(printf "%x" $udp_len_val)
+    reply_alt=f00000000001f000000000f00800450000${ip_len_hex}0000000080110000
+    
reply_alt=${reply_alt}${dst_ip}${src_ip}0035923400${udp_len_hex}0000${dns_resp_data_alt}
+    echo $reply_alt > 1.expected.alt
+fi
 
 # NXT_RESUMEs should be 1.
 OVS_WAIT_UNTIL([test 1 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`])
 
-OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
-# Skipping the IPv4 checksum.
-OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
+# Check packets - for vm2, IPs may be in either order due to DNS round-robin.
+if test -f 1.expected.alt; then
+    $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > 1.received
+    cut -c 53- 1.expected > 1.expected.tail
+    cut -c 53- 1.expected.alt > 1.expected.alt.tail
+    cut -c 53- 1.received > 1.received.tail
+    # Accept either ordering of IPs.
+    if diff -q 1.expected.tail 1.received.tail > /dev/null 2>&1 || \
+       diff -q 1.expected.alt.tail 1.received.tail > /dev/null 2>&1; then
+        :
+    else
+        echo "Expected (order 1):" && cat 1.expected.tail
+        echo "Expected (order 2):" && cat 1.expected.alt.tail
+        echo "Received:" && cat 1.received.tail
+        AT_FAIL_IF([true])
+    fi
+    # Check header (first 48 chars).
+    cut -c -48 1.expected > 1.expected.head
+    cut -c -48 1.received > 1.received.head
+    AT_CHECK([diff 1.expected.head 1.received.head])
+else
+    OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
+    # Skipping the IPv4 checksum.
+    OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
+fi
 
 reset_pcap_file hv1-vif1 hv1/vif1
 reset_pcap_file hv1-vif2 hv1/vif2
-rm -f 1.expected
+rm -f 1.expected 1.expected.alt 1.received
+rm -f 1.expected.head 1.expected.tail 1.expected.alt.tail 1.received.head 
1.received.tail
 rm -f 2.expected
 
 
-- 
2.52.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to