On Fri, Jan 30, 2026 at 10:01 PM Mehrdad Moradi via dev < [email protected]> wrote:
> When ovn-controller returns DNS responses with multiple IP addresses, > clients typically use the first IP in the list. Without randomization, > all clients direct traffic to the same backend, leading to uneven load > distribution. > > This patch implements Fisher-Yates shuffle on IPv4 and IPv6 address > arrays before building DNS answers, ensuring each query returns IPs > in a randomized order for better traffic distribution across backends. > > Signed-off-by: Mehrdad Moradi <[email protected]> > --- > Hi Mehrdad, thank you for the patch. I have some small comments down below. controller/pinctrl.c | 40 +++++++++++++++++++++++++++++++ > tests/ovn.at | 56 +++++++++++++++++++++++++++++++++++++++----- > 2 files changed, 90 insertions(+), 6 deletions(-) > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > index ab8a0a37c..8f9edf7cd 100644 > --- a/controller/pinctrl.c > +++ b/controller/pinctrl.c > @@ -3365,6 +3365,42 @@ dns_build_ptr_answer( > free(encoded); > } > > +/* Shuffle IPv4 addresses using Fisher-Yates for DNS round-robin. */ > +static void > +dns_shuffle_ipv4_addrs(struct ipv4_netaddr *addrs, size_t n) > +{ > + if (n <= 1) { > + return; > + } nit: I would change this check to "(!n)" as only the 0, 1 will be eliminated by the for loop check. + > + for (size_t i = n - 1; i > 0; i--) { > + size_t j = random_range(i + 1); > + if (i != j) { > nit: No need to check if it's the same element. This shouldn't cause any harm. > + struct ipv4_netaddr tmp = addrs[i]; > + addrs[i] = addrs[j]; > + addrs[j] = tmp; > + } > + } > +} > + > +/* Shuffle IPv6 addresses using Fisher-Yates for DNS round-robin. */ > +static void > +dns_shuffle_ipv6_addrs(struct ipv6_netaddr *addrs, size_t n) > +{ > Both points above apply here too. > + if (n <= 1) { > + return; > + } > + > + for (size_t i = n - 1; i > 0; i--) { > + size_t j = random_range(i + 1); > + if (i != j) { > + struct ipv6_netaddr tmp = addrs[i]; > + addrs[i] = addrs[j]; > + addrs[j] = tmp; > + } > + } > +} > + > #define DNS_RCODE_SERVER_REFUSE 0x5 > #define DNS_QUERY_TYPE_CLASS_LEN (2 * sizeof(ovs_be16)) > > @@ -3530,6 +3566,10 @@ pinctrl_handle_dns_lookup( > goto exit; > } > > + /* Shuffle IPs for round-robin load balancing. */ > + dns_shuffle_ipv4_addrs(ip_addrs.ipv4_addrs, > ip_addrs.n_ipv4_addrs); > + dns_shuffle_ipv6_addrs(ip_addrs.ipv6_addrs, > ip_addrs.n_ipv6_addrs); > + > if (query_type == DNS_QUERY_TYPE_A || > query_type == DNS_QUERY_TYPE_ANY) { > for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) { > diff --git a/tests/ovn.at b/tests/ovn.at > index 4d15d4a62..d5e51c60b 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -11636,6 +11636,7 @@ set_dns_params() { > local ttl=00000e10 > an_count=0001 > type=0001 > + expected_dns_answer_alt="" > case $hname in > vm1) > # vm1.ovn.org > @@ -11652,10 +11653,13 @@ set_dns_params() { > vm2) > # vm2.ovn.org > query_name=03766d32036f766e036f726700 > + # IPv4 addresses may be returned in any order due to DNS > round-robin. > # IPv4 address - 10.0.0.6 > - expected_dns_answer=${query_name}00010001${ttl}00040a000006 > + local ip1=${query_name}00010001${ttl}00040a000006 > # IPv4 address - 20.0.0.4 > - > expected_dns_answer=${expected_dns_answer}${query_name}00010001${ttl}000414000004 > + local ip2=${query_name}00010001${ttl}000414000004 > + expected_dns_answer=${ip1}${ip2} > + expected_dns_answer_alt=${ip2}${ip1} > an_count=0002 > ;; > vm3) > @@ -11712,6 +11716,12 @@ set_dns_params() { > local dns_resp_header=010281200001${an_count}00000000 > dns_req_data=${dns_req_header}${query_name}${type}0001 > > > dns_resp_data=${dns_resp_header}${query_name}${type}0001${expected_dns_answer} > + # Alternative response for cases with multiple IPs (DNS round-robin). > + if test -n "$expected_dns_answer_alt"; then > + > dns_resp_data_alt=${dns_resp_header}${query_name}${type}0001${expected_dns_answer_alt} > + else > + dns_resp_data_alt="" > + fi > } > > # This shell function sends a DNS request packet > @@ -11789,17 +11799,51 @@ src_ip=`ip_to_hex 10 0 0 4` > dst_ip=`ip_to_hex 10 0 0 1` > dns_reply=1 > test_dns 1 f00000000001 f000000000f0 $src_ip $dst_ip $dns_reply > $dns_req_data $dns_resp_data > +# Generate alternative expected response for DNS round-robin (without > sending packet). > +if test -n "$dns_resp_data_alt"; then > + # Build expected reply packet with alternative IP ordering. > + ip_len_val=`expr 28 + ${#dns_resp_data_alt} / 2` > + udp_len_val=`expr $ip_len_val - 20` > + ip_len_hex=$(printf "%x" $ip_len_val) > + udp_len_hex=$(printf "%x" $udp_len_val) > + > reply_alt=f00000000001f000000000f00800450000${ip_len_hex}0000000080110000 > + > reply_alt=${reply_alt}${dst_ip}${src_ip}0035923400${udp_len_hex}0000${dns_resp_data_alt} > + echo $reply_alt > 1.expected.alt > +fi > > # NXT_RESUMEs should be 1. > OVS_WAIT_UNTIL([test 1 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`]) > > -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"]) > -# Skipping the IPv4 checksum. > -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"]) > +# Check packets - for vm2, IPs may be in either order due to DNS > round-robin. > +if test -f 1.expected.alt; then > + $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > > 1.received > + cut -c 53- 1.expected > 1.expected.tail > + cut -c 53- 1.expected.alt > 1.expected.alt.tail > + cut -c 53- 1.received > 1.received.tail > + # Accept either ordering of IPs. > + if diff -q 1.expected.tail 1.received.tail > /dev/null 2>&1 || \ > + diff -q 1.expected.alt.tail 1.received.tail > /dev/null 2>&1; then > + : > + else > + echo "Expected (order 1):" && cat 1.expected.tail > + echo "Expected (order 2):" && cat 1.expected.alt.tail > + echo "Received:" && cat 1.received.tail > + AT_FAIL_IF([true]) > + fi > + # Check header (first 48 chars). > + cut -c -48 1.expected > 1.expected.head > + cut -c -48 1.received > 1.received.head > + AT_CHECK([diff 1.expected.head 1.received.head]) > +else > + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"]) > + # Skipping the IPv4 checksum. > + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"]) > +fi > I would love for this test to be converted to use scapy. But I won't block it as that change is a bit out of scope of this. > > reset_pcap_file hv1-vif1 hv1/vif1 > reset_pcap_file hv1-vif2 hv1/vif2 > -rm -f 1.expected > +rm -f 1.expected 1.expected.alt 1.received > +rm -f 1.expected.head 1.expected.tail 1.expected.alt.tail 1.received.head > 1.received.tail > rm -f 2.expected > > > -- > 2.52.0 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > There is probably no need to send v2, the above can be taken care of during merge: Acked-by: Ales Musil <[email protected]> Regards, Ales _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
