On Wed, Feb 4, 2026 at 6:03 PM Mehrdad Moradi via dev
<[email protected]> wrote:
>
> When ovn-controller returns DNS responses with multiple IP addresses,
> clients typically use the first IP in the list. Without randomization,
> all clients direct traffic to the same backend, leading to uneven load
> distribution.
>
> This patch implements Fisher-Yates shuffle on IPv4 and IPv6 address
> arrays before building DNS answers, ensuring each query returns IPs
> in a randomized order for better traffic distribution across backends.
>
> Signed-off-by: Mehrdad Moradi <[email protected]>
Thanks for the patch Mehrdad.
The patch LGTM with one minor comment. Please see below.
Numan
> ---
> controller/pinctrl.c | 15 ++++++++----
> lib/ovn-util.c | 25 ++++++++++++++++++++
> lib/ovn-util.h | 2 ++
> tests/ovn.at | 56 +++++++++++++++++++++++++++++++++++++++-----
> 4 files changed, 88 insertions(+), 10 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index ab8a0a37c..388955b44 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -3530,11 +3530,15 @@ pinctrl_handle_dns_lookup(
> goto exit;
> }
>
> + /* Shuffle indices for round-robin load balancing. */
> + size_t *ipv4_order = shuffle_indices(ip_addrs.n_ipv4_addrs);
> + size_t *ipv6_order = shuffle_indices(ip_addrs.n_ipv6_addrs);
> +
> if (query_type == DNS_QUERY_TYPE_A ||
> query_type == DNS_QUERY_TYPE_ANY) {
> for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
> - dns_build_a_answer(&dns_answer, in_queryname, idx,
> - ip_addrs.ipv4_addrs[i].addr);
> + ovs_be32 addr = ip_addrs.ipv4_addrs[ipv4_order[i]].addr;
> + dns_build_a_answer(&dns_answer, in_queryname, idx, addr);
> ancount++;
> }
> }
> @@ -3542,12 +3546,15 @@ pinctrl_handle_dns_lookup(
> if (query_type == DNS_QUERY_TYPE_AAAA ||
> query_type == DNS_QUERY_TYPE_ANY) {
> for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
> - dns_build_aaaa_answer(&dns_answer, in_queryname, idx,
> - &ip_addrs.ipv6_addrs[i].addr);
> + struct in6_addr *addr =
> &ip_addrs.ipv6_addrs[ipv6_order[i]].addr;
> + dns_build_aaaa_answer(&dns_answer, in_queryname, idx, addr);
> ancount++;
> }
> }
>
> + free(ipv4_order);
> + free(ipv6_order);
> +
> /* DNS is configured with a record for this domain with
> * an IPv4/IPV6 only, so instead of ignoring this A/AAAA query,
> * we can reply with RCODE = 5 (server refuses) and that
> diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> index 71098d478..48f9fd15f 100644
> --- a/lib/ovn-util.c
> +++ b/lib/ovn-util.c
> @@ -21,6 +21,7 @@
>
> #include "daemon.h"
> #include "include/ovn/actions.h"
> +#include "lib/random.h"
> #include "openvswitch/ofp-parse.h"
> #include "openvswitch/rconn.h"
> #include "openvswitch/vlog.h"
> @@ -1737,3 +1738,27 @@ is_partial_uuid_match(const struct uuid *uuid, const
> char *match)
> s2 = strip_leading_zero(s2);
> return !strncmp(s1, s2, strlen(s2));
> }
> +
> +/* Return an array of shuffled indices [0, n-1] using Fisher-Yates algorithm.
> + * Caller must free the returned array. Returns NULL if n == 0. */
Since this is a library function, callers of this function can use
the returned array in any way they like
and not just as shuffle indices.
I asked Gemini and it suggested the below functions:
- shuffled_range(size_t) or maybe shuffle_range(size_t)
- permute_range(size_t)
- generate_permutation(size_t)
and a few more
My preference would be suffle_range(size_t)
/* Returns a heap-allocated array of 'n' integers containing a random
* permutation of the range [0, n-1]. Caller must free the returned array.
* Returns NULL if n == 0. */
size_t *
shuffled_range(size_t n)
{
...
}
What do you all think ?
With this addressed:
Acked-by: Numan Siddique <[email protected]>
I guess there is no need to submit v4 if Dumitru/Ales agree with one
of the names or have a better name.
They can take care of it while applying.
Thanks
Numan
> +size_t *
> +shuffle_indices(size_t n)
> +{
> + if (!n) {
> + return NULL;
> + }
> +
> + size_t *indices = xmalloc(n * sizeof *indices);
> + for (size_t i = 0; i < n; i++) {
> + indices[i] = i;
> + }
> +
> + for (size_t i = n - 1; i > 0; i--) {
> + size_t j = random_range(i + 1);
> + size_t tmp = indices[i];
> + indices[i] = indices[j];
> + indices[j] = tmp;
> + }
> +
> + return indices;
> +}
> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> index 74931ed29..7c29b6afb 100644
> --- a/lib/ovn-util.h
> +++ b/lib/ovn-util.h
> @@ -796,4 +796,6 @@ NEIGH_REDISTRIBUTE_MODES
> enum neigh_redistribute_mode
> parse_neigh_dynamic_redistribute(const struct smap *options);
>
> +size_t *shuffle_indices(size_t n);
> +
> #endif /* OVN_UTIL_H */
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 4d15d4a62..d5e51c60b 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -11636,6 +11636,7 @@ set_dns_params() {
> local ttl=00000e10
> an_count=0001
> type=0001
> + expected_dns_answer_alt=""
> case $hname in
> vm1)
> # vm1.ovn.org
> @@ -11652,10 +11653,13 @@ set_dns_params() {
> vm2)
> # vm2.ovn.org
> query_name=03766d32036f766e036f726700
> + # IPv4 addresses may be returned in any order due to DNS round-robin.
> # IPv4 address - 10.0.0.6
> - expected_dns_answer=${query_name}00010001${ttl}00040a000006
> + local ip1=${query_name}00010001${ttl}00040a000006
> # IPv4 address - 20.0.0.4
> -
> expected_dns_answer=${expected_dns_answer}${query_name}00010001${ttl}000414000004
> + local ip2=${query_name}00010001${ttl}000414000004
> + expected_dns_answer=${ip1}${ip2}
> + expected_dns_answer_alt=${ip2}${ip1}
> an_count=0002
> ;;
> vm3)
> @@ -11712,6 +11716,12 @@ set_dns_params() {
> local dns_resp_header=010281200001${an_count}00000000
> dns_req_data=${dns_req_header}${query_name}${type}0001
>
> dns_resp_data=${dns_resp_header}${query_name}${type}0001${expected_dns_answer}
> + # Alternative response for cases with multiple IPs (DNS round-robin).
> + if test -n "$expected_dns_answer_alt"; then
> +
> dns_resp_data_alt=${dns_resp_header}${query_name}${type}0001${expected_dns_answer_alt}
> + else
> + dns_resp_data_alt=""
> + fi
> }
>
> # This shell function sends a DNS request packet
> @@ -11789,17 +11799,51 @@ src_ip=`ip_to_hex 10 0 0 4`
> dst_ip=`ip_to_hex 10 0 0 1`
> dns_reply=1
> test_dns 1 f00000000001 f000000000f0 $src_ip $dst_ip $dns_reply
> $dns_req_data $dns_resp_data
> +# Generate alternative expected response for DNS round-robin (without
> sending packet).
> +if test -n "$dns_resp_data_alt"; then
> + # Build expected reply packet with alternative IP ordering.
> + ip_len_val=`expr 28 + ${#dns_resp_data_alt} / 2`
> + udp_len_val=`expr $ip_len_val - 20`
> + ip_len_hex=$(printf "%x" $ip_len_val)
> + udp_len_hex=$(printf "%x" $udp_len_val)
> + reply_alt=f00000000001f000000000f00800450000${ip_len_hex}0000000080110000
> +
> reply_alt=${reply_alt}${dst_ip}${src_ip}0035923400${udp_len_hex}0000${dns_resp_data_alt}
> + echo $reply_alt > 1.expected.alt
> +fi
>
> # NXT_RESUMEs should be 1.
> OVS_WAIT_UNTIL([test 1 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`])
>
> -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
> -# Skipping the IPv4 checksum.
> -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
> +# Check packets - for vm2, IPs may be in either order due to DNS round-robin.
> +if test -f 1.expected.alt; then
> + $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap > 1.received
> + cut -c 53- 1.expected > 1.expected.tail
> + cut -c 53- 1.expected.alt > 1.expected.alt.tail
> + cut -c 53- 1.received > 1.received.tail
> + # Accept either ordering of IPs.
> + if diff -q 1.expected.tail 1.received.tail > /dev/null 2>&1 || \
> + diff -q 1.expected.alt.tail 1.received.tail > /dev/null 2>&1; then
> + :
> + else
> + echo "Expected (order 1):" && cat 1.expected.tail
> + echo "Expected (order 2):" && cat 1.expected.alt.tail
> + echo "Received:" && cat 1.received.tail
> + AT_FAIL_IF([true])
> + fi
> + # Check header (first 48 chars).
> + cut -c -48 1.expected > 1.expected.head
> + cut -c -48 1.received > 1.received.head
> + AT_CHECK([diff 1.expected.head 1.received.head])
> +else
> + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
> + # Skipping the IPv4 checksum.
> + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
> +fi
>
> reset_pcap_file hv1-vif1 hv1/vif1
> reset_pcap_file hv1-vif2 hv1/vif2
> -rm -f 1.expected
> +rm -f 1.expected 1.expected.alt 1.received
> +rm -f 1.expected.head 1.expected.tail 1.expected.alt.tail 1.received.head
> 1.received.tail
> rm -f 2.expected
>
>
> --
> 2.52.0
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev