On Tue, Feb 3, 2026 at 8:40 PM Mehrdad Moradi via dev <
[email protected]> wrote:

> When ovn-controller returns DNS responses with multiple IP addresses,
> clients typically use the first IP in the list. Without randomization,
> all clients direct traffic to the same backend, leading to uneven load
> distribution.
>
> This patch implements Fisher-Yates shuffle on IPv4 and IPv6 address
> arrays before building DNS answers, ensuring each query returns IPs
> in a randomized order for better traffic distribution across backends.
>
> Signed-off-by: Mehrdad Moradi <[email protected]>
> ---
>

Hi Mehrdad,

I think there might have been slight misunderstanding.
Dumitru suggested two things, generic shuffle and shuffled
indices. If we are going the way of shuffled indices we don't
need the generic shuffle. So I would put the shuffle into the
shuffle_indices() function directly. I have also one more comment
down below.

 controller/pinctrl.c | 49 ++++++++++++++++++++++++++++++++++++--
>  tests/ovn.at         | 56 +++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 97 insertions(+), 8 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index ab8a0a37c..25a4de6a2 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -3365,6 +3365,44 @@ dns_build_ptr_answer(
>      free(encoded);
>  }
>
> +/* Shuffle an array in-place using Fisher-Yates algorithm. */
> +static void
> +array_shuffle(void *array, size_t elem_size, size_t n)
> +{
> +    if (n <= 1) {
> +        return;
> +    }
> +
> +    uint8_t *arr = array;
> +    for (size_t i = n - 1; i > 0; i--) {
> +        size_t j = random_range(i + 1);
> +        uint8_t *a = arr + i * elem_size;
> +        uint8_t *b = arr + j * elem_size;
> +        for (size_t k = 0; k < elem_size; k++) {
> +            uint8_t tmp = a[k];
> +            a[k] = b[k];
> +            b[k] = tmp;
> +        }
> +    }
> +}
> +
> +/* Return an array of shuffled indices [0, n-1] for DNS round-robin.
> + * Caller must free the returned array. Returns NULL if n == 0. */
> +static size_t *
> +shuffle_indices(size_t n)
> +{
> +    if (!n) {
> +        return NULL;
> +    }
> +
> +    size_t *indices = xmalloc(n * sizeof *indices);
> +    for (size_t i = 0; i < n; i++) {
> +        indices[i] = i;
> +    }
> +    array_shuffle(indices, sizeof *indices, n);
>

As said above, let's put the shuffle loop directly here.

+    return indices;
> +}
> +
>  #define DNS_RCODE_SERVER_REFUSE 0x5
>  #define DNS_QUERY_TYPE_CLASS_LEN (2 * sizeof(ovs_be16))
>
> @@ -3530,11 +3568,15 @@ pinctrl_handle_dns_lookup(
>              goto exit;
>          }
>
> +        /* Shuffle indices for round-robin load balancing. */
> +        size_t *ipv4_order = shuffle_indices(ip_addrs.n_ipv4_addrs);
> +        size_t *ipv6_order = shuffle_indices(ip_addrs.n_ipv6_addrs);
> +
>          if (query_type == DNS_QUERY_TYPE_A ||
>              query_type == DNS_QUERY_TYPE_ANY) {
>              for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
>                  dns_build_a_answer(&dns_answer, in_queryname, idx,
> -                                   ip_addrs.ipv4_addrs[i].addr);
> +
>  ip_addrs.ipv4_addrs[ipv4_order[i]].addr);
>

nit: This is a bit hard to read, I would make the addr separate variable.

                 ancount++;
>              }
>          }
> @@ -3543,11 +3585,14 @@ pinctrl_handle_dns_lookup(
>              query_type == DNS_QUERY_TYPE_ANY) {
>              for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
>                  dns_build_aaaa_answer(&dns_answer, in_queryname, idx,
> -                                      &ip_addrs.ipv6_addrs[i].addr);
> +
> &ip_addrs.ipv6_addrs[ipv6_order[i]].addr);
>

nit: Same here.


>                  ancount++;
>              }
>          }
>
> +        free(ipv4_order);
> +        free(ipv6_order);
> +
>          /* DNS is configured with a record for this domain with
>           * an IPv4/IPV6 only, so instead of ignoring this A/AAAA query,
>           * we can reply with  RCODE = 5 (server refuses) and that
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 4d15d4a62..d5e51c60b 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -11636,6 +11636,7 @@ set_dns_params() {
>      local ttl=00000e10
>      an_count=0001
>      type=0001
> +    expected_dns_answer_alt=""
>      case $hname in
>      vm1)
>          # vm1.ovn.org
> @@ -11652,10 +11653,13 @@ set_dns_params() {
>      vm2)
>          # vm2.ovn.org
>          query_name=03766d32036f766e036f726700
> +        # IPv4 addresses may be returned in any order due to DNS
> round-robin.
>          # IPv4 address - 10.0.0.6
> -        expected_dns_answer=${query_name}00010001${ttl}00040a000006
> +        local ip1=${query_name}00010001${ttl}00040a000006
>          # IPv4 address - 20.0.0.4
> -
> expected_dns_answer=${expected_dns_answer}${query_name}00010001${ttl}000414000004
> +        local ip2=${query_name}00010001${ttl}000414000004
> +        expected_dns_answer=${ip1}${ip2}
> +        expected_dns_answer_alt=${ip2}${ip1}
>          an_count=0002
>          ;;
>      vm3)
> @@ -11712,6 +11716,12 @@ set_dns_params() {
>      local dns_resp_header=010281200001${an_count}00000000
>      dns_req_data=${dns_req_header}${query_name}${type}0001
>
>  
> dns_resp_data=${dns_resp_header}${query_name}${type}0001${expected_dns_answer}
> +    # Alternative response for cases with multiple IPs (DNS round-robin).
> +    if test -n "$expected_dns_answer_alt"; then
> +
> dns_resp_data_alt=${dns_resp_header}${query_name}${type}0001${expected_dns_answer_alt}
> +    else
> +        dns_resp_data_alt=""
> +    fi
>  }
>
>  # This shell function sends a DNS request packet
> @@ -11789,17 +11799,51 @@ src_ip=`ip_to_hex 10 0 0 4`
>  dst_ip=`ip_to_hex 10 0 0 1`
>  dns_reply=1
>  test_dns 1 f00000000001 f000000000f0 $src_ip $dst_ip $dns_reply
> $dns_req_data $dns_resp_data
> +# Generate alternative expected response for DNS round-robin (without
> sending packet).
> +if test -n "$dns_resp_data_alt"; then
> +    # Build expected reply packet with alternative IP ordering.
> +    ip_len_val=`expr 28 + ${#dns_resp_data_alt} / 2`
> +    udp_len_val=`expr $ip_len_val - 20`
> +    ip_len_hex=$(printf "%x" $ip_len_val)
> +    udp_len_hex=$(printf "%x" $udp_len_val)
> +
> reply_alt=f00000000001f000000000f00800450000${ip_len_hex}0000000080110000
> +
> reply_alt=${reply_alt}${dst_ip}${src_ip}0035923400${udp_len_hex}0000${dns_resp_data_alt}
> +    echo $reply_alt > 1.expected.alt
> +fi
>
>  # NXT_RESUMEs should be 1.
>  OVS_WAIT_UNTIL([test 1 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`])
>
> -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
> -# Skipping the IPv4 checksum.
> -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
> +# Check packets - for vm2, IPs may be in either order due to DNS
> round-robin.
> +if test -f 1.expected.alt; then
> +    $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap >
> 1.received
> +    cut -c 53- 1.expected > 1.expected.tail
> +    cut -c 53- 1.expected.alt > 1.expected.alt.tail
> +    cut -c 53- 1.received > 1.received.tail
> +    # Accept either ordering of IPs.
> +    if diff -q 1.expected.tail 1.received.tail > /dev/null 2>&1 || \
> +       diff -q 1.expected.alt.tail 1.received.tail > /dev/null 2>&1; then
> +        :
> +    else
> +        echo "Expected (order 1):" && cat 1.expected.tail
> +        echo "Expected (order 2):" && cat 1.expected.alt.tail
> +        echo "Received:" && cat 1.received.tail
> +        AT_FAIL_IF([true])
> +    fi
> +    # Check header (first 48 chars).
> +    cut -c -48 1.expected > 1.expected.head
> +    cut -c -48 1.received > 1.received.head
> +    AT_CHECK([diff 1.expected.head 1.received.head])
> +else
> +    OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
> +    # Skipping the IPv4 checksum.
> +    OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
> +fi
>
>  reset_pcap_file hv1-vif1 hv1/vif1
>  reset_pcap_file hv1-vif2 hv1/vif2
> -rm -f 1.expected
> +rm -f 1.expected 1.expected.alt 1.received
> +rm -f 1.expected.head 1.expected.tail 1.expected.alt.tail 1.received.head
> 1.received.tail
>  rm -f 2.expected
>
>
> --
> 2.52.0
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
If Dumitru agrees I can make the changes during merge.

Regards,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to