On 2/4/26 8:30 AM, Ales Musil wrote:
> On Tue, Feb 3, 2026 at 8:40 PM Mehrdad Moradi via dev <
> [email protected]> wrote:
>
>> When ovn-controller returns DNS responses with multiple IP addresses,
>> clients typically use the first IP in the list. Without randomization,
>> all clients direct traffic to the same backend, leading to uneven load
>> distribution.
>>
>> This patch implements Fisher-Yates shuffle on IPv4 and IPv6 address
>> arrays before building DNS answers, ensuring each query returns IPs
>> in a randomized order for better traffic distribution across backends.
>>
>> Signed-off-by: Mehrdad Moradi <[email protected]>
>> ---
>>
>
> Hi Mehrdad,
>
Hi Mehrdad, Ales,
> I think there might have been slight misunderstanding.
> Dumitru suggested two things, generic shuffle and shuffled
> indices. If we are going the way of shuffled indices we don't
> need the generic shuffle. So I would put the shuffle into the
> shuffle_indices() function directly. I have also one more comment
> down below.
>
Yes, that's what I meant, sorry if it wasn't too clear.
> controller/pinctrl.c | 49 ++++++++++++++++++++++++++++++++++++--
>> tests/ovn.at | 56 +++++++++++++++++++++++++++++++++++++++-----
>> 2 files changed, 97 insertions(+), 8 deletions(-)
>>
>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>> index ab8a0a37c..25a4de6a2 100644
>> --- a/controller/pinctrl.c
>> +++ b/controller/pinctrl.c
>> @@ -3365,6 +3365,44 @@ dns_build_ptr_answer(
>> free(encoded);
>> }
>>
>> +/* Shuffle an array in-place using Fisher-Yates algorithm. */
>> +static void
>> +array_shuffle(void *array, size_t elem_size, size_t n)
>> +{
>> + if (n <= 1) {
>> + return;
>> + }
>> +
>> + uint8_t *arr = array;
>> + for (size_t i = n - 1; i > 0; i--) {
>> + size_t j = random_range(i + 1);
>> + uint8_t *a = arr + i * elem_size;
>> + uint8_t *b = arr + j * elem_size;
>> + for (size_t k = 0; k < elem_size; k++) {
>> + uint8_t tmp = a[k];
>> + a[k] = b[k];
>> + b[k] = tmp;
>> + }
Swapping byte by byte is not really the right way to do it. But if we
move it inside the "shuffle_indices" function we don't need to do this
anymore. We can just use a size_t temporary instead.
>> + }
>> +}
>> +
>> +/* Return an array of shuffled indices [0, n-1] for DNS round-robin.
This is a generic function, it just creates a nicely randomized
permutation of indices. It shouldn't mention DNS. It could also go in
ovn-util.[ch] while we're at it.
>> + * Caller must free the returned array. Returns NULL if n == 0. */
>> +static size_t *
>> +shuffle_indices(size_t n)
>> +{
>> + if (!n) {
>> + return NULL;
>> + }
>> +
>> + size_t *indices = xmalloc(n * sizeof *indices);
>> + for (size_t i = 0; i < n; i++) {
>> + indices[i] = i;
>> + }
>> + array_shuffle(indices, sizeof *indices, n);
>>
>
> As said above, let's put the shuffle loop directly here.
>
> + return indices;
>> +}
>> +
>> #define DNS_RCODE_SERVER_REFUSE 0x5
>> #define DNS_QUERY_TYPE_CLASS_LEN (2 * sizeof(ovs_be16))
>>
>> @@ -3530,11 +3568,15 @@ pinctrl_handle_dns_lookup(
>> goto exit;
>> }
>>
>> + /* Shuffle indices for round-robin load balancing. */
>> + size_t *ipv4_order = shuffle_indices(ip_addrs.n_ipv4_addrs);
>> + size_t *ipv6_order = shuffle_indices(ip_addrs.n_ipv6_addrs);
>> +
>> if (query_type == DNS_QUERY_TYPE_A ||
>> query_type == DNS_QUERY_TYPE_ANY) {
>> for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
>> dns_build_a_answer(&dns_answer, in_queryname, idx,
>> - ip_addrs.ipv4_addrs[i].addr);
>> +
>> ip_addrs.ipv4_addrs[ipv4_order[i]].addr);
>>
>
> nit: This is a bit hard to read, I would make the addr separate variable.
>
> ancount++;
>> }
>> }
>> @@ -3543,11 +3585,14 @@ pinctrl_handle_dns_lookup(
>> query_type == DNS_QUERY_TYPE_ANY) {
>> for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
>> dns_build_aaaa_answer(&dns_answer, in_queryname, idx,
>> - &ip_addrs.ipv6_addrs[i].addr);
>> +
>> &ip_addrs.ipv6_addrs[ipv6_order[i]].addr);
>>
>
> nit: Same here.
>
>
>> ancount++;
>> }
>> }
>>
>> + free(ipv4_order);
>> + free(ipv6_order);
>> +
>> /* DNS is configured with a record for this domain with
>> * an IPv4/IPV6 only, so instead of ignoring this A/AAAA query,
>> * we can reply with RCODE = 5 (server refuses) and that
>> diff --git a/tests/ovn.at b/tests/ovn.at
>> index 4d15d4a62..d5e51c60b 100644
>> --- a/tests/ovn.at
>> +++ b/tests/ovn.at
>> @@ -11636,6 +11636,7 @@ set_dns_params() {
>> local ttl=00000e10
>> an_count=0001
>> type=0001
>> + expected_dns_answer_alt=""
>> case $hname in
>> vm1)
>> # vm1.ovn.org
>> @@ -11652,10 +11653,13 @@ set_dns_params() {
>> vm2)
>> # vm2.ovn.org
>> query_name=03766d32036f766e036f726700
>> + # IPv4 addresses may be returned in any order due to DNS
>> round-robin.
>> # IPv4 address - 10.0.0.6
>> - expected_dns_answer=${query_name}00010001${ttl}00040a000006
>> + local ip1=${query_name}00010001${ttl}00040a000006
>> # IPv4 address - 20.0.0.4
>> -
>> expected_dns_answer=${expected_dns_answer}${query_name}00010001${ttl}000414000004
>> + local ip2=${query_name}00010001${ttl}000414000004
>> + expected_dns_answer=${ip1}${ip2}
>> + expected_dns_answer_alt=${ip2}${ip1}
>> an_count=0002
>> ;;
>> vm3)
>> @@ -11712,6 +11716,12 @@ set_dns_params() {
>> local dns_resp_header=010281200001${an_count}00000000
>> dns_req_data=${dns_req_header}${query_name}${type}0001
>>
>>
>> dns_resp_data=${dns_resp_header}${query_name}${type}0001${expected_dns_answer}
>> + # Alternative response for cases with multiple IPs (DNS round-robin).
>> + if test -n "$expected_dns_answer_alt"; then
>> +
>> dns_resp_data_alt=${dns_resp_header}${query_name}${type}0001${expected_dns_answer_alt}
>> + else
>> + dns_resp_data_alt=""
>> + fi
>> }
>>
>> # This shell function sends a DNS request packet
>> @@ -11789,17 +11799,51 @@ src_ip=`ip_to_hex 10 0 0 4`
>> dst_ip=`ip_to_hex 10 0 0 1`
>> dns_reply=1
>> test_dns 1 f00000000001 f000000000f0 $src_ip $dst_ip $dns_reply
>> $dns_req_data $dns_resp_data
>> +# Generate alternative expected response for DNS round-robin (without
>> sending packet).
>> +if test -n "$dns_resp_data_alt"; then
>> + # Build expected reply packet with alternative IP ordering.
>> + ip_len_val=`expr 28 + ${#dns_resp_data_alt} / 2`
>> + udp_len_val=`expr $ip_len_val - 20`
>> + ip_len_hex=$(printf "%x" $ip_len_val)
>> + udp_len_hex=$(printf "%x" $udp_len_val)
>> +
>> reply_alt=f00000000001f000000000f00800450000${ip_len_hex}0000000080110000
>> +
>> reply_alt=${reply_alt}${dst_ip}${src_ip}0035923400${udp_len_hex}0000${dns_resp_data_alt}
>> + echo $reply_alt > 1.expected.alt
>> +fi
>>
>> # NXT_RESUMEs should be 1.
>> OVS_WAIT_UNTIL([test 1 = `cat ofctl_monitor*.log | grep -c NXT_RESUME`])
>>
>> -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
>> -# Skipping the IPv4 checksum.
>> -OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
>> +# Check packets - for vm2, IPs may be in either order due to DNS
>> round-robin.
>> +if test -f 1.expected.alt; then
>> + $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif1-tx.pcap >
>> 1.received
>> + cut -c 53- 1.expected > 1.expected.tail
>> + cut -c 53- 1.expected.alt > 1.expected.alt.tail
>> + cut -c 53- 1.received > 1.received.tail
>> + # Accept either ordering of IPs.
>> + if diff -q 1.expected.tail 1.received.tail > /dev/null 2>&1 || \
>> + diff -q 1.expected.alt.tail 1.received.tail > /dev/null 2>&1; then
>> + :
>> + else
>> + echo "Expected (order 1):" && cat 1.expected.tail
>> + echo "Expected (order 2):" && cat 1.expected.alt.tail
>> + echo "Received:" && cat 1.received.tail
>> + AT_FAIL_IF([true])
>> + fi
>> + # Check header (first 48 chars).
>> + cut -c -48 1.expected > 1.expected.head
>> + cut -c -48 1.received > 1.received.head
>> + AT_CHECK([diff 1.expected.head 1.received.head])
>> +else
>> + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c -48"])
>> + # Skipping the IPv4 checksum.
>> + OVN_CHECK_PACKETS([hv1/vif1-tx.pcap], [1.expected], ["cut -c 53-"])
>> +fi
>>
>> reset_pcap_file hv1-vif1 hv1/vif1
>> reset_pcap_file hv1-vif2 hv1/vif2
>> -rm -f 1.expected
>> +rm -f 1.expected 1.expected.alt 1.received
>> +rm -f 1.expected.head 1.expected.tail 1.expected.alt.tail 1.received.head
>> 1.received.tail
>> rm -f 2.expected
>>
>>
>> --
>> 2.52.0
>>
>> _______________________________________________
>> dev mailing list
>> [email protected]
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>>
> If Dumitru agrees I can make the changes during merge.
>
At this point, I think it might be clearer if Mehrdad posts a v3
addressing all remaining comments. We're not in a hurry to get this
merged, we already agreed to accept it in 26.03.
Regards,
Dumitru
> Regards,
> Ales
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev