The ovn-controller had a race condition over MAC
binding table with other controllers. When multiple
controllers received GARP from single source usually
the one who was able to win the race put it into SB.
The others got transaction error which triggered
full recompute even if it's not needed.
In order to reduce the chance of multiple controllers
trying to insert same row at the same time add slight
delay to the MAC binding processing. This delay
is random interval between 0-49 in ms. This greatly
reduces the chance that multiple controllers will try
to add MAC binding at exactly the same time.
Local testing with this delay vs without show significantly
reduced chance of hitting the transaction error.
During the testing 10 GARPs was sent to two controllers
at the same time. Without proposed fix at least one controller
had multiple transaction errors present every test iteration.
With the proposed fix the transaction error was reduced to a
single one when it happened which was usually in less than
10% of the iterations.
As was mentioned before the race condition can still happen,
but the chance is greatly reduced.
Suggested-by: Daniel Alvarez Sanchez <[email protected]>
Signed-off-by: Ales Musil <[email protected]>
---
controller/pinctrl.c | 38 +++++++++++++++++++++++++++++++++++++-
tests/ovn.at | 7 +++++++
tests/system-ovn.at | 2 ++
3 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index f954362b7..1ba9e399a 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4104,6 +4104,31 @@ pinctrl_destroy(void)
/* Contains "struct mac_binding"s. */
static struct hmap put_mac_bindings;
+#define MAX_PUT_MAC_BINDINGS_DELAY 50
+
+struct put_mac_bindings_delay {
+ long long last_insert;
+ uint32_t delay;
+};
+
+static struct put_mac_bindings_delay put_mac_bindings_delay;
+
+static bool
+put_mac_bindings_delay_has_expired(void)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ return time_msec() - put_mac_bindings_delay.last_insert >=
+ put_mac_bindings_delay.delay;
+}
+
+static void
+put_mac_binding_delay_renew(void)
+ OVS_REQUIRES(pinctrl_mutex)
+{
+ put_mac_bindings_delay.delay = random_range(MAX_PUT_MAC_BINDINGS_DELAY);
+ put_mac_bindings_delay.last_insert = time_msec();
+}
+
static void
init_put_mac_bindings(void)
{
@@ -4142,6 +4167,11 @@ pinctrl_handle_put_mac_binding(const struct flow *md,
return;
}
+ /* Add slight delay to the packet processing if we are past the old one. */
+ if (put_mac_bindings_delay_has_expired()) {
+ put_mac_binding_delay_renew();
+ }
+
/* We can send the buffered packet once the main ovn-controller
* thread calls pinctrl_run() and it writes the mac_bindings stored
* in 'put_mac_bindings' hmap into the Southbound MAC_Binding table. */
@@ -4296,6 +4326,11 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn,
return;
}
+ /* We didn't go over the random delay yet. */
+ if (!put_mac_bindings_delay_has_expired()) {
+ return;
+ }
+
const struct mac_binding *mb;
HMAP_FOR_EACH (mb, hmap_node, &put_mac_bindings) {
run_put_mac_binding(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
@@ -4352,9 +4387,10 @@ run_buffered_binding(struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip,
static void
wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
+ OVS_REQUIRES(pinctrl_mutex)
{
if (ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) {
- poll_immediate_wake();
+ poll_timer_wait_until(put_mac_bindings_delay.delay);
}
}
diff --git a/tests/ovn.at b/tests/ovn.at
index 6e35bb5b8..ff57df725 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -19561,6 +19561,7 @@ data=0800bee4391a0001
send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip 0d1c $data
send_arp_reply 2 1 $dst_mac $router_mac1 $dst_ip $router_ip
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "172.16.1.10"])
echo $(get_arp_req $router_mac1 $router_ip $dst_ip) > expected
echo
"${dst_mac}${router_mac1}08004500001c00004000fe0122b5${router_ip}${dst_ip}${data}"
>> expected
@@ -19601,6 +19602,7 @@ gw_router_mac=f00000010a0a
send_icmp_packet 2 2 f00000110203 $router_mac0 $src_ip $dst_ip 0c1b $data
echo $(get_arp_req f00000010204 $fip_ip $gw_router_ip) >> expected
send_arp_reply 2 1 $gw_router_mac f00000010204 $gw_router_ip $fip_ip
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "172.16.1.11"])
echo
"${gw_router_mac}f0000001020408004500001c00004000fe0121b4${fip_ip}${dst_ip}${data}"
>> expected
AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=37 | grep pkt_mark=0x64 | grep -q n_packets=1],[0])
@@ -23009,6 +23011,11 @@ grep table_id=10 | wc -l`])
check_row_count MAC_Binding 1
+OVS_WAIT_UNTIL(
+ [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep
dl_src=50:54:00:00:00:13 \
+| wc -l`]
+)
+
AT_CHECK([ovn-sbctl --format=csv --bare --columns logical_port,ip,mac \
list mac_binding], [0], [lr0-sw0
10.0.0.30
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index df2da3408..e68c3762b 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -5278,6 +5278,8 @@ chmod 664 /var/lib/dhcp/dhcpd6.leases
NS_CHECK_EXEC([server], [dhcpd -6 -f s1 > dhcpd.log &])
ovn-nbctl --wait=hv sync
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "f0:00:00:01:02:05"])
+
OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-public ipv6_prefix | cut
-c4-15)" = ""])
OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw0 ipv6_prefix | cut
-c4-15)" = ""])
OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw1 ipv6_prefix | cut
-c4-15)" = ""])