The ovn-controller had a race condition over MAC binding table with other controllers. When multiple controllers received GARP from single source usually the one who was able to win the race put it into SB. The others got transaction error which triggered full recompute even if it's not needed.
In order to reduce the chance of multiple controllers trying to insert same row at the same time add slight delay to the MAC binding processing. This delay is random interval between 0-49 in ms. This greatly reduces the chance that multiple controllers will try to add MAC binding at exactly the same time. Local testing with this delay vs without show significantly reduced chance of hitting the transaction error. During the testing 10 GARPs was sent to two controllers at the same time. Without proposed fix at least one controller had multiple transaction errors present every test iteration. With the proposed fix the transaction error was reduced to a single one when it happened which was usually in less than 10% of the iterations. As was mentioned before the race condition can still happen, but the chance is greatly reduced. Suggested-by: Daniel Alvarez Sanchez <[email protected]> Signed-off-by: Ales Musil <[email protected]> --- controller/pinctrl.c | 38 +++++++++++++++++++++++++++++++++++++- tests/ovn.at | 7 +++++++ tests/system-ovn.at | 2 ++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/controller/pinctrl.c b/controller/pinctrl.c index f954362b7..1ba9e399a 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -4104,6 +4104,31 @@ pinctrl_destroy(void) /* Contains "struct mac_binding"s. */ static struct hmap put_mac_bindings; +#define MAX_PUT_MAC_BINDINGS_DELAY 50 + +struct put_mac_bindings_delay { + long long last_insert; + uint32_t delay; +}; + +static struct put_mac_bindings_delay put_mac_bindings_delay; + +static bool +put_mac_bindings_delay_has_expired(void) + OVS_REQUIRES(pinctrl_mutex) +{ + return time_msec() - put_mac_bindings_delay.last_insert >= + put_mac_bindings_delay.delay; +} + +static void +put_mac_binding_delay_renew(void) + OVS_REQUIRES(pinctrl_mutex) +{ + put_mac_bindings_delay.delay = random_range(MAX_PUT_MAC_BINDINGS_DELAY); + put_mac_bindings_delay.last_insert = time_msec(); +} + static void init_put_mac_bindings(void) { @@ -4142,6 +4167,11 @@ pinctrl_handle_put_mac_binding(const struct flow *md, return; } + /* Add slight delay to the packet processing if we are past the old one. */ + if (put_mac_bindings_delay_has_expired()) { + put_mac_binding_delay_renew(); + } + /* We can send the buffered packet once the main ovn-controller * thread calls pinctrl_run() and it writes the mac_bindings stored * in 'put_mac_bindings' hmap into the Southbound MAC_Binding table. */ @@ -4296,6 +4326,11 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn, return; } + /* We didn't go over the random delay yet. */ + if (!put_mac_bindings_delay_has_expired()) { + return; + } + const struct mac_binding *mb; HMAP_FOR_EACH (mb, hmap_node, &put_mac_bindings) { run_put_mac_binding(ovnsb_idl_txn, sbrec_datapath_binding_by_key, @@ -4352,9 +4387,10 @@ run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip, static void wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn) + OVS_REQUIRES(pinctrl_mutex) { if (ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) { - poll_immediate_wake(); + poll_timer_wait_until(put_mac_bindings_delay.delay); } } diff --git a/tests/ovn.at b/tests/ovn.at index 6e35bb5b8..ff57df725 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -19561,6 +19561,7 @@ data=0800bee4391a0001 send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip 0d1c $data send_arp_reply 2 1 $dst_mac $router_mac1 $dst_ip $router_ip +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "172.16.1.10"]) echo $(get_arp_req $router_mac1 $router_ip $dst_ip) > expected echo "${dst_mac}${router_mac1}08004500001c00004000fe0122b5${router_ip}${dst_ip}${data}" >> expected @@ -19601,6 +19602,7 @@ gw_router_mac=f00000010a0a send_icmp_packet 2 2 f00000110203 $router_mac0 $src_ip $dst_ip 0c1b $data echo $(get_arp_req f00000010204 $fip_ip $gw_router_ip) >> expected send_arp_reply 2 1 $gw_router_mac f00000010204 $gw_router_ip $fip_ip +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "172.16.1.11"]) echo "${gw_router_mac}f0000001020408004500001c00004000fe0121b4${fip_ip}${dst_ip}${data}" >> expected AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=37 | grep pkt_mark=0x64 | grep -q n_packets=1],[0]) @@ -23009,6 +23011,11 @@ grep table_id=10 | wc -l`]) check_row_count MAC_Binding 1 +OVS_WAIT_UNTIL( + [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep dl_src=50:54:00:00:00:13 \ +| wc -l`] +) + AT_CHECK([ovn-sbctl --format=csv --bare --columns logical_port,ip,mac \ list mac_binding], [0], [lr0-sw0 10.0.0.30 diff --git a/tests/system-ovn.at b/tests/system-ovn.at index df2da3408..e68c3762b 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -5278,6 +5278,8 @@ chmod 664 /var/lib/dhcp/dhcpd6.leases NS_CHECK_EXEC([server], [dhcpd -6 -f s1 > dhcpd.log &]) ovn-nbctl --wait=hv sync +OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "f0:00:00:01:02:05"]) + OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-public ipv6_prefix | cut -c4-15)" = ""]) OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw0 ipv6_prefix | cut -c4-15)" = ""]) OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw1 ipv6_prefix | cut -c4-15)" = ""]) -- 2.35.3 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
