Hi Ales,

I think this is probably going to do a good enough job of avoiding the issue, so

Acked-by: Mark Michelson <[email protected]>


We should stay aware if this issue still happens and see if we need to expand on this in the future. One idea I had was that each ovn-controller could have a constant offset instead of having each one choose a random value every time. So like ovn-controller 1 will always wait 5ms, and ovn-controller will always wait 15 ms, etc. This could lower the chances of the race condition even more. But I don't think we need to try to go this route unless we have to.

On 6/30/22 09:39, Ales Musil wrote:
The ovn-controller had a race condition over MAC
binding table with other controllers. When multiple
controllers received GARP from single source usually
the one who was able to win the race put it into SB.
The others got transaction error which triggered
full recompute even if it's not needed.

In order to reduce the chance of multiple controllers
trying to insert same row at the same time add slight
delay to the MAC binding processing. This delay
is random interval between 0-49 in ms. This greatly
reduces the chance that multiple controllers will try
to add MAC binding at exactly the same time.

Local testing with this delay vs without show significantly
reduced chance of hitting the transaction error.

During the testing 10 GARPs was sent to two controllers
at the same time. Without proposed fix at least one controller
had multiple transaction errors present every test iteration.

With the proposed fix the transaction error was reduced to a
single one when it happened which was usually in less than
10% of the iterations.

As was mentioned before the race condition can still happen,
but the chance is greatly reduced.

Suggested-by: Daniel Alvarez Sanchez <[email protected]>
Signed-off-by: Ales Musil <[email protected]>
---
  controller/pinctrl.c | 38 +++++++++++++++++++++++++++++++++++++-
  tests/ovn.at         |  7 +++++++
  tests/system-ovn.at  |  2 ++
  3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index f954362b7..1ba9e399a 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -4104,6 +4104,31 @@ pinctrl_destroy(void)
  /* Contains "struct mac_binding"s. */
  static struct hmap put_mac_bindings;
+#define MAX_PUT_MAC_BINDINGS_DELAY 50
+
+struct put_mac_bindings_delay {
+    long long last_insert;
+    uint32_t delay;
+};
+
+static struct put_mac_bindings_delay put_mac_bindings_delay;
+
+static bool
+put_mac_bindings_delay_has_expired(void)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    return time_msec() - put_mac_bindings_delay.last_insert >=
+           put_mac_bindings_delay.delay;
+}
+
+static void
+put_mac_binding_delay_renew(void)
+    OVS_REQUIRES(pinctrl_mutex)
+{
+    put_mac_bindings_delay.delay = random_range(MAX_PUT_MAC_BINDINGS_DELAY);
+    put_mac_bindings_delay.last_insert = time_msec();
+}
+
  static void
  init_put_mac_bindings(void)
  {
@@ -4142,6 +4167,11 @@ pinctrl_handle_put_mac_binding(const struct flow *md,
          return;
      }
+ /* Add slight delay to the packet processing if we are past the old one. */
+    if (put_mac_bindings_delay_has_expired()) {
+        put_mac_binding_delay_renew();
+    }
+
      /* We can send the buffered packet once the main ovn-controller
       * thread calls pinctrl_run() and it writes the mac_bindings stored
       * in 'put_mac_bindings' hmap into the Southbound MAC_Binding table. */
@@ -4296,6 +4326,11 @@ run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn,
          return;
      }
+ /* We didn't go over the random delay yet. */
+    if (!put_mac_bindings_delay_has_expired()) {
+        return;
+    }
+
      const struct mac_binding *mb;
      HMAP_FOR_EACH (mb, hmap_node, &put_mac_bindings) {
          run_put_mac_binding(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
@@ -4352,9 +4387,10 @@ run_buffered_binding(struct ovsdb_idl_index 
*sbrec_mac_binding_by_lport_ip,
static void
  wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
+    OVS_REQUIRES(pinctrl_mutex)
  {
      if (ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) {
-        poll_immediate_wake();
+        poll_timer_wait_until(put_mac_bindings_delay.delay);
      }
  }
diff --git a/tests/ovn.at b/tests/ovn.at
index 6e35bb5b8..ff57df725 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -19561,6 +19561,7 @@ data=0800bee4391a0001
send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip 0d1c $data
  send_arp_reply 2 1 $dst_mac $router_mac1 $dst_ip $router_ip
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "172.16.1.10"])
  echo $(get_arp_req $router_mac1 $router_ip $dst_ip) > expected
  echo 
"${dst_mac}${router_mac1}08004500001c00004000fe0122b5${router_ip}${dst_ip}${data}" 
>> expected
@@ -19601,6 +19602,7 @@ gw_router_mac=f00000010a0a
  send_icmp_packet 2 2 f00000110203 $router_mac0 $src_ip $dst_ip 0c1b $data
  echo $(get_arp_req f00000010204 $fip_ip $gw_router_ip) >> expected
  send_arp_reply 2 1 $gw_router_mac f00000010204 $gw_router_ip $fip_ip
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "172.16.1.11"])
  echo 
"${gw_router_mac}f0000001020408004500001c00004000fe0121b4${fip_ip}${dst_ip}${data}" 
>> expected
AT_CHECK([as hv2 ovs-ofctl dump-flows br-int table=37 | grep pkt_mark=0x64 | grep -q n_packets=1],[0])
@@ -23009,6 +23011,11 @@ grep table_id=10 | wc -l`])
check_row_count MAC_Binding 1 +OVS_WAIT_UNTIL(
+    [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep 
dl_src=50:54:00:00:00:13 \
+| wc -l`]
+)
+
  AT_CHECK([ovn-sbctl --format=csv --bare --columns logical_port,ip,mac \
  list mac_binding], [0], [lr0-sw0
  10.0.0.30
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index df2da3408..e68c3762b 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -5278,6 +5278,8 @@ chmod 664 /var/lib/dhcp/dhcpd6.leases
  NS_CHECK_EXEC([server], [dhcpd -6 -f s1 > dhcpd.log &])
  ovn-nbctl --wait=hv sync
+OVS_WAIT_UNTIL([ovn-sbctl list mac_binding | grep -q "f0:00:00:01:02:05"])
+
  OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-public ipv6_prefix | cut 
-c4-15)" = ""])
  OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw0 ipv6_prefix | cut 
-c4-15)" = ""])
  OVS_WAIT_WHILE([test "$(ovn-nbctl get logical_router_port rp-sw1 ipv6_prefix | cut 
-c4-15)" = ""])


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to