Checking "is-remote" in chassis->other_config returns false if done within
the same loop as sbrec_chassis_update_other_config_setkey(chassis,
"is-remote", "true"). Hence, when gateway_run, creating remote chassis in
az sb, was executed within the same loop as port_binding_run creating
transit-router-port Port_Binding the Port Binding was created with the
wrong AZ.

A new test has been added reproducing this issue. Depending on which AZ
inserted Port_Binding first, then we were seeing either one transaction
failure, or a continuous loop of transaction failure.

Signed-off-by: Xavier Simonart <[email protected]>
---
 ic/ovn-ic.c     |  9 ++++++++-
 tests/ovn-ic.at | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c
index 4f7e33da3..5c8211c1a 100644
--- a/ic/ovn-ic.c
+++ b/ic/ovn-ic.c
@@ -478,8 +478,15 @@ sync_isb_gw_to_sb(struct ic_context *ctx,
                   const struct icsbrec_gateway *gw,
                   const struct sbrec_chassis *chassis)
 {
+    struct smap temp_map;
     sbrec_chassis_set_hostname(chassis, gw->hostname);
-    sbrec_chassis_update_other_config_setkey(chassis, "is-remote", "true");
+    smap_clone(&temp_map, &chassis->other_config);
+    smap_replace(&temp_map, "is-remote", "true");
+    /* Use sbrec_chassis_set_other_config instead of
+     * sbrec_chassis_update_other_config_setkey so the in-memory datum is
+     * updated for reads in the same loop iteration. */
+    sbrec_chassis_set_other_config(chassis, &temp_map);
+    smap_destroy(&temp_map);
 
     /* Sync encaps used by this gateway. */
     ovs_assert(gw->n_encaps);
diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at
index 9b6a58acd..9fc2bdec6 100644
--- a/tests/ovn-ic.at
+++ b/tests/ovn-ic.at
@@ -4418,6 +4418,56 @@ OVN_CLEANUP_IC([az1], [az2])
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-ic -- Add transit router remote port - race condition])
+
+ovn_init_ic_db
+net_add n1
+net_add n2
+ovn_start az1
+ovn_start az2
+
+OVS_WAIT_UNTIL([test 2 = `ovn-ic-sbctl show | wc -l`])
+
+AT_CHECK([ovn-ic-sbctl show], [0], [dnl
+availability-zone az1
+availability-zone az2
+])
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_az_attach az1 n1 br-phys 192.168.0.1
+ovs-vsctl set open . external-ids:ovn-is-interconn=true
+wait_row_count Chassis 1 name=hv1
+
+AS_BOX([$(date +%H:%M:%S.%03N) Pausing ovn-ic-sb])
+on_exit "test -e ovn-ic-sb/ovsdb-server.pid && kill -CONT $(cat 
ovn-ic-sb/ovsdb-server.pid)"
+AT_CHECK([kill -STOP $(cat ovn-ic-sb/ovsdb-server.pid)])
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_az_attach az2 n2 br-phys 192.168.0.2
+ovs-vsctl set open . external-ids:ovn-is-interconn=true
+wait_row_count Chassis 1 name=hv2
+
+ovn_as az1
+check ovn-ic-nbctl tr-add tr0
+check ovn-ic-nbctl trp-add tr0 tr0-p0 00:00:00:11:22:00 192.168.10.10/24 
192.168.10.20/24 chassis=hv2
+
+AS_BOX([$(date +%H:%M:%S.%03N) Resuming ovn-ic-sb])
+AT_CHECK([kill -CONT $(cat ovn-ic-sb/ovsdb-server.pid)])
+
+wait_row_count Port_Binding 1 logical_port=tr0-p0
+for az in az1 az2; do
+    AT_CHECK([grep "Transaction causes multiple rows" $az/ic/ovn-ic.log], [1])
+done
+
+OVN_CLEANUP_IC([az1], [az2])
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ovn-ic -- Transit router delete port])
 
-- 
2.47.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to