Checking "is-remote" in chassis->other_config returns false if done within the same loop as sbrec_chassis_update_other_config_setkey(chassis, "is-remote", "true"). Hence, when gateway_run, creating remote chassis in az sb, was executed within the same loop as port_binding_run creating transit-router-port Port_Binding the Port Binding was created with the wrong AZ.
A new test has been added reproducing this issue. Depending on which AZ inserted Port_Binding first, then we were seeing either one transaction failure, or a continuous loop of transaction failure. Signed-off-by: Xavier Simonart <[email protected]> --- ic/ovn-ic.c | 9 ++++++++- tests/ovn-ic.at | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c index 4f7e33da3..5c8211c1a 100644 --- a/ic/ovn-ic.c +++ b/ic/ovn-ic.c @@ -478,8 +478,15 @@ sync_isb_gw_to_sb(struct ic_context *ctx, const struct icsbrec_gateway *gw, const struct sbrec_chassis *chassis) { + struct smap temp_map; sbrec_chassis_set_hostname(chassis, gw->hostname); - sbrec_chassis_update_other_config_setkey(chassis, "is-remote", "true"); + smap_clone(&temp_map, &chassis->other_config); + smap_replace(&temp_map, "is-remote", "true"); + /* Use sbrec_chassis_set_other_config instead of + * sbrec_chassis_update_other_config_setkey so the in-memory datum is + * updated for reads in the same loop iteration. */ + sbrec_chassis_set_other_config(chassis, &temp_map); + smap_destroy(&temp_map); /* Sync encaps used by this gateway. */ ovs_assert(gw->n_encaps); diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at index 9b6a58acd..9fc2bdec6 100644 --- a/tests/ovn-ic.at +++ b/tests/ovn-ic.at @@ -4418,6 +4418,56 @@ OVN_CLEANUP_IC([az1], [az2]) AT_CLEANUP ]) +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ovn-ic -- Add transit router remote port - race condition]) + +ovn_init_ic_db +net_add n1 +net_add n2 +ovn_start az1 +ovn_start az2 + +OVS_WAIT_UNTIL([test 2 = `ovn-ic-sbctl show | wc -l`]) + +AT_CHECK([ovn-ic-sbctl show], [0], [dnl +availability-zone az1 +availability-zone az2 +]) + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_az_attach az1 n1 br-phys 192.168.0.1 +ovs-vsctl set open . external-ids:ovn-is-interconn=true +wait_row_count Chassis 1 name=hv1 + +AS_BOX([$(date +%H:%M:%S.%03N) Pausing ovn-ic-sb]) +on_exit "test -e ovn-ic-sb/ovsdb-server.pid && kill -CONT $(cat ovn-ic-sb/ovsdb-server.pid)" +AT_CHECK([kill -STOP $(cat ovn-ic-sb/ovsdb-server.pid)]) + +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_az_attach az2 n2 br-phys 192.168.0.2 +ovs-vsctl set open . external-ids:ovn-is-interconn=true +wait_row_count Chassis 1 name=hv2 + +ovn_as az1 +check ovn-ic-nbctl tr-add tr0 +check ovn-ic-nbctl trp-add tr0 tr0-p0 00:00:00:11:22:00 192.168.10.10/24 192.168.10.20/24 chassis=hv2 + +AS_BOX([$(date +%H:%M:%S.%03N) Resuming ovn-ic-sb]) +AT_CHECK([kill -CONT $(cat ovn-ic-sb/ovsdb-server.pid)]) + +wait_row_count Port_Binding 1 logical_port=tr0-p0 +for az in az1 az2; do + AT_CHECK([grep "Transaction causes multiple rows" $az/ic/ovn-ic.log], [1]) +done + +OVN_CLEANUP_IC([az1], [az2]) +AT_CLEANUP +]) + OVN_FOR_EACH_NORTHD([ AT_SETUP([ovn-ic -- Transit router delete port]) -- 2.47.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
