When az1 is paused, there is a small time window during which no az is
active and owns the lock (the time for ovn-ic-sb to receive the unlock
from az1 and notify az2).
Any ovn-ic-nb change received during this time, which would have resulted
in ovn-ic-sb changes, would be lost (from a ovn-ic-sb perspective), as
az1 is paused and az2 is not (yet) the leader.

While at it, properly log when ISB is really locked: previous log was
confusing as logging when acquiring the lock (which could fail/takes some
time).

This issue caused the following test to fail sporadically:
"ovn-ic - IC-SB lock acquisition and transit switch creation"

Fixes: 052a298bb90e ("ovn-ic: Use dual IC-SB connections to prevent constraint 
violations.")
Signed-off-by: Xavier Simonart <[email protected]>
---
 ic/ovn-ic.c     | 16 ++++++++++++++--
 ic/ovn-ic.h     |  3 ++-
 tests/ovn-ic.at | 20 ++++++++++++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c
index b5c961d2d..b197ceeab 100644
--- a/ic/ovn-ic.c
+++ b/ic/ovn-ic.c
@@ -3809,6 +3809,7 @@ main(int argc, char *argv[])
 
     exiting = false;
     state.had_lock = false;
+    state.had_isb_lock = false;
     state.paused = false;
 
     while (!exiting) {
@@ -3844,8 +3845,7 @@ main(int argc, char *argv[])
                  * Ensure that only a single ovn-ic has the permission to
                  * write to IC-SB.
                  */
-                VLOG_INFO("OVN ISB lock acquired. "
-                          "This ovn-ic instance is now active.");
+                VLOG_INFO("Acquiring OVN ISB lock.");
                 ovsdb_idl_set_lock(ovnisb_idl_loop.idl, "ovn_ic_sb");
             }
 
@@ -3969,6 +3969,17 @@ main(int argc, char *argv[])
                 state.had_lock = false;
             }
 
+            if (!state.had_isb_lock && ovsdb_idl_has_lock(ctx.ovnisb_idl)) {
+                VLOG_INFO("OVN ISB lock acquired. "
+                          "This ovn-ic instance is now active.");
+                state.had_isb_lock = true;
+                inc_proc_ic_force_recompute_immediate();
+            } else if (state.had_isb_lock &&
+                       !ovsdb_idl_has_lock(ctx.ovnisb_idl)) {
+                VLOG_INFO("OVN ISB lock lost.");
+                state.had_isb_lock = false;
+            }
+
             if (ovsdb_idl_has_lock(ctx.ovnsb_idl) &&
                 ovsdb_idl_has_ever_connected(ctx.ovnnb_idl) &&
                 ovsdb_idl_has_ever_connected(ctx.ovnsb_idl) &&
@@ -4055,6 +4066,7 @@ main(int argc, char *argv[])
                 VLOG_INFO("This ovn-ic instance is now paused. "
                           "Removing IC-SB lock.");
                 ovsdb_idl_set_lock(ovnisb_idl_loop.idl, NULL);
+                state.had_isb_lock = false;
             }
 
             if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl) ||
diff --git a/ic/ovn-ic.h b/ic/ovn-ic.h
index 9f52bb0f9..1d3f4e4de 100644
--- a/ic/ovn-ic.h
+++ b/ic/ovn-ic.h
@@ -53,7 +53,8 @@ struct ic_context {
 };
 
 struct ic_state {
-    bool had_lock;
+    bool had_lock; /* Lock to local AZ SB */
+    bool had_isb_lock; /* Lock to ISB */
     bool paused;
 };
 
diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at
index 007ba0eb8..0826632e9 100644
--- a/tests/ovn-ic.at
+++ b/tests/ovn-ic.at
@@ -4871,6 +4871,26 @@ check_column "ts1 ts2 ts3" ic-sb:Datapath_Binding 
transit_switch
 
 as az2 check ovn-appctl -t ic/ovn-ic resume
 
+# Now do the same on a "slow" ovn-ic-sb
+OVS_WAIT_UNTIL([test "x$(as az2 ovn-appctl -t ic/ovn-ic status)" = "xStatus: 
active"])
+
+AS_BOX([$(date +%H:%M:%S.%03N) Pausing ovn-ic-sb])
+AT_CHECK([kill -STOP $(cat ovn-ic-sb/ovsdb-server.pid)])
+
+as az1 check ovn-appctl -t ic/ovn-ic pause
+OVS_WAIT_UNTIL([test "x$(as az1 ovn-appctl -t ic/ovn-ic status)" = "xStatus: 
paused"])
+
+check ovn-ic-nbctl ts-add ts4
+
+AS_BOX([$(date +%H:%M:%S.%03N) Resuming ovn-ic-sb])
+AT_CHECK([kill -CONT $(cat ovn-ic-sb/ovsdb-server.pid)])
+
+wait_row_count ic-sb:Datapath_Binding 1 transit_switch=ts4
+check_row_count ic-sb:Datapath_Binding 4
+check_column "ts1 ts2 ts3 ts4" ic-sb:Datapath_Binding transit_switch
+
+as az1 check ovn-appctl -t ic/ovn-ic resume
+
 for i in 1 2; do
     az=az$i
     ovn_as $az
-- 
2.47.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to