When ovn-controller is restarted, it may need multiple iterations of
main loop before completely download all related data from SB DB,
especially when ovn-monitor-all=false, so after restart, before it sees
the related localnet ports from SB DB, it treats the related patch
ports on the chassis as not needed, and deleted them. Later when it
downloads thoses port-bindings it recreates them.  For a graceful
upgrade, we don't this to happen, because it would break the traffic.

This is especially problematic at ovn-k8s setups because the external
side of the patch port is used to program some flows for external
network communication. When the patch ports are recreated, the OF port
number changes and those flows are broken. The CMS would detect and fix
the flows but it would result in even longer downtime.

This patch postpone the deletion of the patch ports, with the assumption
that leaving the unused ports on the chassis for little longer is not
harmful.

Signed-off-by: Han Zhou <[email protected]>
---
 controller/patch.c      | 15 ++++++++-
 tests/ovn-controller.at | 71 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/controller/patch.c b/controller/patch.c
index ed831302c..9faae5879 100644
--- a/controller/patch.c
+++ b/controller/patch.c
@@ -307,11 +307,24 @@ patch_run(struct ovsdb_idl_txn *ovs_idl_txn,
 
     /* Now 'existing_ports' only still contains patch ports that exist in the
      * database but shouldn't.  Delete them from the database. */
+
+    /* Wait for some iterations before really deleting any patch ports, because
+     * with conditional monitoring it is possible that related SB data is not
+     * completely downloaded yet after last restart of ovn-controller.
+     * Otherwise it may cause unncessary dataplane interruption during
+     * restart/upgrade. */
+    static int delete_patch_port_delay = 10;
+    if (delete_patch_port_delay > 0) {
+        delete_patch_port_delay--;
+    }
+
     struct shash_node *port_node;
     SHASH_FOR_EACH_SAFE (port_node, &existing_ports) {
         port = port_node->data;
         shash_delete(&existing_ports, port_node);
-        remove_port(bridge_table, port);
+        if (!delete_patch_port_delay) {
+            remove_port(bridge_table, port);
+        }
     }
     shash_destroy(&existing_ports);
 }
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index b8db342b9..a3a9122ab 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -2242,3 +2242,74 @@ OVS_WAIT_UNTIL([test 6 = $(as hv1 ovs-ofctl dump-flows 
br-int | grep -c $(port_b
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
+
+AT_SETUP([ovn-controller - restart should not delete patch ports])
+
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+check ovs-vsctl add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=lsp1
+check ovs-vsctl add-br br-ext -- \
+    set open . external-ids:ovn-bridge-mappings=extnet:br-ext
+
+# Create logical topology so that lsp1 has multiple hops to a localnet port,
+# which would require multiple iterations to download the related datapaths and
+# port_bindings from SB DB during startup.
+#
+# lsp1@hv1 -- ls1 -- lr1 -- ls2 -- lr2 -- ls-ext -- lsp-ext (localnet)
+
+check ovn-appctl -t ovn-controller vlog/set file:dbg
+check ovn-nbctl ls-add ls1 \
+    -- lsp-add ls1 lsp1 \
+    -- lsp-set-addresses lsp1 "f0:00:00:00:00:01 10.0.1.2"
+check ovn-nbctl lr-add lr1 \
+    -- lrp-add lr1 lrp-lr1-ls1 f0:00:aa:00:01:01 10.0.1.1/24 \
+    -- lsp-add ls1 lsp-ls1-lr1 \
+    -- lsp-set-type lsp-ls1-lr1 router \
+    -- lsp-set-options lsp-ls1-lr1 router-port=lrp-lr1-ls1 \
+    -- lsp-set-addresses lsp-ls1-lr1 router
+check ovn-nbctl ls-add ls2 \
+    -- lrp-add lr1 lrp-lr1-ls2 f0:00:aa:00:01:02 10.0.2.1/24 \
+    -- lsp-add ls2 lsp-ls2-lr1 \
+    -- lsp-set-type lsp-ls2-lr1 router \
+    -- lsp-set-options lsp-ls2-lr1 router-port=lrp-lr1-ls2 \
+    -- lsp-set-addresses lsp-ls2-lr1 router
+check ovn-nbctl lr-add lr2 \
+    -- lrp-add lr2 lrp-lr2-ls2 f0:00:aa:00:02:02 10.0.2.2/24 \
+    -- lsp-add ls2 lsp-ls2-lr2 \
+    -- lsp-set-type lsp-ls2-lr2 router \
+    -- lsp-set-options lsp-ls2-lr2 router-port=lrp-lr2-ls2 \
+    -- lsp-set-addresses lsp-ls2-lr2 router
+check ovn-nbctl ls-add ls-ext \
+    -- lrp-add lr2 lrp-lr2-ext f0:00:aa:00:02:03 10.0.3.1/24 \
+    -- lsp-add ls-ext lsp-ext-lr2 \
+    -- lsp-set-type lsp-ext-lr2 router \
+    -- lsp-set-options lsp-ext-lr2 router-port=lrp-lr2-ext \
+    -- lsp-set-addresses lsp-ext-lr2 router
+check ovn-nbctl lsp-add ls-ext lsp-ext \
+    -- lsp-set-type lsp-ext localnet \
+    -- lsp-set-options lsp-ext network_name=extnet \
+    -- lsp-set-addresses lsp-ext unknown
+check ovn-nbctl --wait=hv sync
+
+AT_CHECK([ovs-vsctl list-ports br-int | grep patch-br-int-to-lsp-ext], [0], 
[ignore])
+
+# Stop ovn-controller
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+# Start ovn-controller, which shouldn't cause any patch interface
+# deletion/recreation
+start_daemon ovn-controller
+for i in $(seq 20); do
+    check ovn-nbctl --wait=hv sync
+done
+
+AT_CHECK([grep "deleted interface patch" hv1/ovs-vswitchd.log], [1], [ignore])
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+
-- 
2.30.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to