From 04f9b903646cc934bbd863174697b9343ed17e49 Mon Sep 17 00:00:00 2001
From: Qingzhou Hu <[email protected]>
Date: Wed, 4 Mar 2026 09:09:53 +0800
Subject: [PATCH OVN] controller/binding: Fix missing local_datapath updates in
 OVS iface handler.

When a VIF tap interface first appears after an ovs-vswitchd restart,
OVN's incremental engine handles it via:

  binding_handle_ovs_interface_changes()
    -> consider_iface_claim()
       -> consider_vif_lport_()
          -> add_local_datapath()    /* creates a new local_datapath */

add_local_datapath() allocates the local_datapath struct with all
per-datapath fields (localnet_port, external_ports, vtep_port,
multichassis_ports) left NULL/empty.  The full binding_run() path
avoids this by doing a second pass over all relevant lports and calling
update_ld_localnet_port(), update_ld_external_ports(), etc. for each
newly added local_datapath.  The incremental path in
binding_handle_ovs_interface_changes() never performs that pass, so
these fields are never populated for the new local_datapath.

The most visible consequence is in physical.c consider_mc_group(): when
get_localnet_port() returns NULL it adds all remote chassis to the
_MC_flood group and installs Geneve tunnel output actions.  Any
broadcast or GARP sent by the just-claimed VM is therefore tunnelled to
every remote chassis instead of being forwarded out through the localnet
patch port.

The window is:
  ovs-vswitchd starts -> ovn-controller first binding_run (no VIFs yet,
  localnet update is a no-op) -> nova-compute recreates tap ->
  incremental claim -> wrong OpenFlow installed -> VM sends GARP burst
  -> tunnelled.

This is exacerbated on large deployments (many IDL cells / OVS ports)
because the longer initialisation delay makes the race much more likely.
Restarting ovn-controller closes the window by triggering a fresh full
binding_run() with the tap already present.

If two hypervisors simultaneously hit this race on the same provider
network, each chassis includes the other in its _MC_flood tunnel group,
turning a single broadcast into an overlay-level broadcast storm until
ovn-controller is restarted on at least one node.

Fix: add a post-processing block at the end of
binding_handle_ovs_interface_changes(), mirroring the equivalent block
that commit 50b3af8938c9 added to binding_handle_port_binding_changes().
That commit fixed the same class of problem for the port-binding handler
but did not cover this path.  The new block iterates over all
newly-added local datapaths recorded in tracked_dp_bindings and calls
consider_localnet_lport(), update_ld_localnet_port(),
update_ld_external_ports(), update_ld_vtep_port(), and
update_ld_multichassis_ports() as appropriate for each port binding on
those datapaths.

Fixes: 50b3af8938c9 ("binding.c: Missing local_datapath update in runtime_data port_binding handler.")
Reported-by: Qingzhou Hu <[email protected]>
Signed-off-by: Qingzhou Hu <[email protected]>
---
 controller/binding.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/controller/binding.c b/controller/binding.c
index 0712d7030..2e3a54f13 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -2666,6 +2666,45 @@ binding_handle_ovs_interface_changes(struct binding_ctx_in *b_ctx_in,
         }
     }

+    if (handled) {
+        /* There may be new local datapaths added by the above handling, so
+         * go through each port_binding of newly added local datapaths to
+         * update related local_datapaths if needed. */
+        struct shash bridge_mappings = SHASH_INITIALIZER(&bridge_mappings);
+        add_ovs_bridge_mappings(b_ctx_in->ovs_table,
+                                b_ctx_in->bridge_table,
+                                &bridge_mappings);
+        struct tracked_datapath *t_dp;
+        HMAP_FOR_EACH (t_dp, node, b_ctx_out->tracked_dp_bindings) {
+            if (t_dp->tracked_type != TRACKED_RESOURCE_NEW) {
+                continue;
+            }
+            struct sbrec_port_binding *target =
+                sbrec_port_binding_index_init_row(
+                    b_ctx_in->sbrec_port_binding_by_datapath);
+            sbrec_port_binding_index_set_datapath(target, t_dp->dp);
+            const struct sbrec_port_binding *pb;
+            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
+                    b_ctx_in->sbrec_port_binding_by_datapath) {
+                enum en_lport_type lport_type = get_lport_type(pb);
+                if (lport_type == LP_LOCALNET) {
+                    consider_localnet_lport(pb, b_ctx_out);
+                    update_ld_localnet_port(pb, &bridge_mappings,
+                                            b_ctx_out->local_datapaths);
+                } else if (lport_type == LP_EXTERNAL) {
+                    update_ld_external_ports(pb, b_ctx_out->local_datapaths);
+                } else if (lport_type == LP_VTEP) {
+                    update_ld_vtep_port(pb, b_ctx_out->local_datapaths);
+                } else if (pb->n_additional_chassis) {
+                    update_ld_multichassis_ports(pb,
+                                                 b_ctx_out->local_datapaths);
+                }
+            }
+            sbrec_port_binding_index_destroy_row(target);
+        }
+        shash_destroy(&bridge_mappings);
+    }
+
     return handled;
 }

--
2.50.1 (Apple Git-155)



_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to