On 8/26/25 6:26 PM, Ales Musil wrote:
> Make sure we remove the stale Static MAC Bindings if there is SB
> datapath re-creation in a single transaction. Otherwise, northd
> would be stuck in a commit failed loop due to "referential integrity
> violation" as the static mac binding would still reference the old
> datapath.
> 
> At the same time unify the check if dynamic/static mac binding is
> stale as it was different up until now. Both of them use the same
> columns as an indication of logical port and datapath.
> 
> Fixes: b22684d4e37c ("Add a northbound interface to program MAC_Binding 
> table")
> Reported-at: https://issues.redhat.com/browse/FDP-1623
> Signed-off-by: Ales Musil <amu...@redhat.com>
> ---
> v2: Replace the Fixes tags.
>     Remove the redundant check for op->nbrp.
>     Update the commit message with more details.
> ---
>  northd/northd.c     | 28 +++++++++++++++++++---------
>  tests/ovn-northd.at |  8 ++++++++
>  2 files changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/northd/northd.c b/northd/northd.c
> index 015f30a35..4b5b6584e 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -2867,6 +2867,19 @@ common:
>      }
>  }
>  
> +static bool
> +mac_binding_is_stale(const struct hmap *lr_datapaths,
> +                     const struct hmap *lr_ports,
> +                     const struct sbrec_datapath_binding *dp,
> +                     const char *logical_port)
> +{
> +    const struct ovn_datapath *od =
> +        ovn_datapath_from_sbrec(NULL, lr_datapaths, dp);
> +
> +    return !od || ovn_datapath_is_stale(od) ||
> +           !ovn_port_find(lr_ports, logical_port);
> +}
> +
>  /* Remove mac_binding entries that refer to logical_ports which are
>   * deleted. */
>  static void
> @@ -2876,11 +2889,8 @@ cleanup_mac_bindings(
>  {
>      const struct sbrec_mac_binding *b;
>      SBREC_MAC_BINDING_TABLE_FOR_EACH_SAFE (b, sbrec_mac_binding_table) {
> -        const struct ovn_datapath *od =
> -            ovn_datapath_from_sbrec(NULL, lr_datapaths, b->datapath);
> -
> -        if (!od || ovn_datapath_is_stale(od) ||
> -                !ovn_port_find(lr_ports, b->logical_port)) {
> +        if (mac_binding_is_stale(lr_datapaths, lr_ports, b->datapath,
> +                                 b->logical_port)) {
>              sbrec_mac_binding_delete(b);
>          }
>      }
> @@ -18850,7 +18860,7 @@ build_static_mac_binding_table(
>      struct ovsdb_idl_txn *ovnsb_txn,
>      const struct nbrec_static_mac_binding_table *nbrec_static_mb_table,
>      const struct sbrec_static_mac_binding_table *sbrec_static_mb_table,
> -    struct hmap *lr_ports)
> +    const struct hmap *lr_datapaths, const struct hmap *lr_ports)
>  {
>      /* Cleanup SB Static_MAC_Binding entries which do not have corresponding
>       * NB Static_MAC_Binding entries, and SB Static_MAC_Binding entries for
> @@ -18866,8 +18876,8 @@ build_static_mac_binding_table(
>              continue;
>          }
>  
> -        struct ovn_port *op = ovn_port_find(lr_ports, nb_smb->logical_port);
> -        if (!op || !op->nbrp || !op->od || !op->od->sdp->sb_dp) {
> +        if (mac_binding_is_stale(lr_datapaths, lr_ports, sb_smb->datapath,
> +                                 sb_smb->logical_port)) {
>              sbrec_static_mac_binding_delete(sb_smb);
>          }

FWIW, in addition to obscuring the logic in this function, the change above
also makes the comment at the top of the loop inaccurate, making the whole
thing even more confusing.  So, at least that should be fixed.

The fact that the row will be re-created, but will have the same UUID also
doesn't sound particularly great.

I'd much prefer a solution like this (not tested):

diff --git a/northd/northd.c b/northd/northd.c
index 015f30a35..fb494713a 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -18852,28 +18852,16 @@ build_static_mac_binding_table(
     const struct sbrec_static_mac_binding_table *sbrec_static_mb_table,
     struct hmap *lr_ports)
 {
-    /* Cleanup SB Static_MAC_Binding entries which do not have corresponding
-     * NB Static_MAC_Binding entries, and SB Static_MAC_Binding entries for
-     * which there is not a NB Logical_Router_Port of the same name. */
-    const struct nbrec_static_mac_binding *nb_smb;
+    struct hmapx stale = HMAPX_INITIALIZER(&stale);
     const struct sbrec_static_mac_binding *sb_smb;
     SBREC_STATIC_MAC_BINDING_TABLE_FOR_EACH_SAFE (sb_smb,
         sbrec_static_mb_table) {
-        nb_smb = nbrec_static_mac_binding_table_get_for_uuid(
-            nbrec_static_mb_table, &sb_smb->header_.uuid);
-        if (!nb_smb) {
-            sbrec_static_mac_binding_delete(sb_smb);
-            continue;
-        }
-
-        struct ovn_port *op = ovn_port_find(lr_ports, nb_smb->logical_port);
-        if (!op || !op->nbrp || !op->od || !op->od->sdp->sb_dp) {
-            sbrec_static_mac_binding_delete(sb_smb);
-        }
+        hmapx_add(&stale, CONST_CAST(void *, sb_smb));
     }
 
     /* Create/Update SB Static_MAC_Binding entries with corresponding values
      * from NB Static_MAC_Binding entries. */
+    const struct nbrec_static_mac_binding *nb_smb;
     NBREC_STATIC_MAC_BINDING_TABLE_FOR_EACH (
         nb_smb, nbrec_static_mb_table) {
         struct ovn_port *op = ovn_port_find(lr_ports, nb_smb->logical_port);
@@ -18905,10 +18893,22 @@ build_static_mac_binding_table(
                         sbrec_static_mac_binding_set_override_dynamic_mac(mb,
                             nb_smb->override_dynamic_mac);
                     }
+                    if (od->sdp->sb_dp != mb->datapath) {
+                        sbrec_static_mac_binding_set_datapath(
+                            mb, od->sdp->sb_dp);
+                    }
+                    hmapx_find_and_delete(&stale, mb);
                 }
             }
         }
     }
+
+    /* Cleanup SB Static_MAC_Binding entries which are no longer needed. */
+    struct hmapx_node *node;
+    HMAPX_FOR_EACH (node, &stale) {
+        sbrec_static_mac_binding_delete(node->data);
+    }
+    hmapx_destroy(&stale);
 }
 
 static void
---

This approach is much more robust, since it avoids logic duplication and
doesn't require a ton of explanation in the comments.


Reading this code again, I'm pretty sure there are more bugs here caused
by the recent UUID unification.  AFAICT, if I change IP address in NB,
it will not be updated in SB.  Before the UUID change, lookup was done
with the port+IP pair and so if we found an entry, it had the right IP,
but now we're not updating IP and also not checking it in the staleness
check.  May also be a problem if we change the logical port in NB while
the old one still exists, the staleness check will not catch that.
So, we need to add more things into the 'update' branch.

Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to