> On 6/15/26 5:18 PM, Lorenzo Bianconi via dev wrote:
> > During VM live migration, a CMS needs to know when the destination
> > chassis has finished installing OpenFlow flows before it can safely
> > start the VM.  Currently, ovn-controller sets ovn-installed on the
> > local OVS interface, but this information is not reflected back to
> > the Southbound DB, requiring a per-chassis agent to monitor readiness.
> > Add a new Port_Binding options:additional-chassis-ready key that
> > contains a comma-separated list of chassis names that have completed
> > flow installation as additional chassis.  ovn-controller sets this
> > when local_binding_set_up() is called for an additional chassis
> > binding, and clears it when the chassis is released.
> > The option is preserved by northd during Port_Binding option rebuilds,
> > gated on requested_additional_chassis being set, so it is automatically
> > cleaned up when migration completes.
> > This differs from the existing additional-chassis-activated option
> > which is traffic-triggered (RARP/GARP/NA via activation strategy).
> > The new option is flow-installation-triggered and always-on.
> > 
> > Reported-at: https://redhat.atlassian.net/browse/FDP-3026
> > Assisted-by: Claude Opus 4.6, Claude Code
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> 
> Hi Lorenzo,
> 
> Thanks for the patch!

Hi Dumitru,

thx for the patch.

> 
> >  controller/binding.c |  79 ++++++++++++++++++++++
> >  northd/northd.c      |   5 ++
> >  ovn-sb.xml           |  12 ++++
> >  tests/ovn.at         | 158 +++++++++++++++++++++++++++++++++++++++++++
> 
> This is a new feature, we should add a line in the NEWS file for it.

ack, I will add it in v2.

> 
> >  4 files changed, 254 insertions(+)
> > 
> > diff --git a/controller/binding.c b/controller/binding.c
> > index de51be823..c761b0b25 100644
> > --- a/controller/binding.c
> > +++ b/controller/binding.c
> > @@ -1009,6 +1009,50 @@ local_binding_is_down(struct shash *local_bindings, 
> > const char *pb_name,
> >      return true;
> >  }
> >  
> > +static bool
> > +is_chassis_in_list(const char *chassis_list, const char *chassis_name)
> > +{
> > +    if (!chassis_list) {
> > +        return false;
> > +    }
> > +
> > +    char *save_ptr, *tokstr = xstrdup(chassis_list);
> > +    for (const char *name = strtok_r(tokstr, ",", &save_ptr);
> > +         name != NULL; name = strtok_r(NULL, ",", &save_ptr)) {
> > +        if (!strcmp(name, chassis_name)) {
> > +            free(tokstr);
> > +            return true;
> > +        }
> > +    }
> > +    free(tokstr);
> > +
> > +    return false;
> > +}
> > +
> > +static char *
> > +remove_chassis_from_list(const char *chassis_list, const char 
> > *chassis_name)
> > +{
> > +    if (!chassis_list) {
> > +        return NULL;
> > +    }
> > +
> > +    char *save_ptr, *tokstr = xstrdup(chassis_list);
> > +    struct ds result = DS_EMPTY_INITIALIZER;
> > +
> > +    for (const char *name = strtok_r(tokstr, ",", &save_ptr);
> > +         name != NULL; name = strtok_r(NULL, ",", &save_ptr)) {
> > +        if (strcmp(name, chassis_name)) {
> > +            if (result.length) {
> > +                ds_put_char(&result, ',');
> > +            }
> > +            ds_put_cstr(&result, name);
> > +        }
> > +    }
> > +    free(tokstr);
> > +
> > +    return ds_steal_cstr(&result);
> > +}
> > +
> >  void
> >  local_binding_set_up(struct shash *local_bindings, const char *pb_name,
> >                       const struct sbrec_chassis *chassis_rec,
> > @@ -1038,6 +1082,25 @@ local_binding_set_up(struct shash *local_bindings, 
> > const char *pb_name,
> >              binding_lport_set_up(b_lport, sb_readonly);
> >          }
> >      }
> > +
> > +    if (!sb_readonly && lbinding && b_lport &&
> 
> b_lport is the iterator inside the if above, reusing it like this makes
> it quite hard to understand the intention.
> 
> Wouldn't it make sense to rewrite part of this function as follows?
> 
> if (!sb_readonly && lbinding && b_lport) {
>     if (b_lport->pb->n_up && !b_lport->pb->up[0] &&
>         b_lport->pb->chassis == chassis_rec) {
>         // do what the block above does, i.e.:
>         // set b_lport up
>         // walk all &lbinding->binding_lports
>         // and set them up
>     }
> 
>     if (is_additional_chassis(b_lport->pb, chassis_rec)) {
>         // update additional-chassis-ready like you do below

ack, I will fix it in v2.

>     }
> }
> 
> > +        is_additional_chassis(b_lport->pb, chassis_rec)) {
> > +        const char *current = smap_get(&b_lport->pb->options,
> > +                                       "additional-chassis-ready");
> 
> Using sset_from_delimited_string() would simplify this patch a lot.

ack, I will fix it in v2.

> 
> > +        if (!is_chassis_in_list(current, chassis_rec->name)) {
> > +            char *val;
> > +
> > +            if (current) {
> > +                val = xasprintf("%s,%s", current, chassis_rec->name);
> 
> I'd use 'val' only on this branch of the if and call
> sbrec_port_binding_update_options_setkey() here too.
> 
> > +            } else {
> > +                val = xstrdup(chassis_rec->name);
> 
> We can just call:
> 
> sbrec_port_binding_update_options_setkey(.., "additional-chassis-ready",
>                                          chassis_rec->name);

ack, I will fix it in v2.

> 
> > +            }
> > +
> > +            sbrec_port_binding_update_options_setkey(
> > +                    b_lport->pb, "additional-chassis-ready", val);
> > +            free(val);
> > +        }
> > +    }
> >  }
> >  
> >  void
> > @@ -1570,6 +1633,22 @@ release_lport_additional_chassis(const struct 
> > sbrec_port_binding *pb,
> >          remove_additional_chassis(pb, chassis_rec);
> >      }
> >  
> > +    const char *ready = smap_get(&pb->options, "additional-chassis-ready");
> > +    if (ready && is_chassis_in_list(ready, chassis_rec->name)) {
> > +        if (sb_readonly) {
> > +            return false;
> > +        }
> > +        char *updated = remove_chassis_from_list(ready, chassis_rec->name);
> 
> Under the hood we parsed 'ready' twice in the lines above.
> 
> Let's just parse it into a sset from the beginning
> (sset_from_delimited_string()) and operate on sets instead of strings.

ack, I will fix it in v2.

> 
> > +        if (updated && updated[0]) {
> > +            sbrec_port_binding_update_options_setkey(
> > +                pb, "additional-chassis-ready", updated);
> > +        } else {
> > +            sbrec_port_binding_update_options_delkey(
> > +                pb, "additional-chassis-ready");
> > +        }
> > +        free(updated);
> > +    }
> > +
> >      VLOG_INFO("Releasing lport %s from this additional chassis.",
> >                pb->logical_port);
> >      return true;
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 0dbf17426..71b3ca9c1 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -2871,6 +2871,11 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn 
> > *ovnsb_txn,
> >                      smap_add(&options, "additional-chassis-activated",
> >                               activated_str);
> >                  }
> > +                const char *ready_str = smap_get(&op->sb->options,
> > +                                                 
> > "additional-chassis-ready");
> > +                if (ready_str) {
> > +                    smap_add(&options, "additional-chassis-ready", 
> > ready_str);
> > +                }
> >              }
> >  
> >              /* Preserve virtual port options. */
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index e45b63d73..5175c523a 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -3855,6 +3855,18 @@ tcp.flags = RST;
> >          that the port was activated using the strategy specified.
> >        </column>
> >  
> > +      <column name="options" key="additional-chassis-ready">
> > +        A comma-separated list of chassis names that have finished 
> > installing
> > +        OpenFlow flows for this port binding as an additional chassis.  
> > Set by
> > +        <code>ovn-controller</code> when the interface reaches the
> > +        <code>ovn-installed</code> state on the additional chassis.  This
> > +        allows a CMS to monitor the Southbound DB for migration readiness
> > +        without requiring an agent on each chassis.  The option is
> > +        automatically cleaned up when the chassis is removed from
> > +        <ref column="additional_chassis"/> or when
> > +        <ref column="requested_additional_chassis"/> is cleared.
> > +      </column>
> > +
> >        <column name="options" key="iface-id-ver">
> >          If set, this port will be bound by <code>ovn-controller</code>
> >          only if this same key and value is configured in the
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 522c1c90d..6dc2f158e 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -17336,6 +17336,164 @@ OVN_CLEANUP([hv1],[hv2])
> >  AT_CLEANUP
> >  ])
> >  
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([options:additional-chassis-ready for logical port])
> > +AT_KEYWORDS([multi-chassis])
> > +TAG_UNSTABLE
> 
> Why are we adding an unstable test?  Is it really unstable or just a
> copy/paste issue?

This is just a leftover, I will fix it in v2.

> 
> > +ovn_start
> > +
> > +net_add n1
> > +
> > +sim_add hv1
> > +as hv1
> > +check ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.11
> > +
> > +sim_add hv2
> > +as hv2
> > +check ovs-vsctl add-br br-phys
> > +ovn_attach n1 br-phys 192.168.0.12
> > +
> > +check ovn-nbctl ls-add ls0
> > +check ovn-nbctl lsp-add ls0 lsp0
> > +
> > +# Allow only chassis hv1 to bind logical port lsp0.
> > +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
> > +
> > +as hv1 check ovs-vsctl -- add-port br-int lsp0 -- \
> > +    set Interface lsp0 external-ids:iface-id=lsp0
> > +as hv2 check ovs-vsctl -- add-port br-int lsp0 -- \
> > +    set Interface lsp0 external-ids:iface-id=lsp0
> > +
> > +wait_row_count Chassis 1 name=hv1
> > +wait_row_count Chassis 1 name=hv2
> > +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> > +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> > +
> > +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> > +wait_column "$hv1_uuid" Port_Binding requested_chassis logical_port=lsp0
> > +wait_column "" Port_Binding additional_chassis logical_port=lsp0
> > +wait_column "" Port_Binding requested_additional_chassis logical_port=lsp0
> > +
> > +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding 
> > logical_port=lsp0)
> 
> We could use fetch_column here.

ack I will fix it in v2.

> 
> > +
> > +# additional-chassis-ready should not be set yet
> > +AT_CHECK([ovn-sbctl get Port_Binding $pb_uuid 
> > options:additional-chassis-ready 2>/dev/null], [1], [ignore], [ignore])
> > +
> > +# Request port binding at an additional chassis (simulate migration start)
> 
> Comments should be sentences and end with a period.  This applies to
> multiple places in this patch.

ack I will fix it in v2.

> 
> > +check ovn-nbctl lsp-set-options lsp0 \
> > +    requested-chassis=hv1,hv2
> > +
> > +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> > +wait_column "$hv2_uuid" Port_Binding additional_chassis logical_port=lsp0
> > +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis 
> > logical_port=lsp0
> > +
> > +# Wait for ovn-installed on both chassis
> > +wait_for_ports_up
> > +
> > +for hv in hv1 hv2; do
> > +    OVS_WAIT_UNTIL([test `as $hv ovs-vsctl get Interface lsp0 
> > external_ids:ovn-installed` = '"true"'])
> > +done
> > +
> > +# Verify additional-chassis-ready=hv2 is set in Port_Binding options
> > +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid 
> > options:additional-chassis-ready | tr -d '""')])
> > +
> > +# Complete migration: move binding to hv2
> > +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> > +
> > +wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
> > +wait_column "$hv2_uuid" Port_Binding requested_chassis logical_port=lsp0
> > +wait_column "" Port_Binding additional_chassis logical_port=lsp0
> > +wait_column "" Port_Binding requested_additional_chassis logical_port=lsp0
> > +
> > +# Verify additional-chassis-ready is cleared after migration completes
> > +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid 
> > options:additional-chassis-ready 2>/dev/null)])
> > +
> > +OVN_CLEANUP([hv1
> > +ignored_dp=ls0],[hv2])
> > +
> > +AT_CLEANUP
> > +])
> > +
> > +OVN_FOR_EACH_NORTHD([
> > +AT_SETUP([options:additional-chassis-ready with multiple additional 
> > chassis])
> > +AT_KEYWORDS([multi-chassis])
> > +TAG_UNSTABLE
> 
> Why are we adding an unstable test?  Is it really unstable or just a
> copy/paste issue?

ditto.

> 
> > +ovn_start
> > +
> > +net_add n1
> > +
> > +for i in 1 2 3; do
> > +    sim_add hv$i
> > +    as hv$i
> > +    check ovs-vsctl add-br br-phys
> > +    ovn_attach n1 br-phys 192.168.0.1$i
> > +done
> > +
> > +check ovn-nbctl ls-add ls0
> > +check ovn-nbctl lsp-add ls0 lsp0
> > +
> > +# Bind the port to hv1 initially
> > +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
> > +
> > +for i in 1 2 3; do
> > +    as hv$i check ovs-vsctl -- add-port br-int lsp0 -- \
> > +        set Interface lsp0 external-ids:iface-id=lsp0
> > +done
> > +
> > +for i in 1 2 3; do
> > +    wait_row_count Chassis 1 name=hv$i
> > +done
> > +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> > +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> > +hv3_uuid=$(fetch_column Chassis _uuid name=hv3)
> > +
> > +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> > +
> > +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding 
> > logical_port=lsp0)
> > +
> > +# Request binding at two additional chassis
> > +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1,hv2,hv3
> 
> Missing --wait=hv sync?

ack I will fix it in v2.

> 
> > +
> > +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> > +wait_column "$hv2_uuid $hv3_uuid" Port_Binding additional_chassis 
> > logical_port=lsp0
> > +
> > +wait_for_ports_up
> > +
> > +for hv in hv1 hv2 hv3; do
> > +    OVS_WAIT_UNTIL([test `as $hv ovs-vsctl get Interface lsp0 
> > external_ids:ovn-installed` = '"true"'])
> > +done
> > +
> > +# Verify additional-chassis-ready contains both hv2 and hv3
> > +OVS_WAIT_UNTIL([
> > +    ready=$(ovn-sbctl get Port_Binding $pb_uuid 
> > options:additional-chassis-ready | tr -d '""')
> > +    echo "$ready" | grep -q hv2 && echo "$ready" | grep -q hv3
> > +])
> > +
> > +# Remove hv3 from additional chassis
> > +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1,hv2
> > +
> > +wait_column "$hv2_uuid" Port_Binding additional_chassis logical_port=lsp0
> > +
> 
> I think if you add --wait=hv to the nbctl command you can change this
> into a check_column.

ditto.

> 
> > +# Verify hv3 is removed from additional-chassis-ready but hv2 remains
> > +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid 
> > options:additional-chassis-ready | tr -d '""')])
> > +
> > +# Complete migration
> > +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> > +
> 
> Missing --wait=hv sync?

ditto.

> 
> > +wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
> > +wait_column "" Port_Binding additional_chassis logical_port=lsp0
> > +
> > +# Verify additional-chassis-ready fully cleaned up
> > +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid 
> > options:additional-chassis-ready 2>/dev/null)])
> > +
> > +OVN_CLEANUP([hv1
> > +ignored_dp=ls0],[hv2],[hv3
> > +ignored_dp=ls0])
> > +
> > +AT_CLEANUP
> > +])
> > +
> >  OVN_FOR_EACH_NORTHD([
> >  AT_SETUP([options:requested-chassis for logical port])
> >  ovn_start
> 
> Regards,
> Dumitru
> 

Regards,
Lorenzo
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to