On 6/15/26 5:18 PM, Lorenzo Bianconi via dev wrote:
> During VM live migration, a CMS needs to know when the destination
> chassis has finished installing OpenFlow flows before it can safely
> start the VM.  Currently, ovn-controller sets ovn-installed on the
> local OVS interface, but this information is not reflected back to
> the Southbound DB, requiring a per-chassis agent to monitor readiness.
> Add a new Port_Binding options:additional-chassis-ready key that
> contains a comma-separated list of chassis names that have completed
> flow installation as additional chassis.  ovn-controller sets this
> when local_binding_set_up() is called for an additional chassis
> binding, and clears it when the chassis is released.
> The option is preserved by northd during Port_Binding option rebuilds,
> gated on requested_additional_chassis being set, so it is automatically
> cleaned up when migration completes.
> This differs from the existing additional-chassis-activated option
> which is traffic-triggered (RARP/GARP/NA via activation strategy).
> The new option is flow-installation-triggered and always-on.
> 
> Reported-at: https://redhat.atlassian.net/browse/FDP-3026
> Assisted-by: Claude Opus 4.6, Claude Code
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---

Hi Lorenzo,

Thanks for the patch!

>  controller/binding.c |  79 ++++++++++++++++++++++
>  northd/northd.c      |   5 ++
>  ovn-sb.xml           |  12 ++++
>  tests/ovn.at         | 158 +++++++++++++++++++++++++++++++++++++++++++

This is a new feature, we should add a line in the NEWS file for it.

>  4 files changed, 254 insertions(+)
> 
> diff --git a/controller/binding.c b/controller/binding.c
> index de51be823..c761b0b25 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -1009,6 +1009,50 @@ local_binding_is_down(struct shash *local_bindings, 
> const char *pb_name,
>      return true;
>  }
>  
> +static bool
> +is_chassis_in_list(const char *chassis_list, const char *chassis_name)
> +{
> +    if (!chassis_list) {
> +        return false;
> +    }
> +
> +    char *save_ptr, *tokstr = xstrdup(chassis_list);
> +    for (const char *name = strtok_r(tokstr, ",", &save_ptr);
> +         name != NULL; name = strtok_r(NULL, ",", &save_ptr)) {
> +        if (!strcmp(name, chassis_name)) {
> +            free(tokstr);
> +            return true;
> +        }
> +    }
> +    free(tokstr);
> +
> +    return false;
> +}
> +
> +static char *
> +remove_chassis_from_list(const char *chassis_list, const char *chassis_name)
> +{
> +    if (!chassis_list) {
> +        return NULL;
> +    }
> +
> +    char *save_ptr, *tokstr = xstrdup(chassis_list);
> +    struct ds result = DS_EMPTY_INITIALIZER;
> +
> +    for (const char *name = strtok_r(tokstr, ",", &save_ptr);
> +         name != NULL; name = strtok_r(NULL, ",", &save_ptr)) {
> +        if (strcmp(name, chassis_name)) {
> +            if (result.length) {
> +                ds_put_char(&result, ',');
> +            }
> +            ds_put_cstr(&result, name);
> +        }
> +    }
> +    free(tokstr);
> +
> +    return ds_steal_cstr(&result);
> +}
> +
>  void
>  local_binding_set_up(struct shash *local_bindings, const char *pb_name,
>                       const struct sbrec_chassis *chassis_rec,
> @@ -1038,6 +1082,25 @@ local_binding_set_up(struct shash *local_bindings, 
> const char *pb_name,
>              binding_lport_set_up(b_lport, sb_readonly);
>          }
>      }
> +
> +    if (!sb_readonly && lbinding && b_lport &&

b_lport is the iterator inside the if above, reusing it like this makes
it quite hard to understand the intention.

Wouldn't it make sense to rewrite part of this function as follows?

if (!sb_readonly && lbinding && b_lport) {
    if (b_lport->pb->n_up && !b_lport->pb->up[0] &&
        b_lport->pb->chassis == chassis_rec) {
        // do what the block above does, i.e.:
        // set b_lport up
        // walk all &lbinding->binding_lports
        // and set them up
    }

    if (is_additional_chassis(b_lport->pb, chassis_rec)) {
        // update additional-chassis-ready like you do below
    }
}

> +        is_additional_chassis(b_lport->pb, chassis_rec)) {
> +        const char *current = smap_get(&b_lport->pb->options,
> +                                       "additional-chassis-ready");

Using sset_from_delimited_string() would simplify this patch a lot.

> +        if (!is_chassis_in_list(current, chassis_rec->name)) {
> +            char *val;
> +
> +            if (current) {
> +                val = xasprintf("%s,%s", current, chassis_rec->name);

I'd use 'val' only on this branch of the if and call
sbrec_port_binding_update_options_setkey() here too.

> +            } else {
> +                val = xstrdup(chassis_rec->name);

We can just call:

sbrec_port_binding_update_options_setkey(.., "additional-chassis-ready",
                                         chassis_rec->name);

> +            }
> +
> +            sbrec_port_binding_update_options_setkey(
> +                    b_lport->pb, "additional-chassis-ready", val);
> +            free(val);
> +        }
> +    }
>  }
>  
>  void
> @@ -1570,6 +1633,22 @@ release_lport_additional_chassis(const struct 
> sbrec_port_binding *pb,
>          remove_additional_chassis(pb, chassis_rec);
>      }
>  
> +    const char *ready = smap_get(&pb->options, "additional-chassis-ready");
> +    if (ready && is_chassis_in_list(ready, chassis_rec->name)) {
> +        if (sb_readonly) {
> +            return false;
> +        }
> +        char *updated = remove_chassis_from_list(ready, chassis_rec->name);

Under the hood we parsed 'ready' twice in the lines above.

Let's just parse it into a sset from the beginning
(sset_from_delimited_string()) and operate on sets instead of strings.

> +        if (updated && updated[0]) {
> +            sbrec_port_binding_update_options_setkey(
> +                pb, "additional-chassis-ready", updated);
> +        } else {
> +            sbrec_port_binding_update_options_delkey(
> +                pb, "additional-chassis-ready");
> +        }
> +        free(updated);
> +    }
> +
>      VLOG_INFO("Releasing lport %s from this additional chassis.",
>                pb->logical_port);
>      return true;
> diff --git a/northd/northd.c b/northd/northd.c
> index 0dbf17426..71b3ca9c1 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -2871,6 +2871,11 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
>                      smap_add(&options, "additional-chassis-activated",
>                               activated_str);
>                  }
> +                const char *ready_str = smap_get(&op->sb->options,
> +                                                 "additional-chassis-ready");
> +                if (ready_str) {
> +                    smap_add(&options, "additional-chassis-ready", 
> ready_str);
> +                }
>              }
>  
>              /* Preserve virtual port options. */
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index e45b63d73..5175c523a 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -3855,6 +3855,18 @@ tcp.flags = RST;
>          that the port was activated using the strategy specified.
>        </column>
>  
> +      <column name="options" key="additional-chassis-ready">
> +        A comma-separated list of chassis names that have finished installing
> +        OpenFlow flows for this port binding as an additional chassis.  Set 
> by
> +        <code>ovn-controller</code> when the interface reaches the
> +        <code>ovn-installed</code> state on the additional chassis.  This
> +        allows a CMS to monitor the Southbound DB for migration readiness
> +        without requiring an agent on each chassis.  The option is
> +        automatically cleaned up when the chassis is removed from
> +        <ref column="additional_chassis"/> or when
> +        <ref column="requested_additional_chassis"/> is cleared.
> +      </column>
> +
>        <column name="options" key="iface-id-ver">
>          If set, this port will be bound by <code>ovn-controller</code>
>          only if this same key and value is configured in the
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 522c1c90d..6dc2f158e 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -17336,6 +17336,164 @@ OVN_CLEANUP([hv1],[hv2])
>  AT_CLEANUP
>  ])
>  
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([options:additional-chassis-ready for logical port])
> +AT_KEYWORDS([multi-chassis])
> +TAG_UNSTABLE

Why are we adding an unstable test?  Is it really unstable or just a
copy/paste issue?

> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +check ovn-nbctl ls-add ls0
> +check ovn-nbctl lsp-add ls0 lsp0
> +
> +# Allow only chassis hv1 to bind logical port lsp0.
> +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
> +
> +as hv1 check ovs-vsctl -- add-port br-int lsp0 -- \
> +    set Interface lsp0 external-ids:iface-id=lsp0
> +as hv2 check ovs-vsctl -- add-port br-int lsp0 -- \
> +    set Interface lsp0 external-ids:iface-id=lsp0
> +
> +wait_row_count Chassis 1 name=hv1
> +wait_row_count Chassis 1 name=hv2
> +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> +wait_column "$hv1_uuid" Port_Binding requested_chassis logical_port=lsp0
> +wait_column "" Port_Binding additional_chassis logical_port=lsp0
> +wait_column "" Port_Binding requested_additional_chassis logical_port=lsp0
> +
> +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding 
> logical_port=lsp0)

We could use fetch_column here.

> +
> +# additional-chassis-ready should not be set yet
> +AT_CHECK([ovn-sbctl get Port_Binding $pb_uuid 
> options:additional-chassis-ready 2>/dev/null], [1], [ignore], [ignore])
> +
> +# Request port binding at an additional chassis (simulate migration start)

Comments should be sentences and end with a period.  This applies to
multiple places in this patch.

> +check ovn-nbctl lsp-set-options lsp0 \
> +    requested-chassis=hv1,hv2
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> +wait_column "$hv2_uuid" Port_Binding additional_chassis logical_port=lsp0
> +wait_column "$hv2_uuid" Port_Binding requested_additional_chassis 
> logical_port=lsp0
> +
> +# Wait for ovn-installed on both chassis
> +wait_for_ports_up
> +
> +for hv in hv1 hv2; do
> +    OVS_WAIT_UNTIL([test `as $hv ovs-vsctl get Interface lsp0 
> external_ids:ovn-installed` = '"true"'])
> +done
> +
> +# Verify additional-chassis-ready=hv2 is set in Port_Binding options
> +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid 
> options:additional-chassis-ready | tr -d '""')])
> +
> +# Complete migration: move binding to hv2
> +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> +
> +wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
> +wait_column "$hv2_uuid" Port_Binding requested_chassis logical_port=lsp0
> +wait_column "" Port_Binding additional_chassis logical_port=lsp0
> +wait_column "" Port_Binding requested_additional_chassis logical_port=lsp0
> +
> +# Verify additional-chassis-ready is cleared after migration completes
> +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid 
> options:additional-chassis-ready 2>/dev/null)])
> +
> +OVN_CLEANUP([hv1
> +ignored_dp=ls0],[hv2])
> +
> +AT_CLEANUP
> +])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([options:additional-chassis-ready with multiple additional chassis])
> +AT_KEYWORDS([multi-chassis])
> +TAG_UNSTABLE

Why are we adding an unstable test?  Is it really unstable or just a
copy/paste issue?

> +ovn_start
> +
> +net_add n1
> +
> +for i in 1 2 3; do
> +    sim_add hv$i
> +    as hv$i
> +    check ovs-vsctl add-br br-phys
> +    ovn_attach n1 br-phys 192.168.0.1$i
> +done
> +
> +check ovn-nbctl ls-add ls0
> +check ovn-nbctl lsp-add ls0 lsp0
> +
> +# Bind the port to hv1 initially
> +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1
> +
> +for i in 1 2 3; do
> +    as hv$i check ovs-vsctl -- add-port br-int lsp0 -- \
> +        set Interface lsp0 external-ids:iface-id=lsp0
> +done
> +
> +for i in 1 2 3; do
> +    wait_row_count Chassis 1 name=hv$i
> +done
> +hv1_uuid=$(fetch_column Chassis _uuid name=hv1)
> +hv2_uuid=$(fetch_column Chassis _uuid name=hv2)
> +hv3_uuid=$(fetch_column Chassis _uuid name=hv3)
> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> +
> +pb_uuid=$(ovn-sbctl --bare --columns _uuid find Port_Binding 
> logical_port=lsp0)
> +
> +# Request binding at two additional chassis
> +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1,hv2,hv3

Missing --wait=hv sync?

> +
> +wait_column "$hv1_uuid" Port_Binding chassis logical_port=lsp0
> +wait_column "$hv2_uuid $hv3_uuid" Port_Binding additional_chassis 
> logical_port=lsp0
> +
> +wait_for_ports_up
> +
> +for hv in hv1 hv2 hv3; do
> +    OVS_WAIT_UNTIL([test `as $hv ovs-vsctl get Interface lsp0 
> external_ids:ovn-installed` = '"true"'])
> +done
> +
> +# Verify additional-chassis-ready contains both hv2 and hv3
> +OVS_WAIT_UNTIL([
> +    ready=$(ovn-sbctl get Port_Binding $pb_uuid 
> options:additional-chassis-ready | tr -d '""')
> +    echo "$ready" | grep -q hv2 && echo "$ready" | grep -q hv3
> +])
> +
> +# Remove hv3 from additional chassis
> +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv1,hv2
> +
> +wait_column "$hv2_uuid" Port_Binding additional_chassis logical_port=lsp0
> +

I think if you add --wait=hv to the nbctl command you can change this
into a check_column.

> +# Verify hv3 is removed from additional-chassis-ready but hv2 remains
> +OVS_WAIT_UNTIL([test xhv2 = x$(ovn-sbctl get Port_Binding $pb_uuid 
> options:additional-chassis-ready | tr -d '""')])
> +
> +# Complete migration
> +check ovn-nbctl lsp-set-options lsp0 requested-chassis=hv2
> +

Missing --wait=hv sync?

> +wait_column "$hv2_uuid" Port_Binding chassis logical_port=lsp0
> +wait_column "" Port_Binding additional_chassis logical_port=lsp0
> +
> +# Verify additional-chassis-ready fully cleaned up
> +OVS_WAIT_UNTIL([test x = x$(ovn-sbctl get Port_Binding $pb_uuid 
> options:additional-chassis-ready 2>/dev/null)])
> +
> +OVN_CLEANUP([hv1
> +ignored_dp=ls0],[hv2],[hv3
> +ignored_dp=ls0])
> +
> +AT_CLEANUP
> +])
> +
>  OVN_FOR_EACH_NORTHD([
>  AT_SETUP([options:requested-chassis for logical port])
>  ovn_start

Regards,
Dumitru

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to