Hello Lei, thanks for the patch. I have some notes inline
On Tue, Feb 10, 2026 at 8:55 PM Lei Huang <[email protected]> wrote:
>
> From: Lei Huang <[email protected]>
>
> Use requested-encap-ip (with requested-chassis) to set Port_Binding.encap,
> clear on removal, and prefer geneve when both types exist. Add a northd
> test and document the ovn-k8s interconnect use case.
>
> CC: Han Zhou <[email protected]>
> Signed-off-by: Lei Huang <[email protected]>
0-day Robot is complaining because the email in the Signed-off-by line
does not match the one in the From line.
> ---
> NEWS | 3 ++
> northd/northd.c | 116 +++++++++++++++++++++++++++++++++++++++++---
> ovn-nb.xml | 18 +++++++
> tests/ovn-northd.at | 63 ++++++++++++++++++++++++
> 4 files changed, 193 insertions(+), 7 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 2a2b5e12d..040577519 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,8 @@
> Post v25.09.0
> -------------
> + - Added LSP/LRP option "requested-encap-ip" to let CMS request a specific
> + SB Port_Binding encap IP (e.g., for remote transit ports in ovn-k8s
> + interconnect mode).
> - Added DNS query statistics tracking in ovn-controller using OVS coverage
> counters. Statistics can be queried using "ovn-appctl -t ovn-controller
> coverage/read-counter <counter_name>" or "coverage/show". Tracked
> metrics
> diff --git a/northd/northd.c b/northd/northd.c
> index b4bb4ba6d..3e8e9a994 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -2531,6 +2531,82 @@ ovn_port_update_sbrec_chassis(
> free(requested_chassis_sb);
> }
>
> +static void
> +encap_ip_map_init(struct shash *encap_by_ip,
> + const struct sbrec_chassis_table *sbrec_chassis_table)
> +{
I'm not 100% certain this encap_by_ip mapping is necessary.
The OVSDB IDL provides a way of creating indexes of tables. In this
case, you could create an ovsdb_idl_index that specifically allows for
you to look up encaps by chassis name and ip. Then you can use this
index to find any matching encaps. In case this has more than one
match, you can prefer the one with the higher priority tunnel type.
However, if you have done some benchmarking and found that this
outperforms the IDL index method, then we can go with this instead.
> + shash_init(encap_by_ip);
> + if (!sbrec_chassis_table) {
> + return;
> + }
> +
> + const struct sbrec_chassis *chassis;
> + SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
> + for (size_t i = 0; i < chassis->n_encaps; i++) {
> + const struct sbrec_encap *encap = chassis->encaps[i];
> + if (!encap || !encap->ip || !encap->type ||
> !encap->chassis_name) {
Since "type" and "ip" are indexes on the Encap table, it is not
possible for encap->ip or encap->type to be NULL.
> + continue;
> + }
> +
> + enum chassis_tunnel_type tun_type = get_tunnel_type(encap->type);
> + if (tun_type == TUNNEL_TYPE_INVALID) {
> + continue;
> + }
> +
> + char *key = xasprintf("%s@%s", encap->chassis_name, encap->ip);
I recommend creating a function that creates this string. This way,
both encap_ip_map_init() and encap_ip_map_lookup() can use the same
function to create the string.
> + struct shash_node *node = shash_find(encap_by_ip, key);
> + if (!node) {
> + shash_add_nocopy(encap_by_ip, key, (void *) encap);
> + } else {
> + free(key);
> + const struct sbrec_encap *existing = node->data;
> + /* Pick the highest-preference tunnel type (geneve > vxlan)
> + * when multiple encap types share the same chassis+IP. */
> + if (get_tunnel_type(existing->type) < tun_type) {
> + node->data = (void *) encap;
> + }
> + }
> + }
> + }
> +}
> +
> +static const struct sbrec_encap *
> +encap_ip_map_lookup(const struct shash *encap_by_ip, const char
> *chassis_name,
> + const char *ip)
> +{
> + if (!encap_by_ip || !chassis_name || !chassis_name[0] || !ip || !ip[0]) {
I don't know how it's possible for encap_by_ip to be NULL here. The
southbound schema for the Chassis table ensures that the chassis_name
is non-NULL and not zero-length. The only caller of
encap_ip_map_lookup() also does its own validity checks for ip before
calling.
Therefore, I think this NULL return is impossible to hit, and you can remove it.
Alternatively, you could remove the validation checks from the caller
of encap_ip_map_lookup() and just check the validity of ip here.
> + return NULL;
> + }
> + char *key = xasprintf("%s@%s", chassis_name, ip);
> + const struct sbrec_encap *encap = shash_find_data(encap_by_ip, key);
> + free(key);
> + return encap;
> +}
> +
> +static void
> +ovn_port_update_requested_encap(const struct shash *encap_by_ip,
> + const struct ovn_port *op)
> +{
> + if (is_cr_port(op)) {
> + return;
> + }
> +
> + /* requested-chassis is resolved into SB first; reuse that binding. */
> + const struct smap *options = op->nbsp ? &op->nbsp->options
> + : &op->nbrp->options;
> + const char *requested_ip = smap_get(options, "requested-encap-ip");
> + const struct sbrec_encap *encap = NULL;
> + if (requested_ip && requested_ip[0] && op->sb->requested_chassis) {
> + encap = encap_ip_map_lookup(encap_by_ip,
> + op->sb->requested_chassis->name,
> + requested_ip);
> + }
> +
> + if (op->sb->encap != encap) {
> + sbrec_port_binding_set_encap(op->sb, encap);
> + }
> +}
> +
> static void
> check_and_do_sb_mirror_deletion(const struct ovn_port *op)
> {
> @@ -2601,6 +2677,7 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
> struct ovsdb_idl_index *sbrec_chassis_by_hostname,
> struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name,
> const struct sbrec_mirror_table *sbrec_mirror_table,
> + const struct shash *encap_by_ip,
> const struct ovn_port *op,
> unsigned long *queue_id_bitmap,
> struct sset *active_ha_chassis_grps)
> @@ -2937,6 +3014,10 @@ common:
> sbrec_port_binding_set_tunnel_key(op->sb, op->tunnel_key);
> }
>
> + if (encap_by_ip) {
Under what circumstances would encap_by_ip be NULL here?
> + ovn_port_update_requested_encap(encap_by_ip, op);
> + }
> +
> /* ovn-controller will update 'Port_Binding.up' only if it was explicitly
> * set to 'false'.
> */
> @@ -4096,6 +4177,7 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> const struct sbrec_mirror_table *sbrec_mirror_table,
> const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
> const struct sbrec_ha_chassis_group_table *sbrec_ha_chassis_group_table,
> + const struct sbrec_chassis_table *sbrec_chassis_table,
> struct ovsdb_idl_index *sbrec_chassis_by_name,
> struct ovsdb_idl_index *sbrec_chassis_by_hostname,
> struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name,
> @@ -4113,6 +4195,9 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> struct sset active_ha_chassis_grps =
> SSET_INITIALIZER(&active_ha_chassis_grps);
>
> + struct shash encap_by_ip;
> + encap_ip_map_init(&encap_by_ip, sbrec_chassis_table);
> +
> /* Borrow ls_ports for joining NB and SB for both LSPs and LRPs.
> * We will split them later. */
> struct hmap *ports = ls_ports;
> @@ -4172,6 +4257,7 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> sbrec_chassis_by_hostname,
> sbrec_ha_chassis_grp_by_name,
> sbrec_mirror_table,
> + &encap_by_ip,
> op, queue_id_bitmap,
> &active_ha_chassis_grps);
> op->od->is_transit_router |= is_transit_router_port(op);
> @@ -4185,6 +4271,7 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> sbrec_chassis_by_hostname,
> sbrec_ha_chassis_grp_by_name,
> sbrec_mirror_table,
> + &encap_by_ip,
> op, queue_id_bitmap,
> &active_ha_chassis_grps);
> sbrec_port_binding_set_logical_port(op->sb, op->key);
> @@ -4215,6 +4302,7 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
> cleanup_mac_bindings(sbrec_mac_binding_table, lr_datapaths,
> lr_ports);
> }
>
> + shash_destroy(&encap_by_ip);
> tag_alloc_destroy(&tag_alloc_table);
> bitmap_free(queue_id_bitmap);
> cleanup_sb_ha_chassis_groups(sbrec_ha_chassis_group_table,
> @@ -4401,7 +4489,8 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn
> *ovnsb_txn,
> const struct sbrec_port_binding *sb,
> const struct sbrec_mirror_table *sbrec_mirror_table,
> struct ovsdb_idl_index *sbrec_chassis_by_name,
> - struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> + struct ovsdb_idl_index *sbrec_chassis_by_hostname,
> + const struct shash *encap_by_ip)
> {
> op->od = od;
> parse_lsp_addrs(op);
> @@ -4431,6 +4520,7 @@ ls_port_init(struct ovn_port *op, struct ovsdb_idl_txn
> *ovnsb_txn,
> }
> ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
> sbrec_chassis_by_hostname, NULL,
> sbrec_mirror_table,
> + encap_by_ip,
> op, NULL, NULL);
> return true;
> }
> @@ -4441,13 +4531,15 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn,
> struct hmap *ls_ports,
> struct ovn_datapath *od,
> const struct sbrec_mirror_table *sbrec_mirror_table,
> struct ovsdb_idl_index *sbrec_chassis_by_name,
> - struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> + struct ovsdb_idl_index *sbrec_chassis_by_hostname,
> + const struct shash *encap_by_ip)
> {
> struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
> NULL);
> hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
> if (!ls_port_init(op, ovnsb_txn, od, NULL, sbrec_mirror_table,
> - sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
> + sbrec_chassis_by_name, sbrec_chassis_by_hostname,
> + encap_by_ip)) {
> ovn_port_destroy(ls_ports, op);
> return NULL;
> }
> @@ -4462,14 +4554,16 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
> const struct sbrec_port_binding *sb,
> const struct sbrec_mirror_table *sbrec_mirror_table,
> struct ovsdb_idl_index *sbrec_chassis_by_name,
> - struct ovsdb_idl_index *sbrec_chassis_by_hostname)
> + struct ovsdb_idl_index *sbrec_chassis_by_hostname,
> + const struct shash *encap_by_ip)
> {
> ovn_port_cleanup(op);
> op->sb = sb;
> ovn_port_set_nb(op, nbsp, NULL);
> op->primary_port = op->cr_port = NULL;
> return ls_port_init(op, ovnsb_txn, od, sb, sbrec_mirror_table,
> - sbrec_chassis_by_name, sbrec_chassis_by_hostname);
> + sbrec_chassis_by_name, sbrec_chassis_by_hostname,
> + encap_by_ip);
> }
>
> /* Returns true if the logical switch has changes which can be
> @@ -4632,6 +4726,9 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> return true;
> }
>
> + struct shash encap_by_ip;
> + encap_ip_map_init(&encap_by_ip, ni->sbrec_chassis_table);
> +
> bool ls_had_only_router_ports = (!vector_is_empty(&od->router_ports)
> && (vector_len(&od->router_ports) == hmap_count(&od->ports)));
>
> @@ -4658,7 +4755,8 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> new_nbsp->name, new_nbsp, od,
> ni->sbrec_mirror_table,
> ni->sbrec_chassis_by_name,
> - ni->sbrec_chassis_by_hostname);
> + ni->sbrec_chassis_by_hostname,
> + &encap_by_ip);
> if (!op) {
> goto fail;
> }
> @@ -4697,7 +4795,8 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> new_nbsp,
> od, sb, ni->sbrec_mirror_table,
> ni->sbrec_chassis_by_name,
> - ni->sbrec_chassis_by_hostname)) {
> + ni->sbrec_chassis_by_hostname,
> + &encap_by_ip)) {
> if (sb) {
> sbrec_port_binding_delete(sb);
> }
> @@ -4798,9 +4897,11 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
> }
> sset_destroy(&created_or_deleted_ports);
>
> + shash_destroy(&encap_by_ip);
> return true;
>
> fail:
> + shash_destroy(&encap_by_ip);
> destroy_tracked_ovn_ports(trk_lsps);
> return false;
> }
> @@ -20622,6 +20723,7 @@ ovnnb_db_run(struct northd_input *input_data,
> input_data->sbrec_mirror_table,
> input_data->sbrec_mac_binding_table,
> input_data->sbrec_ha_chassis_group_table,
> + input_data->sbrec_chassis_table,
> input_data->sbrec_chassis_by_name,
> input_data->sbrec_chassis_by_hostname,
> input_data->sbrec_ha_chassis_grp_by_name,
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 1acbf202b..33bf8993a 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1550,6 +1550,15 @@
> </p>
> </column>
>
> + <column name="options" key="requested-encap-ip">
> + Requests the encapsulation IP address for the port binding. If set,
> + <code>ovn-northd</code> uses this IP to select the
> + <ref table="Encap" db="OVN_Southbound"/> entry written to
> + <ref table="Port_Binding" column="encap" db="OVN_Southbound"/>.
> + This is intended for ports without a local OVS interface, e.g.
> remote
> + transit switch ports in ovn-kubernetes interconnect mode.
> + </column>
> +
> <column name="options" key="activation-strategy">
> If used with multiple chassis set in
> <ref column="requested-chassis"/>, specifies an activation strategy
> @@ -4393,6 +4402,15 @@ or
> </p>
> </column>
>
> + <column name="options" key="requested-encap-ip">
> + Requests the encapsulation IP address for the port binding. If set,
> + <code>ovn-northd</code> uses this IP to select the
> + <ref table="Encap" db="OVN_Southbound"/> entry written to
> + <ref table="Port_Binding" column="encap" db="OVN_Southbound"/>.
> + This is intended for ports without a local OVS interface, e.g. remote
> + transit router ports in ovn-kubernetes interconnect mode.
> + </column>
> +
> <column name="options" key="dynamic-routing-redistribute"
> type='{"type": "string"}'>
> <p>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 512e42036..9245ff639 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -2896,6 +2896,69 @@ OVN_CLEANUP_NORTHD
> AT_CLEANUP
> ])
>
> +OVN_FOR_EACH_NORTHD_NO_HV([
> +AT_SETUP([check options:requested-encap-ip fills port binding encap col])
> +AT_KEYWORDS([requested encap ip])
> +ovn_start
> +
> +check_uuid ovn-sbctl \
> + -- --id=@e11 create encap chassis_name=ch1 ip="192.168.1.1"
> type="geneve" \
> + -- --id=@e12 create encap chassis_name=ch1 ip="192.168.1.2"
> type="geneve" \
> + -- --id=@c1 create chassis name=ch1 encaps=@e11,@e12
> +check_uuid ovn-sbctl \
> + -- --id=@e21 create encap chassis_name=ch2 ip="192.168.2.1"
> type="geneve" \
> + -- --id=@e22 create encap chassis_name=ch2 ip="192.168.2.2"
> type="geneve" \
> + -- --id=@c2 create chassis name=ch2 encaps=@e21,@e22
> +
> +wait_row_count Chassis 2
> +wait_row_count Encap 4
> +en11_uuid=`ovn-sbctl --bare --columns _uuid find Encap ip="192.168.1.1"`
Nit: for this and all other lines where you look up the UUID of the
encap, you can instead use the fetch_column() function provided by the
testsuite:
en11_uuid=$(fetch_column Encap _uuid ip="192.168.1.1")
> +en12_uuid=`ovn-sbctl --bare --columns _uuid find Encap ip="192.168.1.2"`
> +en21_uuid=`ovn-sbctl --bare --columns _uuid find Encap ip="192.168.2.1"`
> +en22_uuid=`ovn-sbctl --bare --columns _uuid find Encap ip="192.168.2.2"`
> +ovn-sbctl show
> +
> +echo "__file__:__line__: encap uuid: $en11_uuid, ip: 192.168.1.1"
> +echo "__file__:__line__: encap uuid: $en12_uuid, ip: 192.168.1.2"
> +echo "__file__:__line__: encap uuid: $en21_uuid, ip: 192.168.2.1"
> +echo "__file__:__line__: encap uuid: $en22_uuid, ip: 192.168.2.2"
> +
> +check ovn-nbctl --wait=sb ls-add ls1
> +check ovn-nbctl --wait=sb lsp-add ls1 lsp1
> +check ovn-nbctl --wait=sb lsp-add ls1 lsp2
> +ovn-nbctl show
> +
> +echo "options:requested-chassis is required to set
> options:requested-encap-ip"
> +check ovn-nbctl --wait=sb set logical-switch-port lsp1 \
> + options:requested-encap-ip=192.168.1.1
> +check ovn-nbctl --wait=sb sync
> +wait_row_count Port_Binding 1 logical_port=lsp1 'encap=[[]]'
> +
> +# Now set both options
> +check ovn-nbctl --wait=sb set logical-switch-port lsp1 \
> + options:requested-chassis=ch1 \
> + options:requested-encap-ip=192.168.1.1
> +check ovn-nbctl --wait=sb set logical-switch-port lsp2 \
> + options:requested-chassis=ch2 \
> + options:requested-encap-ip=192.168.2.2
> +
> +wait_row_count Port_Binding 1 logical_port=lsp1 encap="$en11_uuid"
> +wait_row_count Port_Binding 1 logical_port=lsp2 encap="$en22_uuid"
> +
> +# remove options:requested-encap-ip from lsp1
> +check ovn-nbctl --wait=sb remove logical_switch_port lsp1 \
> + options requested-encap-ip=192.168.1.1
> +wait_row_count Port_Binding 1 logical_port=lsp1 'encap=[[]]'
> +
> +# remove options:requested-chassis from lsp2
> +check ovn-nbctl --wait=sb remove logical_switch_port lsp2 \
> + options requested-chassis=ch2
> +wait_row_count Port_Binding 1 logical_port=lsp2 'encap=[[]]'
> +
One thing this test does not validate is that geneve encaps take
priority over vxlan encaps.
> +OVN_CLEANUP_NORTHD
> +AT_CLEANUP
> +])
> +
> OVN_FOR_EACH_NORTHD_NO_HV([
> AT_SETUP([port requested-tnl-key])
> AT_KEYWORDS([requested tnl tunnel key keys])
> --
> 2.39.5 (Apple Git-154)
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev