On 5/22/25 10:40 AM, Vasyl Saienko wrote: > With this patch ovn-controller-vtep start synchronizing local > MAC database to FDB OVN SB table. > This information is used by ovn-controller instances to > create direct flows to remove macs. > > Signed-Off-By: Vasyl Saienko <vsaie...@mirantis.com> > ---
Hi Vasyl, Again, sorry for the long time it took to review this patch. I have a few comments. Most of them are nits that I could fix up myself but the main question is related to SB.FDB entries created by ovn-controller-vtep and the fact that if we have FDB aging enabled things might behave a bit weird as ovn-northd would continuously remove (age-out) these entries. Please see inline. Also, with this patch applied, the "vtep: 3 HVs, 1 VIFs/HV, 1 GW, 1 LS" test becomes unstable because there's a race condition as the test is not waiting for SB.FDB entries to be processed by ovn-controller. One way to avoid that could be to add the following incremental change: diff --git a/tests/ovn.at b/tests/ovn.at index 108de260ef..b0cc95d224 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -4678,6 +4678,12 @@ AT_CHECK([ovn-sbctl lflow-list lsw0 | grep 'reg0[\[14\]]' | ovn_strip_lflows], [ table=??(ls_in_hairpin ), priority=2000 , match=(reg0[[14]] == 1 && is_chassis_resident("cr-lrp2")), action=(next;) ]) +# Ensure FDB table is up to date. +wait_row_count FDB 1 mac='"02:00:00:00:00:ee"' +wait_row_count FDB 1 mac='"02:00:00:00:00:ff"' +wait_row_count FDB 1 mac='"f0:00:00:00:00:03"' +check_row_count FDB 3 + # dump information with counters echo "------ OVN dump ------" ovn-nbctl show The test failed a couple of times in CI: https://github.com/ovsrobot/ovn/actions/runs/15183730341/job/42702264742#step:11:5010 > controller-vtep/ovn-controller-vtep.c | 6 + > controller-vtep/vtep.c | 173 ++++++++++++++++++++++++++ > tests/ovn-controller-vtep.at | 102 ++++++++++++++- > 3 files changed, 279 insertions(+), 2 deletions(-) > > diff --git a/controller-vtep/ovn-controller-vtep.c > b/controller-vtep/ovn-controller-vtep.c > index 698511482..ea113f8df 100644 > --- a/controller-vtep/ovn-controller-vtep.c > +++ b/controller-vtep/ovn-controller-vtep.c > @@ -171,6 +171,12 @@ main(int argc, char *argv[]) > ovsdb_idl_add_column(ovnsb_idl_loop.idl, > &sbrec_port_binding_col_options); > ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type); > ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_up); > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, > + &sbrec_port_binding_col_tunnel_key); > + ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_fdb); > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_fdb_col_mac); > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_fdb_col_dp_key); > + ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_fdb_col_port_key); > > ovsdb_idl_set_leader_only(ovnsb_idl_loop.idl, false); > ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl); > diff --git a/controller-vtep/vtep.c b/controller-vtep/vtep.c > index 7f5e1d606..f86cb679f 100644 > --- a/controller-vtep/vtep.c > +++ b/controller-vtep/vtep.c > @@ -42,6 +42,12 @@ struct mmr_hash_node_data { > struct shash physical_locators; > }; > > +struct lp_fdb_node_data { > + uint32_t dp_key; > + uint32_t port_key; > + struct shash mac_fdbs; > +}; > + > /* > * Scans through the Binding table in ovnsb, and updates the vtep logical > * switch tunnel keys and the 'Ucast_Macs_Remote' table in the VTEP > @@ -519,6 +525,140 @@ vtep_mcast_macs_cleanup(struct ovsdb_idl *vtep_idl) > > return true; > } > + > +static const struct sbrec_port_binding * > +find_pbs_for_logical_switch(struct shash *vtep_pbs, Nit: I'd call this logical_switch_find_pb(). > + struct sset *vtep_pswitches, > + char *ls_name){ > + > + struct shash_node *node; > + SHASH_FOR_EACH (node, vtep_pbs) { > + const struct sbrec_port_binding *port_binding_rec = node->data; > + const char *pswitch_name = smap_get(&port_binding_rec->options, > + "vtep-physical-switch"); > + const char *lswitch_name = smap_get(&port_binding_rec->options, > + "vtep-logical-switch"); > + if (!port_binding_rec->chassis) { > + continue; > + } > + > + /* If 'port_binding_rec->chassis' exists then 'pswitch_name' > + * and 'lswitch_name' must also exist. */ > + if (!pswitch_name || !lswitch_name) { > + /* This could only happen when someone directly modifies the > + * database, (e.g. using ovn-sbctl). */ > + VLOG_ERR("logical port (%s) with no 'options:vtep-physical-" Nit: VLOG_ERR_RL() instead. > + "switch' or 'options:vtep-logical-switch' specified " > + "is bound to chassis (%s).", > + port_binding_rec->logical_port, > + port_binding_rec->chassis->name); > + continue; > + } > + /* Make sure both logical_switch and physical_switch matches */ > + if (!strcmp(ls_name, lswitch_name)) { > + if (sset_find(vtep_pswitches, port_binding_rec->chassis->name)) { > + return port_binding_rec; > + } > + } Nit: indentation is off here. > + } > + return NULL; > +} > + > +/* Propagate dynamically learned MACs on local VTEPs to OVN SB. Where > + * later this information is used to create tunnels between neighbour > + * VTEPs. > +*/ > +static void > +vtep_local_macs(struct controller_vtep_ctx *ctx, > + struct shash *vtep_pbs, > + struct sset *vtep_pswitches, > + struct shash *vtep_lswitches, > + struct shash *lp_fdb){ > + > + if (!ctx->ovnsb_idl_txn) { > + return; > + } > + > + ovsdb_idl_txn_add_comment(ctx->ovnsb_idl_txn, > + "ovn-controller-vtep: updating fdb"); > + > + const struct vteprec_ucast_macs_local *vtep_uml; > + const struct sbrec_port_binding *port_binding_rec; > + const struct sbrec_fdb *fdb; These last two can be declared inside the loops, restricting the scope of the variables. > + > + /* Collect local unicast MACs */ > + VTEPREC_UCAST_MACS_LOCAL_FOR_EACH (vtep_uml, ctx->vtep_idl) { > + port_binding_rec = find_pbs_for_logical_switch( > + vtep_pbs, vtep_pswitches, vtep_uml->logical_switch->name); > + if (!port_binding_rec) { > + VLOG_ERR("Cannot find port_binding for dynamically learned MAC " > + "%s in logical_switch %s", vtep_uml->MAC, > + vtep_uml->logical_switch->name); > + continue; > + } > + > + if (!port_binding_rec->datapath->tunnel_key || > + !port_binding_rec->tunnel_key) { > + continue; > + } Datapath_bindings and port binding tunnel_keys can never be 0. We can skip this check. > + > + char *fdb_dp_port_key = xasprintf( > + "%"PRId64"_%"PRId64, port_binding_rec->datapath->tunnel_key, > + port_binding_rec->tunnel_key); > + > + struct lp_fdb_node_data *sbrec_lp_fdb = shash_find_data( > + lp_fdb, fdb_dp_port_key); The indentation looks a bit awkward here. I'd change it to: struct lp_fdb_node_data *sbrec_lp_fdb = shash_find_data(lp_fdb, fdb_dp_port_key); > + fdb = NULL; > + if (sbrec_lp_fdb) { > + fdb = shash_find_and_delete(&sbrec_lp_fdb->mac_fdbs, > + vtep_uml->MAC); > + } > + free(fdb_dp_port_key); > + > + if (!fdb) { > + const struct sbrec_fdb *sb_fdb; > + VLOG_DBG("Creating new FDB entry for mac %s", vtep_uml->MAC); > + sb_fdb = sbrec_fdb_insert(ctx->ovnsb_idl_txn); > + sbrec_fdb_set_dp_key(sb_fdb, > + port_binding_rec->datapath->tunnel_key); > + sbrec_fdb_set_mac(sb_fdb, vtep_uml->MAC); > + sbrec_fdb_set_port_key(sb_fdb, port_binding_rec->tunnel_key); > + sbrec_fdb_set_timestamp(sb_fdb, time_wall_msec()); So, this makes me wonder, how does this new feature work in conjunction with FDB aging (ovn-northd removes FDB entries that are older and inactive for more than a configured threshold). I'm guess it doesn't work. Do we need to support that case? If yes, do we need to periodically refresh the timestamp of vtep-inserted FDB records? > + } > + } > + > + struct shash_node *node; > + struct shash_node *sbrec_fdb_node; This can be declared inside the loop, restricting the scope of the variable. > + SHASH_FOR_EACH (node, vtep_lswitches) { > + port_binding_rec = find_pbs_for_logical_switch(vtep_pbs, > + vtep_pswitches, > + node->name); > + if (!port_binding_rec) { > + continue; > + } > + if (!port_binding_rec->datapath->tunnel_key || > + !port_binding_rec->tunnel_key) { > + continue; > + } > + > + char *fdb_dp_port_key = xasprintf( > + "%"PRId64"_%"PRId64, port_binding_rec->datapath->tunnel_key, > + port_binding_rec->tunnel_key); > + struct lp_fdb_node_data *sbrec_lp_fdb = shash_find_data( > + lp_fdb, fdb_dp_port_key); > + free(fdb_dp_port_key); > + > + if (!sbrec_lp_fdb) { > + continue; > + } > + > + SHASH_FOR_EACH (sbrec_fdb_node, &sbrec_lp_fdb->mac_fdbs) { > + struct sbrec_fdb *mac_fdb = sbrec_fdb_node->data; > + VLOG_DBG("Removing stale FDB for VTEP mac %s", mac_fdb->mac); > + sbrec_fdb_delete(mac_fdb); > + } > + } > +} > > /* Updates vtep logical switch tunnel keys. */ > void > @@ -605,6 +745,29 @@ vtep_run(struct controller_vtep_ctx *ctx) > shash_add(target, port_binding_rec->logical_port, port_binding_rec); > } > > + /* Construct logical_port to fdb */ > + const struct sbrec_fdb *fdb; > + struct shash lp_fdbs = SHASH_INITIALIZER(&lp_fdbs); > + > + SBREC_FDB_FOR_EACH (fdb, ctx->ovnsb_idl) { > + char *fdb_dp_port_key = xasprintf("%"PRId64"_%"PRId64, fdb->dp_key, > + fdb->port_key); > + struct lp_fdb_node_data *lp_fdb = shash_find_data(&lp_fdbs, > + fdb_dp_port_key); > + Nit: we don't really need this empty line here. > + if (!lp_fdb) { > + lp_fdb = xmalloc(sizeof *lp_fdb); > + lp_fdb->dp_key = fdb->dp_key; > + lp_fdb->port_key = fdb->port_key; > + shash_init(&lp_fdb->mac_fdbs); To ensure that all members of the structure are initialized (at least zeroed), in the eventuality that new fields are added in the future, I'd write this as: lp_fdb = xmalloc(sizeof *lp_fdb); *lp_fdb = (struct lp_fdb_node_data) { .dp_key = fdb->dp_key, .port_key = fdb->port_key, .mac_fdbs = SHASH_INITIALIZER(&lp_fdb->mac_fdbs), }; Nit: I'd add an empty line here between member initializations and other operations. > + shash_add(&lp_fdb->mac_fdbs, fdb->mac, fdb); This is executed in both cases (lp_fdb existed or not), it can be moved after the "if/else". > + shash_add(&lp_fdbs, fdb_dp_port_key, lp_fdb); > + } else { > + shash_add(&lp_fdb->mac_fdbs, fdb->mac, fdb); > + } > + free(fdb_dp_port_key); > + } > + > ovsdb_idl_txn_add_comment(ctx->vtep_idl_txn, > "ovn-controller-vtep: update logical switch " > "tunnel keys and 'ucast_macs_remote's"); > @@ -613,6 +776,8 @@ vtep_run(struct controller_vtep_ctx *ctx) > vtep_macs_run(ctx->vtep_idl_txn, &ucast_macs_rmts, > &mcast_macs_rmts, &physical_locators, > &vtep_lswitches, &non_vtep_pbs); > + vtep_local_macs(ctx, &vtep_pbs, &vtep_pswitches, &vtep_lswitches, > + &lp_fdbs); > > sset_destroy(&vtep_pswitches); > shash_destroy(&vtep_lswitches); > @@ -628,6 +793,14 @@ vtep_run(struct controller_vtep_ctx *ctx) > shash_destroy(&physical_locators); > shash_destroy(&vtep_pbs); > shash_destroy(&non_vtep_pbs); > + > + struct shash_node *lp_fdb_node; > + SHASH_FOR_EACH (lp_fdb_node, &lp_fdbs) { > + struct lp_fdb_node_data *lp_fdb = lp_fdb_node->data; > + shash_destroy(&lp_fdb->mac_fdbs); > + free(lp_fdb); > + } > + shash_destroy(&lp_fdbs); > } > > /* Cleans up all related entries in vtep. Returns true when done (i.e. there > diff --git a/tests/ovn-controller-vtep.at b/tests/ovn-controller-vtep.at > index d410ecd28..2434d9126 100644 > --- a/tests/ovn-controller-vtep.at > +++ b/tests/ovn-controller-vtep.at > @@ -1,9 +1,10 @@ > AT_BANNER([ovn_controller_vtep]) > > -# OVN_CONTROLLER_VTEP_START(SIM_NAME) > +# OVN_CONTROLLER_VTEP_START(SIM_NAME, TUNNEL_IP) > # > # $1 - optional simulator name. If none is given, runs ovn-controller-vtep, > and > # vtep emulator in $ovs_dir. > +# $2 - optional tunnel ip. If none is given default 1.2.3.4 is used > # Starts the test with a setup with vtep device. Each test case must first > # call this macro and ovn_start. > # > @@ -14,7 +15,9 @@ m4_define([OVN_CONTROLLER_VTEP_START], [ > AT_KEYWORDS([ovn]) > # this will cause skip when 'make check' using Windows setup. > sim="$1" > + tunnel_ip="$2" > vtep_chassis=${sim:-br-vtep} > + tunnel_ip=${tunnel_ip:-1.2.3.4} > > test -n "$sim" && as "$sim" > mkdir -p "$ovs_dir" || return 1 > @@ -41,7 +44,7 @@ m4_define([OVN_CONTROLLER_VTEP_START], [ > -- add-port $vtep_chassis p1 -- set Interface p1 type=dummy > ofport_request=2 > > dnl Start ovs-vtep. > - check vtep-ctl add-ps $vtep_chassis -- set Physical_Switch $vtep_chassis > tunnel_ips=1.2.3.4 > + check vtep-ctl add-ps $vtep_chassis -- set Physical_Switch $vtep_chassis > tunnel_ips=$tunnel_ip > AT_CHECK([ovs-vtep --log-file="$ovs_dir"/ovs-vtep.log \ > --pidfile="$ovs_dir"/ovs-vtep.pid \ > --detach --no-chdir $vtep_chassis], [0], [], [stderr]) > @@ -86,6 +89,9 @@ AT_CHECK([ovn-nbctl lsp-set-type $2 vtep]) > AT_CHECK([ovn-nbctl lsp-set-options $2 vtep-physical-switch=$3 > vtep-logical-switch=$4]) > ]) > > +# mac learning table is hardcoded in ovs-vtep emulator > +m4_define([mac_learning_table], [1]) > + > ############################################## > > # tests chassis related updates. > @@ -509,6 +515,98 @@ AT_CHECK([vtep-ctl --columns=MAC list Ucast_Macs_Remote > | cut -d ':' -f2- | tr - > OVN_CONTROLLER_VTEP_STOP([/has already been known to be on logical port/d]) > AT_CLEANUP > > +# Tests vtep module 'Ucast_Macs_Local' MACs propagation > +AT_SETUP([ovn-controller-vtep - vtep-macs 3]) > +ovn_start > +OVN_CONTROLLER_VTEP_START > + > +check ovn-nbctl ls-add br-test > +check ovn-nbctl ls-add br-test1 > + > +# creates a simple logical network with the vtep device and a fake hv chassis > +# 'ch0'. > +AT_CHECK([ovn-nbctl --wait=sb sync]) > + > +# creates the logical switch in vtep and adds the corresponding logical > +# port to 'br-test'. > +AT_CHECK([vtep-ctl add-ls lswitch0 -- bind-ls br-vtep p0 100 lswitch0]) > +OVN_NB_ADD_VTEP_PORT([br-test], [br-vtep_lswitch0], [br-vtep], [lswitch0]) > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl list Port_Binding | grep > br-vtep_lswitch0`"]) Nit: please use wait_row_count, e.g.: wait_row_count Port_Binding 1 logical_port=br-vtep_lswitch0 > +OVS_WAIT_UNTIL([test -n "`ovs-vsctl show | grep br-vtep_vtep_ls1`"]) > + > +# Simulate peer on physical port to populate Ucast_Macs_Local table > +AT_CHECK([ovs-ofctl -O OpenFlow14 add-flow br-vtep_vtep_ls1 > "cookie=0x5000,table=mac_learning_table,priority=1000,dl_dst=aa:bb:cc:dd:ee:01 > actions=output:0100-p0-l"]) > + > +# Wait Ucast_Macs_Local is updated with local MAC > +OVS_WAIT_UNTIL([test -n "`vtep-ctl list Ucast_Macs_Local | grep > aa:bb:cc:dd:ee:01`"]) > + > +# Check that OVNSB is updated with learned MAC > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl find Fdb mac='"aa:bb:cc:dd:ee:01"' |grep > _uuid`"]) > + > +# Plug p1 to lswitch1 > +AT_CHECK([vtep-ctl add-ls lswitch1 -- bind-ls br-vtep p1 100 lswitch1]) > +OVN_NB_ADD_VTEP_PORT([br-test1], [br-vtep_lswitch1], [br-vtep], [lswitch1]) > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl list Port_Binding | grep > br-vtep_lswitch1`"]) Nit: wait_row_count Port_Binding 1 logical_port=br-vtep_lswitch1 > +OVS_WAIT_UNTIL([test -n "`ovs-vsctl show | grep br-vtep_vtep_ls2`"]) > + > +OVS_WAIT_UNTIL([test -n "`ovs-vsctl show |grep 0100-p1-l`"]) > + > +# Simulate peer on physical port to populate Ucast_Macs_Local table > +AT_CHECK([ovs-ofctl -O OpenFlow14 add-flow br-vtep_vtep_ls2 > "cookie=0x5000,table=mac_learning_table,priority=1000,dl_dst=aa:bb:cc:dd:ee:02 > actions=output:0100-p1-l"]) > + > +# Wait Ucast_Macs_Local is updated with local MAC > +OVS_WAIT_UNTIL([test -n "`vtep-ctl list Ucast_Macs_Local | grep > aa:bb:cc:dd:ee:02`"]) > + > +# Remove MAC from Ucast_Macs_Local > +AT_CHECK([ovs-ofctl -O OpenFlow14 del-flows br-vtep_vtep_ls1 > "table=mac_learning_table,dl_dst=aa:bb:cc:dd:ee:01"]) > + > +# Check that first MAC is removed from OVNSB, but second is still there > +OVS_WAIT_UNTIL([test -z "`ovn-sbctl find Fdb mac='"aa:bb:cc:dd:ee:01"'`"]) > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl find Fdb mac='"aa:bb:cc:dd:ee:02"'`"]) > + Nit: wait_row_count > +# Remove second MAC from Ucast_Macs_Local > +AT_CHECK([ovs-ofctl -O OpenFlow14 del-flows br-vtep_vtep_ls2 > "table=mac_learning_table,dl_dst=aa:bb:cc:dd:ee:02"]) > + > +# Check that second MAC is removed from OVNSB > +OVS_WAIT_UNTIL([test -z "`ovn-sbctl find Fdb mac='"aa:bb:cc:dd:ee:01"'`"]) > +OVS_WAIT_UNTIL([test -z "`ovn-sbctl find Fdb mac='"aa:bb:cc:dd:ee:02"'`"]) > + > +# Start second VTEP chassis > +OVN_CONTROLLER_VTEP_START(br-vtep1, 1.2.3.5) > + > +# creates the logical switch in vtep and adds the corresponding logical > +# # port to 'br-test'. > +AT_CHECK([vtep-ctl add-ls lswitch0 -- bind-ls br-vtep1 p0 100 lswitch0]) > +OVN_NB_ADD_VTEP_PORT([br-test], [br-vtep1_lswitch0], [br-vtep1], [lswitch0]) > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl list Port_Binding | grep > br-vtep1_lswitch0`"]) Nit: wait_row_count > +OVS_WAIT_UNTIL([test -n "`ovs-vsctl show | grep br-vtep1_vtep_ls1`"]) > + > +# creates the logical switch in vtep and adds the corresponding logical > +# # # port to 'br-test'. > +AT_CHECK([vtep-ctl add-ls lswitch1 -- bind-ls br-vtep1 p1 100 lswitch1]) > +OVN_NB_ADD_VTEP_PORT([br-test1], [br-vtep1_lswitch1], [br-vtep1], [lswitch1]) > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl list Port_Binding | grep > br-vtep1_lswitch1`"]) Nit: wait_row_count > +OVS_WAIT_UNTIL([test -n "`ovs-vsctl show | grep br-vtep1_vtep_ls2`"]) > + > +# Wait bingins are activated > +OVS_WAIT_UNTIL([test -n "`ovs-vsctl show |grep 0100-p0-l`"]) > +OVS_WAIT_UNTIL([test -n "`ovs-vsctl show |grep 0100-p1-l`"]) > + > +# Simulate peer on physical port to populate Ucast_Macs_Local table on > br-vtep1 > +AT_CHECK([ovs-ofctl -O OpenFlow14 add-flow br-vtep1_vtep_ls1 > "cookie=0x5000,table=mac_learning_table,priority=1000,dl_dst=aa:bb:cc:dd:ff:01 > actions=output:0100-p0-l"]) > +AT_CHECK([ovs-ofctl -O OpenFlow14 add-flow br-vtep1_vtep_ls2 > "cookie=0x5000,table=mac_learning_table,priority=1000,dl_dst=aa:bb:cc:dd:ff:02 > actions=output:0100-p1-l"]) > + > +# Wait Ucast_Macs_Local is updated with local MAC > +OVS_WAIT_UNTIL([test -n "`vtep-ctl list Ucast_Macs_Local | grep > aa:bb:cc:dd:ff:01`"]) > +OVS_WAIT_UNTIL([test -n "`vtep-ctl list Ucast_Macs_Local | grep > aa:bb:cc:dd:ff:02`"]) > + > +# Check that first MACs is propagated to FDB table > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl find Fdb mac='"aa:bb:cc:dd:ff:01"'`"]) > +OVS_WAIT_UNTIL([test -n "`ovn-sbctl find Fdb mac='"aa:bb:cc:dd:ff:02"'`"]) Nit: wait_row_count > + > +OVN_CONTROLLER_VTEP_STOP > +AT_CLEANUP > + > # Tests vtep module 'Mcast_Macs_Remote's. > AT_SETUP([ovn-controller-vtep - vtep-Mcast_Macs_Remote]) > ovn_start Regards, Dumitru _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev