Hi Felix, I noticed that the system tests seem to leave 'ovnvrf1337' in the system after they finish. The weird thing is that it happens whether they pass or fail and it's always 1337, even though both tests use other VRFs as well.
It's not a big deal for the test env, since other tests can use VRF_RESERVE to cleanup system before they run, but it strikes me as odd that the 1337 always persists, so probably worth looking into. Also small inconsistency, these tests use VRF tables 1337,1338 and 1339, but they don't run VRF_RESERVE for all of them. Martin. On Wed, 2025-01-29 at 12:15 +0100, Felix Huettner via dev wrote: > When we stop ovn-controller without immediately restarting it we now > cleanup routes. > This allows the routing agents to stop advertising this chassis to > the > fabric. > > Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud> > --- > v3->v4: > - addressed review comments. > > controller/route-exchange-netlink.c | 34 +++++++++++---- > controller/route-exchange-netlink.h | 2 + > controller/route-exchange.c | 66 > +++++++++++++++++++++++++++- > tests/system-ovn.at | 67 > ++++++++++++++++++++++++++++- > 4 files changed, 159 insertions(+), 10 deletions(-) > > diff --git a/controller/route-exchange-netlink.c b/controller/route- > exchange-netlink.c > index 6a9612a7e..93b3057c8 100644 > --- a/controller/route-exchange-netlink.c > +++ b/controller/route-exchange-netlink.c > @@ -223,6 +223,9 @@ handle_route_msg(const struct route_table_msg > *msg, void *data) > > /* This route is not from us, so we learn it. */ > if (rd->rtm_protocol != RTPROT_OVN) { > + if (!handle_data->learned_routes) { > + return; > + } > if (prefix_is_link_local(&rd->rta_dst, rd->rtm_dst_len)) { > return; > } > @@ -244,14 +247,16 @@ handle_route_msg(const struct route_table_msg > *msg, void *data) > return; > } > > - struct hmapx_node *hn; > - HMAPX_FOR_EACH_SAFE (hn, handle_data->routes_to_advertise) { > - ar = hn->data; > - if (ipv6_addr_equals(&ar->addr, &rd->rta_dst) > - && ar->plen == rd->rtm_dst_len > - && ar->priority == rd->rta_priority) { > - hmapx_delete(handle_data->routes_to_advertise, hn); > - return; > + if (handle_data->routes_to_advertise) { > + struct hmapx_node *hn; > + HMAPX_FOR_EACH_SAFE (hn, handle_data->routes_to_advertise) { > + ar = hn->data; > + if (ipv6_addr_equals(&ar->addr, &rd->rta_dst) > + && ar->plen == rd->rtm_dst_len > + && ar->priority == rd->rta_priority) { > + hmapx_delete(handle_data->routes_to_advertise, hn); > + return; > + } > } > } > > @@ -309,3 +314,16 @@ re_nl_sync_routes(uint32_t table_id, const > struct hmap *routes, > } > hmapx_destroy(&routes_to_advertise); > } > + > +void > +re_nl_cleanup_routes(uint32_t table_id) > +{ > + /* Remove routes from the system that are not in the host_routes > hmap and > + * remove entries from host_routes hmap that match routes > already installed > + * in the system. */ > + struct route_msg_handle_data data = { > + .routes_to_advertise = NULL, > + .learned_routes = NULL, > + }; > + route_table_dump_one_table(table_id, handle_route_msg, &data); > +} > diff --git a/controller/route-exchange-netlink.h b/controller/route- > exchange-netlink.h > index b930f05a2..7a3ae1dda 100644 > --- a/controller/route-exchange-netlink.h > +++ b/controller/route-exchange-netlink.h > @@ -55,4 +55,6 @@ void re_nl_sync_routes(uint32_t table_id, const > struct hmap *routes, > struct ovs_list *learned_routes, > const struct sbrec_datapath_binding *db); > > +void re_nl_cleanup_routes(uint32_t table_id); > + > #endif /* route-exchange-netlink.h */ > diff --git a/controller/route-exchange.c b/controller/route- > exchange.c > index e7a85eecf..47ff2350e 100644 > --- a/controller/route-exchange.c > +++ b/controller/route-exchange.c > @@ -19,6 +19,7 @@ > > #include <errno.h> > #include <net/if.h> > +#include <stdbool.h> > > #include "openvswitch/vlog.h" > #include "openvswitch/list.h" > @@ -37,6 +38,13 @@ > VLOG_DEFINE_THIS_MODULE(route_exchange); > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > +struct maintained_route_table_entry { > + struct hmap_node node; > + uint32_t table_id; > +}; > + > +static struct hmap _maintained_route_tables = HMAP_INITIALIZER( > + &_maintained_route_tables); > static struct sset _maintained_vrfs = > SSET_INITIALIZER(&_maintained_vrfs); > > struct route_entry { > @@ -45,6 +53,38 @@ struct route_entry { > const struct sbrec_learned_route *sb_route; > }; > > +static uint32_t > +maintained_route_table_hash(uint32_t table_id) > +{ > + return hash_int(table_id, 0); > +} > + > +static bool > +maintained_route_table_contains(uint32_t table_id) > +{ > + uint32_t hash = maintained_route_table_hash(table_id); > + struct maintained_route_table_entry *mrt; > + HMAP_FOR_EACH_WITH_HASH (mrt, node, hash, > + &_maintained_route_tables) { > + if (mrt->table_id == table_id) { > + return true; > + } > + } > + return false; > +} > + > +static void > +maintained_route_table_add(uint32_t table_id) > +{ > + if (maintained_route_table_contains(table_id)) { > + return; > + } > + uint32_t hash = maintained_route_table_hash(table_id); > + struct maintained_route_table_entry *mrt = > xzalloc(sizeof(*mrt)); > + mrt->table_id = table_id; > + hmap_insert(&_maintained_route_tables, &mrt->node, hash); > +} > + > static struct route_entry * > route_alloc_entry(struct hmap *routes, > const struct sbrec_learned_route *sb_route) > @@ -174,6 +214,9 @@ route_exchange_run(struct route_exchange_ctx_in > *r_ctx_in, > { > struct sset old_maintained_vrfs = > SSET_INITIALIZER(&old_maintained_vrfs); > sset_swap(&_maintained_vrfs, &old_maintained_vrfs); > + struct hmap old_maintained_route_table = HMAP_INITIALIZER( > + &old_maintained_route_table); > + hmap_swap(&_maintained_route_tables, > &old_maintained_route_table); > > const struct advertise_datapath_entry *ad; > HMAP_FOR_EACH (ad, node, r_ctx_in->announce_routes) { > @@ -201,9 +244,10 @@ route_exchange_run(struct route_exchange_ctx_in > *r_ctx_in, > sset_find_and_delete(&old_maintained_vrfs, vrf_name); > } > > + maintained_route_table_add(table_id); > + > struct ovs_list received_routes = OVS_LIST_INITIALIZER( > &received_routes); > - > re_nl_sync_routes(ad->db->tunnel_key, &ad->routes, > &received_routes, ad->db); > > @@ -218,6 +262,15 @@ route_exchange_run(struct route_exchange_ctx_in > *r_ctx_in, > re_nl_learned_routes_destroy(&received_routes); > } > > + /* Remove routes in tables previously maintained by us. */ > + struct maintained_route_table_entry *mrt; > + HMAP_FOR_EACH_POP (mrt, node, &old_maintained_route_table) { > + if (!maintained_route_table_contains(mrt->table_id)) { > + re_nl_cleanup_routes(mrt->table_id); > + } > + free(mrt); > + } > + hmap_destroy(&old_maintained_route_table); > > /* Remove VRFs previously maintained by us not found in the > above loop. */ > const char *vrf_name; > @@ -233,6 +286,11 @@ route_exchange_run(struct route_exchange_ctx_in > *r_ctx_in, > void > route_exchange_cleanup_vrfs(void) > { > + struct maintained_route_table_entry *mrt; > + HMAP_FOR_EACH (mrt, node, &_maintained_route_tables) { > + re_nl_cleanup_routes(mrt->table_id); > + } > + > const char *vrf_name; > SSET_FOR_EACH (vrf_name, &_maintained_vrfs) { > re_nl_delete_vrf(vrf_name); > @@ -242,10 +300,16 @@ route_exchange_cleanup_vrfs(void) > void > route_exchange_destroy(void) > { > + struct maintained_route_table_entry *mrt; > + HMAP_FOR_EACH_POP (mrt, node, &_maintained_route_tables) { > + free(mrt); > + } > + > const char *vrf_name; > SSET_FOR_EACH_SAFE (vrf_name, &_maintained_vrfs) { > sset_delete(&_maintained_vrfs, > SSET_NODE_FROM_NAME(vrf_name)); > } > > sset_destroy(&_maintained_vrfs); > + hmap_destroy(&_maintained_route_tables); > } > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index 3f5672dad..963a67e0c 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -15090,8 +15090,39 @@ check ovn-nbctl --wait=hv set > Logical_Router_Port internet-phys \ > options:dynamic-routing-ifname=lo > check_row_count Learned_Route 1 > > - > +# Stopping the ovn-controller will clean up the route entries > created by it. > +# We first need to unset dynamic-routing-maintain-vrf as otherwise > it will > +# delete the whole vrf. > +check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \ > + options:dynamic-routing-maintain- > vrf=false > OVS_APP_EXIT_AND_WAIT([ovn-controller]) > +AT_CHECK([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [0], [dnl > +233.252.0.0/24 via 192.168.10.10 dev lo onlink > +]) > + > +# Starting it again will add the routes again. > +# The 2 sync commands ensure that we wait until the routes are > actually > +# installed. Otherwise this is racy. > +start_daemon ovn-controller > +OVS_WAIT_UNTIL([test "$(ovn-appctl -t ovn-controller debug/status)" > == "running"]) > +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk > '{$1=$1};1'], [dnl > +blackhole 192.0.2.1 proto 84 metric 1000 > +blackhole 192.0.2.2 proto 84 metric 100 > +blackhole 192.0.2.3 proto 84 metric 100 > +blackhole 192.0.2.10 proto 84 metric 100 > +blackhole 198.51.100.0/24 proto 84 metric 1000 > +233.252.0.0/24 via 192.168.10.10 dev lo onlink]) > + > +# Stoping with --restart will not touch the routes. > +check ovn-appctl -t ovn-controller exit --restart > +OVS_WAIT_UNTIL([test "$(ovn-appctl -t ovn-controller debug/status)" > != "running"]) > +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk > '{$1=$1};1'], [dnl > +blackhole 192.0.2.1 proto 84 metric 1000 > +blackhole 192.0.2.2 proto 84 metric 100 > +blackhole 192.0.2.3 proto 84 metric 100 > +blackhole 192.0.2.10 proto 84 metric 100 > +blackhole 198.51.100.0/24 proto 84 metric 1000 > +233.252.0.0/24 via 192.168.10.10 dev lo onlink]) > > as ovn-sb > OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > @@ -15337,6 +15368,40 @@ check ovn-nbctl --wait=hv set > Logical_Router_Port internet-phys \ > options:dynamic-routing-ifname=lo > check_row_count Learned_Route 1 > > +# Stopping the ovn-controller will clean up the route entries > created by it. > +# We first need to unset dynamic-routing-maintain-vrf as otherwise > it will > +# delete the whole vrf > +check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \ > + options:dynamic-routing-maintain- > vrf=false > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > +AT_CHECK([ip route list vrf ovnvrf1337 | awk '{$1=$1};1'], [0], [dnl > +233.252.0.0/24 via 192.168.10.10 dev lo onlink > +]) > + > +# Starting it again will add the routes again. > +# The 2 sync commands ensure that we wait until the routes are > actually > +# installed. Otherwise this is racy. > +start_daemon ovn-controller > +OVS_WAIT_UNTIL([test "$(ovn-appctl -t ovn-controller debug/status)" > == "running"]) > +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk > '{$1=$1};1'], [dnl > +blackhole 192.0.2.1 proto 84 metric 100 > +blackhole 192.0.2.2 proto 84 metric 100 > +blackhole 192.0.2.3 proto 84 metric 100 > +blackhole 192.0.2.10 proto 84 metric 100 > +blackhole 198.51.100.0/24 proto 84 metric 1000 > +233.252.0.0/24 via 192.168.10.10 dev lo onlink]) > + > +# Stoping with --restart will not touch the routes. > +check ovn-appctl -t ovn-controller exit --restart > +OVS_WAIT_UNTIL([test "$(ovn-appctl -t ovn-controller debug/status)" > != "running"]) > +OVS_WAIT_UNTIL_EQUAL([ip route list vrf ovnvrf1337 | awk > '{$1=$1};1'], [dnl > +blackhole 192.0.2.1 proto 84 metric 100 > +blackhole 192.0.2.2 proto 84 metric 100 > +blackhole 192.0.2.3 proto 84 metric 100 > +blackhole 192.0.2.10 proto 84 metric 100 > +blackhole 198.51.100.0/24 proto 84 metric 1000 > +233.252.0.0/24 via 192.168.10.10 dev lo onlink]) > + > as ovn-sb > OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev