We are currently maintaining a state for _maintained_route_tables hashmap and _maintained_vrfs sset between route_exchange_run() consecutive runs. This approach does not take into account possible netlink failures. As a result OVN and kernel state can be not aligned. Fix the problem retrying the failed VRF or route update in the next route_exchange_run() iteration. Moreover report operation result in re_nl_sync_routes.
Reported-at: https://issues.redhat.com/browse/FDP-1440 Fixes: faf4df563f1d ("controller: Announce routes via route-exchange.") Fixes: b344a5341da8 ("controller: Cleanup routes on stop.") Acked-By: Felix Huettner <felix.huettner@stackit.cloud> Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> --- controller/route-exchange-netlink.c | 20 ++++++++++++++++++-- controller/route-exchange-netlink.h | 8 ++++---- controller/route-exchange.c | 15 ++++++++++++--- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/controller/route-exchange-netlink.c b/controller/route-exchange-netlink.c index 83deb13e6..0b9b21b86 100644 --- a/controller/route-exchange-netlink.c +++ b/controller/route-exchange-netlink.c @@ -194,6 +194,7 @@ struct route_msg_handle_data { struct hmapx *routes_to_advertise; struct vector *learned_routes; const struct hmap *routes; + int ret; }; static void @@ -256,16 +257,22 @@ handle_route_msg(const struct route_table_msg *msg, void *data) addr_s, &rd->rta_dst) ? addr_s : "(invalid)", rd->rtm_dst_len, ovs_strerror(err)); + + if (!handle_data->ret) { + /* Report the first error value to the caller. */ + handle_data->ret = err; + } } } -void +int re_nl_sync_routes(uint32_t table_id, const struct hmap *routes, struct vector *learned_routes, const struct sbrec_datapath_binding *db) { struct hmapx routes_to_advertise = HMAPX_INITIALIZER(&routes_to_advertise); struct advertise_route_entry *ar; + int ret; HMAP_FOR_EACH (ar, node, routes) { hmapx_add(&routes_to_advertise, ar); @@ -281,6 +288,7 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap *routes, .db = db, }; route_table_dump_one_table(table_id, handle_route_msg, &data); + ret = data.ret; /* Add any remaining routes in the routes_to_advertise hmapx to the * system routing table. */ @@ -298,12 +306,18 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap *routes, addr_s, &ar->addr) ? addr_s : "(invalid)", ar->plen, ovs_strerror(err)); + if (!ret) { + /* Report the first error value to the caller. */ + ret = err; + } } } hmapx_destroy(&routes_to_advertise); + + return ret; } -void +int re_nl_cleanup_routes(uint32_t table_id) { /* Remove routes from the system that are not in the host_routes hmap and @@ -314,4 +328,6 @@ re_nl_cleanup_routes(uint32_t table_id) .learned_routes = NULL, }; route_table_dump_one_table(table_id, handle_route_msg, &data); + + return data.ret; } diff --git a/controller/route-exchange-netlink.h b/controller/route-exchange-netlink.h index 8f35b3a9d..c9fce692b 100644 --- a/controller/route-exchange-netlink.h +++ b/controller/route-exchange-netlink.h @@ -49,10 +49,10 @@ int re_nl_add_route(uint32_t table_id, const struct in6_addr *dst, int re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, unsigned int plen, unsigned int priority); -void re_nl_sync_routes(uint32_t table_id, const struct hmap *routes, - struct vector *learned_routes, - const struct sbrec_datapath_binding *db); +int re_nl_sync_routes(uint32_t table_id, const struct hmap *routes, + struct vector *learned_routes, + const struct sbrec_datapath_binding *db); -void re_nl_cleanup_routes(uint32_t table_id); +int re_nl_cleanup_routes(uint32_t table_id); #endif /* route-exchange-netlink.h */ diff --git a/controller/route-exchange.c b/controller/route-exchange.c index 79a046f60..f3b70fa8b 100644 --- a/controller/route-exchange.c +++ b/controller/route-exchange.c @@ -213,6 +213,7 @@ route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in, struct hmap old_maintained_route_table = HMAP_INITIALIZER(&old_maintained_route_table); hmap_swap(&_maintained_route_tables, &old_maintained_route_table); + int error; const struct advertise_datapath_entry *ad; HMAP_FOR_EACH (ad, node, r_ctx_in->announce_routes) { @@ -220,7 +221,7 @@ route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in, if (ad->maintain_vrf) { if (!sset_contains(&old_maintained_vrfs, ad->vrf_name)) { - int error = re_nl_create_vrf(ad->vrf_name, table_id); + error = re_nl_create_vrf(ad->vrf_name, table_id); if (error && error != EEXIST) { VLOG_WARN_RL(&rl, "Unable to create VRF %s for datapath " @@ -261,7 +262,11 @@ route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in, struct maintained_route_table_entry *mrt; HMAP_FOR_EACH_POP (mrt, node, &old_maintained_route_table) { if (!maintained_route_table_contains(mrt->table_id)) { - re_nl_cleanup_routes(mrt->table_id); + error = re_nl_cleanup_routes(mrt->table_id); + if (error) { + /* If netlink transaction fails, we will retry next time. */ + maintained_route_table_add(mrt->table_id); + } } free(mrt); } @@ -271,7 +276,11 @@ route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in, const char *vrf_name; SSET_FOR_EACH_SAFE (vrf_name, &old_maintained_vrfs) { if (!sset_contains(&_maintained_vrfs, vrf_name)) { - re_nl_delete_vrf(vrf_name); + error = re_nl_delete_vrf(vrf_name); + if (error) { + /* If netlink transaction fails, we will retry next time. */ + sset_add(&_maintained_vrfs, vrf_name); + } } sset_delete(&old_maintained_vrfs, SSET_NODE_FROM_NAME(vrf_name)); } -- 2.49.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev