We are currently maintaining a state for _maintained_route_tables
hashmap and _maintained_vrfs sset between route_exchange_run()
consecutive runs. This approach does not take into account possible
netlink failures. As a result OVN and kernel state can be not aligned.
Fix the problem retrying the failed VRF or route update in the next
route_exchange_run() iteration.
Moreover report operation result in re_nl_sync_routes.

Reported-at: https://issues.redhat.com/browse/FDP-1440
Fixes: faf4df563f1d ("controller: Announce routes via route-exchange.")
Fixes: b344a5341da8 ("controller: Cleanup routes on stop.")
Acked-By: Felix Huettner <felix.huettner@stackit.cloud>
Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com>
---
 controller/route-exchange-netlink.c | 20 ++++++++++++++++++--
 controller/route-exchange-netlink.h |  8 ++++----
 controller/route-exchange.c         | 15 ++++++++++++---
 3 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/controller/route-exchange-netlink.c 
b/controller/route-exchange-netlink.c
index 83deb13e6..0b9b21b86 100644
--- a/controller/route-exchange-netlink.c
+++ b/controller/route-exchange-netlink.c
@@ -194,6 +194,7 @@ struct route_msg_handle_data {
     struct hmapx *routes_to_advertise;
     struct vector *learned_routes;
     const struct hmap *routes;
+    int ret;
 };
 
 static void
@@ -256,16 +257,22 @@ handle_route_msg(const struct route_table_msg *msg, void 
*data)
                          addr_s, &rd->rta_dst) ? addr_s : "(invalid)",
                      rd->rtm_dst_len,
                      ovs_strerror(err));
+
+        if (!handle_data->ret) {
+            /* Report the first error value to the caller. */
+            handle_data->ret = err;
+        }
     }
 }
 
-void
+int
 re_nl_sync_routes(uint32_t table_id, const struct hmap *routes,
                   struct vector *learned_routes,
                   const struct sbrec_datapath_binding *db)
 {
     struct hmapx routes_to_advertise = HMAPX_INITIALIZER(&routes_to_advertise);
     struct advertise_route_entry *ar;
+    int ret;
 
     HMAP_FOR_EACH (ar, node, routes) {
         hmapx_add(&routes_to_advertise, ar);
@@ -281,6 +288,7 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap 
*routes,
         .db = db,
     };
     route_table_dump_one_table(table_id, handle_route_msg, &data);
+    ret = data.ret;
 
     /* Add any remaining routes in the routes_to_advertise hmapx to the
      * system routing table. */
@@ -298,12 +306,18 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap 
*routes,
                              addr_s, &ar->addr) ? addr_s : "(invalid)",
                          ar->plen,
                          ovs_strerror(err));
+            if (!ret) {
+                /* Report the first error value to the caller. */
+                ret = err;
+            }
         }
     }
     hmapx_destroy(&routes_to_advertise);
+
+    return ret;
 }
 
-void
+int
 re_nl_cleanup_routes(uint32_t table_id)
 {
     /* Remove routes from the system that are not in the host_routes hmap and
@@ -314,4 +328,6 @@ re_nl_cleanup_routes(uint32_t table_id)
         .learned_routes = NULL,
     };
     route_table_dump_one_table(table_id, handle_route_msg, &data);
+
+    return data.ret;
 }
diff --git a/controller/route-exchange-netlink.h 
b/controller/route-exchange-netlink.h
index 8f35b3a9d..c9fce692b 100644
--- a/controller/route-exchange-netlink.h
+++ b/controller/route-exchange-netlink.h
@@ -49,10 +49,10 @@ int re_nl_add_route(uint32_t table_id, const struct 
in6_addr *dst,
 int re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst,
                        unsigned int plen, unsigned int priority);
 
-void re_nl_sync_routes(uint32_t table_id, const struct hmap *routes,
-                       struct vector *learned_routes,
-                       const struct sbrec_datapath_binding *db);
+int re_nl_sync_routes(uint32_t table_id, const struct hmap *routes,
+                      struct vector *learned_routes,
+                      const struct sbrec_datapath_binding *db);
 
-void re_nl_cleanup_routes(uint32_t table_id);
+int re_nl_cleanup_routes(uint32_t table_id);
 
 #endif /* route-exchange-netlink.h */
diff --git a/controller/route-exchange.c b/controller/route-exchange.c
index 79a046f60..f3b70fa8b 100644
--- a/controller/route-exchange.c
+++ b/controller/route-exchange.c
@@ -213,6 +213,7 @@ route_exchange_run(const struct route_exchange_ctx_in 
*r_ctx_in,
     struct hmap old_maintained_route_table =
         HMAP_INITIALIZER(&old_maintained_route_table);
     hmap_swap(&_maintained_route_tables, &old_maintained_route_table);
+    int error;
 
     const struct advertise_datapath_entry *ad;
     HMAP_FOR_EACH (ad, node, r_ctx_in->announce_routes) {
@@ -220,7 +221,7 @@ route_exchange_run(const struct route_exchange_ctx_in 
*r_ctx_in,
 
         if (ad->maintain_vrf) {
             if (!sset_contains(&old_maintained_vrfs, ad->vrf_name)) {
-                int error = re_nl_create_vrf(ad->vrf_name, table_id);
+                error = re_nl_create_vrf(ad->vrf_name, table_id);
                 if (error && error != EEXIST) {
                     VLOG_WARN_RL(&rl,
                                  "Unable to create VRF %s for datapath "
@@ -261,7 +262,11 @@ route_exchange_run(const struct route_exchange_ctx_in 
*r_ctx_in,
     struct maintained_route_table_entry *mrt;
     HMAP_FOR_EACH_POP (mrt, node, &old_maintained_route_table) {
         if (!maintained_route_table_contains(mrt->table_id)) {
-            re_nl_cleanup_routes(mrt->table_id);
+            error = re_nl_cleanup_routes(mrt->table_id);
+            if (error) {
+                /* If netlink transaction fails, we will retry next time. */
+                maintained_route_table_add(mrt->table_id);
+            }
         }
         free(mrt);
     }
@@ -271,7 +276,11 @@ route_exchange_run(const struct route_exchange_ctx_in 
*r_ctx_in,
     const char *vrf_name;
     SSET_FOR_EACH_SAFE (vrf_name, &old_maintained_vrfs) {
         if (!sset_contains(&_maintained_vrfs, vrf_name)) {
-            re_nl_delete_vrf(vrf_name);
+            error = re_nl_delete_vrf(vrf_name);
+            if (error) {
+                /* If netlink transaction fails, we will retry next time. */
+                sset_add(&_maintained_vrfs, vrf_name);
+            }
         }
         sset_delete(&old_maintained_vrfs, SSET_NODE_FROM_NAME(vrf_name));
     }
-- 
2.49.0

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to