On 9/10/25 7:59 AM, Ales Musil via dev wrote: > We were removing routes while still iterating over them, > while it might work it's not a good practice. Postpone > the removal after the dump is done. > > Reported-at: https://issues.redhat.com/browse/FDP-1594 > Signed-off-by: Ales Musil <amu...@redhat.com> > ---
Hi Ales, Thanks for the fix! There's one CI test failure in the ovsrobot run of this patch but it's not related to the change: https://github.com/ovsrobot/ovn/actions/runs/17604844865/job/50013775059#step:11:4605 > controller/route-exchange-netlink.c | 70 ++++++++++++++++++++--------- > 1 file changed, 50 insertions(+), 20 deletions(-) > > diff --git a/controller/route-exchange-netlink.c > b/controller/route-exchange-netlink.c > index cec4b1ec3..91f059492 100644 > --- a/controller/route-exchange-netlink.c > +++ b/controller/route-exchange-netlink.c > @@ -200,9 +200,9 @@ struct route_msg_handle_data { > const struct sbrec_datapath_binding *db; > struct hmapx *routes_to_advertise; > struct vector *learned_routes; > + struct vector *stale_routes; > const struct hmap *routes; > uint32_t table_id; /* requested table id. */ > - int ret; > }; > > static void > @@ -211,7 +211,6 @@ handle_route_msg(const struct route_table_msg *msg, void > *data) > struct route_msg_handle_data *handle_data = data; > const struct route_data *rd = &msg->rd; > struct advertise_route_entry *ar; > - int err; > > if (handle_data->table_id != rd->rta_table_id) { > /* We do not have the NLM_F_DUMP_FILTERED info here, so check if the > @@ -261,23 +260,37 @@ handle_route_msg(const struct route_table_msg *msg, > void *data) > } > } > } > - err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst, > - rd->rtm_dst_len, rd->rta_priority); > - if (err) { > - char addr_s[INET6_ADDRSTRLEN + 1]; > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > - VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s plen=%d " > - "failed: %s", rd->rta_table_id, > - ipv6_string_mapped( > - addr_s, &rd->rta_dst) ? addr_s : "(invalid)", > - rd->rtm_dst_len, > - ovs_strerror(err)); > - > - if (!handle_data->ret) { > - /* Report the first error value to the caller. */ > - handle_data->ret = err; > + > + if (handle_data->stale_routes) { > + vector_push(handle_data->stale_routes, rd); > + } > +} > + > +static int > +re_nl_delete_stale_routes(const struct vector *stale_routes) > +{ > + int ret = 0; > + > + const struct route_data *rd; > + VECTOR_FOR_EACH_PTR (stale_routes, rd) { > + int err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst, > + rd->rtm_dst_len, rd->rta_priority); > + if (err) { > + char addr_s[INET6_ADDRSTRLEN + 1]; > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > + VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s > plen=%d " > + "failed: %s", rd->rta_table_id, > + ipv6_string_mapped( > + addr_s, &rd->rta_dst) ? addr_s : "(invalid)", > + rd->rtm_dst_len, > + ovs_strerror(err)); > + if (!ret) { > + ret = err; > + } > } > } > + > + return ret; > } > > int > @@ -286,8 +299,9 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap > *routes, > const struct sbrec_datapath_binding *db) > { > struct hmapx routes_to_advertise = > HMAPX_INITIALIZER(&routes_to_advertise); > + struct vector stale_routes = VECTOR_EMPTY_INITIALIZER(struct route_data); > struct advertise_route_entry *ar; > - int ret; > + int ret = 0; > > HMAP_FOR_EACH (ar, node, routes) { > hmapx_add(&routes_to_advertise, ar); > @@ -300,11 +314,11 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap > *routes, > .routes = routes, > .routes_to_advertise = &routes_to_advertise, > .learned_routes = learned_routes, > + .stale_routes = &stale_routes, > .db = db, > .table_id = table_id, > }; > route_table_dump_one_table(table_id, handle_route_msg, &data); > - ret = data.ret; > > /* Add any remaining routes in the routes_to_advertise hmapx to the > * system routing table. */ > @@ -328,7 +342,14 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap > *routes, > } > } > } > + > + int err = re_nl_delete_stale_routes(&stale_routes); > + if (!ret) { > + ret = err; > + } > + > hmapx_destroy(&routes_to_advertise); > + vector_destroy(&stale_routes); > > return ret; > } > @@ -336,15 +357,24 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap > *routes, > int > re_nl_cleanup_routes(uint32_t table_id) > { > + int ret = 0; > + struct vector stale_routes = VECTOR_EMPTY_INITIALIZER(struct route_data); > /* Remove routes from the system that are not in the host_routes hmap and > * remove entries from host_routes hmap that match routes already > installed > * in the system. */ > struct route_msg_handle_data data = { > .routes_to_advertise = NULL, > .learned_routes = NULL, > + .stale_routes = &stale_routes, > .table_id = table_id, > }; > route_table_dump_one_table(table_id, handle_route_msg, &data); > > - return data.ret; > + int err = re_nl_delete_stale_routes(&stale_routes); Nit: I think I would've just inlined the re_nl_delete_stale_routes() code here like we do in ne_nl_sync_neigh(). It saves us the check below and the function itself is not that large, IMO, it gives a better overview of what's going on. But I'll leave it up to you: Acked-by: Dumitru Ceara <dce...@redhat.com> > + if (!ret) { > + ret = err; > + } > + vector_destroy(&stale_routes); > + > + return ret; > } Regards, Dumitru _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev