On Wed, Sep 10, 2025 at 3:04 PM Ales Musil <amu...@redhat.com> wrote:
> > > On Wed, Sep 10, 2025 at 9:44 AM Dumitru Ceara <dce...@redhat.com> wrote: > >> On 9/10/25 7:59 AM, Ales Musil via dev wrote: >> > We were removing routes while still iterating over them, >> > while it might work it's not a good practice. Postpone >> > the removal after the dump is done. >> > >> > Reported-at: https://issues.redhat.com/browse/FDP-1594 >> > Signed-off-by: Ales Musil <amu...@redhat.com> >> > --- >> >> Hi Ales, >> >> Thanks for the fix! >> >> There's one CI test failure in the ovsrobot run of this patch but it's >> not related to the change: >> >> https://github.com/ovsrobot/ovn/actions/runs/17604844865/job/50013775059#step:11:4605 > > > Thank you Dumitru, > > it seems that we have a new flake. > > >> >> >> > controller/route-exchange-netlink.c | 70 ++++++++++++++++++++--------- >> > 1 file changed, 50 insertions(+), 20 deletions(-) >> > >> > diff --git a/controller/route-exchange-netlink.c >> b/controller/route-exchange-netlink.c >> > index cec4b1ec3..91f059492 100644 >> > --- a/controller/route-exchange-netlink.c >> > +++ b/controller/route-exchange-netlink.c >> > @@ -200,9 +200,9 @@ struct route_msg_handle_data { >> > const struct sbrec_datapath_binding *db; >> > struct hmapx *routes_to_advertise; >> > struct vector *learned_routes; >> > + struct vector *stale_routes; >> > const struct hmap *routes; >> > uint32_t table_id; /* requested table id. */ >> > - int ret; >> > }; >> > >> > static void >> > @@ -211,7 +211,6 @@ handle_route_msg(const struct route_table_msg *msg, >> void *data) >> > struct route_msg_handle_data *handle_data = data; >> > const struct route_data *rd = &msg->rd; >> > struct advertise_route_entry *ar; >> > - int err; >> > >> > if (handle_data->table_id != rd->rta_table_id) { >> > /* We do not have the NLM_F_DUMP_FILTERED info here, so check >> if the >> > @@ -261,23 +260,37 @@ handle_route_msg(const struct route_table_msg >> *msg, void *data) >> > } >> > } >> > } >> > - err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst, >> > - rd->rtm_dst_len, rd->rta_priority); >> > - if (err) { >> > - char addr_s[INET6_ADDRSTRLEN + 1]; >> > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); >> > - VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s >> plen=%d " >> > - "failed: %s", rd->rta_table_id, >> > - ipv6_string_mapped( >> > - addr_s, &rd->rta_dst) ? addr_s : "(invalid)", >> > - rd->rtm_dst_len, >> > - ovs_strerror(err)); >> > - >> > - if (!handle_data->ret) { >> > - /* Report the first error value to the caller. */ >> > - handle_data->ret = err; >> > + >> > + if (handle_data->stale_routes) { >> > + vector_push(handle_data->stale_routes, rd); >> > + } >> > +} >> > + >> > +static int >> > +re_nl_delete_stale_routes(const struct vector *stale_routes) >> > +{ >> > + int ret = 0; >> > + >> > + const struct route_data *rd; >> > + VECTOR_FOR_EACH_PTR (stale_routes, rd) { >> > + int err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst, >> > + rd->rtm_dst_len, >> rd->rta_priority); >> > + if (err) { >> > + char addr_s[INET6_ADDRSTRLEN + 1]; >> > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, >> 20); >> > + VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s >> plen=%d " >> > + "failed: %s", rd->rta_table_id, >> > + ipv6_string_mapped( >> > + addr_s, &rd->rta_dst) ? addr_s : >> "(invalid)", >> > + rd->rtm_dst_len, >> > + ovs_strerror(err)); >> > + if (!ret) { >> > + ret = err; >> > + } >> > } >> > } >> > + >> > + return ret; >> > } >> > >> > int >> > @@ -286,8 +299,9 @@ re_nl_sync_routes(uint32_t table_id, const struct >> hmap *routes, >> > const struct sbrec_datapath_binding *db) >> > { >> > struct hmapx routes_to_advertise = >> HMAPX_INITIALIZER(&routes_to_advertise); >> > + struct vector stale_routes = VECTOR_EMPTY_INITIALIZER(struct >> route_data); >> > struct advertise_route_entry *ar; >> > - int ret; >> > + int ret = 0; >> > >> > HMAP_FOR_EACH (ar, node, routes) { >> > hmapx_add(&routes_to_advertise, ar); >> > @@ -300,11 +314,11 @@ re_nl_sync_routes(uint32_t table_id, const struct >> hmap *routes, >> > .routes = routes, >> > .routes_to_advertise = &routes_to_advertise, >> > .learned_routes = learned_routes, >> > + .stale_routes = &stale_routes, >> > .db = db, >> > .table_id = table_id, >> > }; >> > route_table_dump_one_table(table_id, handle_route_msg, &data); >> > - ret = data.ret; >> > >> > /* Add any remaining routes in the routes_to_advertise hmapx to the >> > * system routing table. */ >> > @@ -328,7 +342,14 @@ re_nl_sync_routes(uint32_t table_id, const struct >> hmap *routes, >> > } >> > } >> > } >> > + >> > + int err = re_nl_delete_stale_routes(&stale_routes); >> > + if (!ret) { >> > + ret = err; >> > + } >> > + >> > hmapx_destroy(&routes_to_advertise); >> > + vector_destroy(&stale_routes); >> > >> > return ret; >> > } >> > @@ -336,15 +357,24 @@ re_nl_sync_routes(uint32_t table_id, const struct >> hmap *routes, >> > int >> > re_nl_cleanup_routes(uint32_t table_id) >> > { >> > + int ret = 0; >> > + struct vector stale_routes = VECTOR_EMPTY_INITIALIZER(struct >> route_data); >> > /* Remove routes from the system that are not in the host_routes >> hmap and >> > * remove entries from host_routes hmap that match routes already >> installed >> > * in the system. */ >> > struct route_msg_handle_data data = { >> > .routes_to_advertise = NULL, >> > .learned_routes = NULL, >> > + .stale_routes = &stale_routes, >> > .table_id = table_id, >> > }; >> > route_table_dump_one_table(table_id, handle_route_msg, &data); >> > >> > - return data.ret; >> > + int err = re_nl_delete_stale_routes(&stale_routes); >> >> Nit: I think I would've just inlined the re_nl_delete_stale_routes() >> code here like we do in ne_nl_sync_neigh(). It saves us the check below >> and the function itself is not that large, IMO, it gives a better >> overview of what's going on. >> > > This one is actually called twice once in "re_nl_cleanup_routes()" > and second in "re_nl_sync_routes()". That's why there is separate > function. Which is different from "ne_nl_sync_neigh()" where deletion > happens only once. With that said I'll leave it as is. > > >> But I'll leave it up to you: >> >> Acked-by: Dumitru Ceara <dce...@redhat.com> >> >> > + if (!ret) { >> > + ret = err; >> > + } >> > + vector_destroy(&stale_routes); >> > + >> > + return ret; >> > } >> >> Regards, >> Dumitru >> >> > I went ahead and merged this into main and backported to 25.09. I will > have to create a custom backport patch for 25.03, I'll send it out to the > ML. > > Regards, > Ales > As discussed offline this is the backport I'm going to apply, let me know if it looks good to you. https://github.com/almusil/ovn/commit/62722d8713c9ba4596c6976b29b7334aaa100dbf Thanks, Ales _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev