On Fri, May 8, 2026 at 12:23 PM Dumitru Ceara <[email protected]> wrote:
> On 4/24/26 3:23 PM, Ales Musil wrote: > > Add nexthop_exchange engine node that handles nexthop updates > > keeping the internal nexthop mapping up to date. The noop handler > > for evpn_fdb will be replaced by later commit. > > > > Signed-off-by: Ales Musil <[email protected]> > > --- > > Hi Ales, > > Hi Dumitru, > > controller/nexthop-exchange-stub.c | 20 +++++++ > > controller/nexthop-exchange.c | 82 ++++++++++++++++++++++----- > > controller/nexthop-exchange.h | 5 ++ > > controller/ovn-controller.c | 89 ++++++++++++++++++++++++++++++ > > tests/ovn-inc-proc-graph-dump.at | 3 + > > 5 files changed, 184 insertions(+), 15 deletions(-) > > > > diff --git a/controller/nexthop-exchange-stub.c > b/controller/nexthop-exchange-stub.c > > index 2742dc7e2..52c1bf028 100644 > > --- a/controller/nexthop-exchange-stub.c > > +++ b/controller/nexthop-exchange-stub.c > > @@ -18,6 +18,7 @@ > > #include "lib/netlink.h" > > #include "openvswitch/hmap.h" > > #include "openvswitch/ofpbuf.h" > > +#include "vec.h" > > > > #include "nexthop-exchange.h" > > > > @@ -34,9 +35,28 @@ nexthop_entry_format(struct ds *ds OVS_UNUSED, > > { > > } > > > > +struct nexthop_entry * > > +nexthop_entry_find(const struct hmap *nexthops OVS_UNUSED, > > + uint32_t id OVS_UNUSED) > > +{ > > + return NULL; > > +} > > + > > int > > nh_table_parse(struct ofpbuf *buf OVS_UNUSED, > > struct nh_table_msg *change OVS_UNUSED) > > { > > return 0; > > } > > + > > +bool > > +nexthops_handle_changes(struct hmap *nexthops OVS_UNUSED, > > + struct vector *msgs OVS_UNUSED) > > +{ > > + return false; > > +} > > + > > +void > > +nexthops_destroy(struct hmap *nexthops OVS_UNUSED) > > +{ > > +} > > diff --git a/controller/nexthop-exchange.c > b/controller/nexthop-exchange.c > > index a2ad643a6..8718b893f 100644 > > --- a/controller/nexthop-exchange.c > > +++ b/controller/nexthop-exchange.c > > @@ -19,9 +19,11 @@ > > > > #include "lib/netlink.h" > > #include "lib/netlink-socket.h" > > +#include "hmapx.h" > > #include "openvswitch/ofpbuf.h" > > #include "openvswitch/vlog.h" > > #include "packets.h" > > +#include "vec.h" > > > > #include "nexthop-exchange.h" > > > > @@ -109,6 +111,20 @@ nexthop_entry_format(struct ds *ds, const struct > nexthop_entry *nhe) > > } > > } > > > > +struct nexthop_entry * > > +nexthop_entry_find(const struct hmap *nexthops, uint32_t id) > > +{ > > + uint32_t hash = nexthop_entry_hash(id); > > + struct nexthop_entry *nhe; > > + HMAP_FOR_EACH_WITH_HASH (nhe, hmap_node, hash, nexthops) { > > + if (nhe->id == id) { > > + return nhe; > > + } > > + } > > + > > + return NULL; > > +} > > + > > /* Parse Netlink message in buf, which is expected to contain a UAPI > nhmsg > > * header and associated nexthop attributes. This will allocate > > * 'struct nexthop_entry' which needs to be freed by the caller. > > @@ -128,6 +144,56 @@ nh_table_parse(struct ofpbuf *buf, struct > nh_table_msg *change) > > nlmsg, change); > > } > > > > +bool > > +nexthops_handle_changes(struct hmap *nexthops, struct vector *msgs) > > +{ > > + if (vector_is_empty(msgs)) { > > + return false; > > + } > > + > > + struct hmapx updated_groups = HMAPX_INITIALIZER(&updated_groups); > > + > > + struct nh_table_msg *msg; > > + VECTOR_FOR_EACH_PTR (msgs, msg) { > > + struct nexthop_entry *nhe = nexthop_entry_find(nexthops, > msg->nhe->id); > > + if (nhe) { > > + hmap_remove(nexthops, &nhe->hmap_node); > > We assume here that if nhe used to be referenced by a different > nexthop_grp_entry->gateway, the same batch of changes will also include > an RTM_DELNEXTHOP for that nexthop too. > > It feels a bit fragile but it's probably fine. The alternative would be > to store backrefs but that seems a bit excessive. > > Let's leave it as is. > Right, I actually did a bunch of testing, and the kernel actually sends those in order, I hope that won't change in the future. > > > + free(nhe); > > + } > > + > > + if (msg->nlmsg_type == RTM_NEWNEXTHOP) { > > + hmap_insert(nexthops, &msg->nhe->hmap_node, > > + nexthop_entry_hash(msg->nhe->id)); > > + > > + if (msg->nhe->n_grps) { > > + hmapx_add(&updated_groups, msg->nhe); > > + } > > + > > + /* The nexthop entry moved into the hmap, prevent double > free. */ > > + msg->nhe = NULL; > > + } > > + } > > + > > + struct hmapx_node *hmapx_node; > > + HMAPX_FOR_EACH (hmapx_node, &updated_groups) { > > + struct nexthop_entry *nhe = hmapx_node->data; > > + nh_populate_grp_pointers(nhe, nexthops); > > + } > > + > > + hmapx_destroy(&updated_groups); > > + > > + return true; > > +} > > + > > +void > > +nexthops_destroy(struct hmap *nexthops) > > +{ > > + struct nexthop_entry *entry; > > + HMAP_FOR_EACH_POP (entry, hmap_node, nexthops) { > > + free(entry); > > + } > > +} > > + > > static int > > nh_table_parse__(struct ofpbuf *buf, size_t ofs, const struct nlmsghdr > *nlmsg, > > struct nh_table_msg *change) > > @@ -214,25 +280,11 @@ nexthop_entry_hash(uint32_t id) > > return hash_int(id, 0); > > } > > > > -static struct nexthop_entry * > > -nexthop_find(struct hmap *nexthops, uint32_t id) > > -{ > > - uint32_t hash = nexthop_entry_hash(id); > > - struct nexthop_entry *nhe; > > - HMAP_FOR_EACH_WITH_HASH (nhe, hmap_node, hash, nexthops) { > > - if (nhe->id == id) { > > - return nhe; > > - } > > - } > > - > > - return NULL; > > -} > > - > > static void > > nh_populate_grp_pointers(struct nexthop_entry *nhe, struct hmap > *nexthops) > > { > > for (size_t i = 0; i < nhe->n_grps; i++) { > > struct nexthop_grp_entry *grp = &nhe->grps[i]; > > - grp->gateway = nexthop_find(nexthops, grp->id); > > + grp->gateway = nexthop_entry_find(nexthops, grp->id); > > } > > } > > diff --git a/controller/nexthop-exchange.h > b/controller/nexthop-exchange.h > > index e94fdc73a..73f08c2fe 100644 > > --- a/controller/nexthop-exchange.h > > +++ b/controller/nexthop-exchange.h > > @@ -23,6 +23,7 @@ > > > > struct ds; > > struct ofpbuf; > > +struct vector; > > > > struct nexthop_grp_entry { > > /* The id of the nexthop gateway. */ > > @@ -56,6 +57,10 @@ struct nh_table_msg { > > > > void nexthops_sync(struct hmap *nexthops); > > void nexthop_entry_format(struct ds *ds, const struct nexthop_entry > *nhe); > > +struct nexthop_entry *nexthop_entry_find(const struct hmap *nexthops, > > + uint32_t id); > > int nh_table_parse(struct ofpbuf *, struct nh_table_msg *change); > > +bool nexthops_handle_changes(struct hmap *nexthops, struct vector > *msgs); > > +void nexthops_destroy(struct hmap *nexthops); > > > > #endif /* NEXTHOP_EXCHANGE_H */ > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index 35a5cd0b4..c46530f90 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -99,6 +99,7 @@ > > #include "neighbor.h" > > #include "neighbor-exchange.h" > > #include "neighbor-exchange-netlink.h" > > +#include "nexthop-exchange.h" > > #include "evpn-arp.h" > > #include "evpn-binding.h" > > #include "evpn-fdb.h" > > @@ -6344,6 +6345,83 @@ en_neighbor_table_notify_run(struct engine_node > *node OVS_UNUSED, > > return state; > > } > > > > +/* The nexthop_exchange node is an input node, but is enabled/disabled > > + * based on en_neighbor_exchange node. The reason being that engine > > + * periodically runs input nodes to check if there are updates, so it > could > > + * be polled for updates without requiring other nodes to run first. */ > > +struct ed_type_nexthop_exchange { > > + struct hmap nexthops; > > + bool enabled; > > + bool recompute; > > +}; > > + > > +static void * > > +en_nexthop_exchange_init(struct engine_node *node OVS_UNUSED, > > + struct engine_arg *arg OVS_UNUSED) > > +{ > > + struct ed_type_nexthop_exchange *nhe_data = xmalloc(sizeof > *nhe_data); > > + *nhe_data = (struct ed_type_nexthop_exchange) { > > + .nexthops = HMAP_INITIALIZER(&nhe_data->nexthops), > > + .enabled = false, > > + .recompute = true, > > + }; > > + > > + return nhe_data; > > +} > > + > > +static void > > +en_nexthop_exchange_cleanup(void *data) > > +{ > > + struct ed_type_nexthop_exchange *nhe_data = data; > > + nexthops_destroy(&nhe_data->nexthops); > > + hmap_destroy(&nhe_data->nexthops); > > +} > > + > > +static enum engine_node_state > > +en_nexthop_exchange_run(struct engine_node *node OVS_UNUSED, void *data) > > +{ > > + struct ed_type_nexthop_exchange *nhe_data = data; > > + > > + if (!nhe_data->enabled) { > > + return EN_UNCHANGED; > > + } > > + > > + if (nhe_data->recompute) { > > + nexthops_destroy(&nhe_data->nexthops); > > + nexthops_sync(&nhe_data->nexthops); > > + /* We are doing a full sync, let's clear any data > > + * that might accumulate in the meantime. */ > > + ovn_netlink_notifier_flush(OVN_NL_NOTIFIER_NEXTHOP); > > + > > + nhe_data->recompute = false; > > + return EN_UPDATED; > > + } > > + > > + struct vector *msgs = ovn_netlink_get_msgs(OVN_NL_NOTIFIER_NEXTHOP); > > + bool updated = nexthops_handle_changes(&nhe_data->nexthops, msgs); > > + ovn_netlink_notifier_flush(OVN_NL_NOTIFIER_NEXTHOP); > > + > > + return updated ? EN_UPDATED : EN_UNCHANGED; > > +} > > + > > +static void > > +nexthop_exchange_update(struct ed_type_nexthop_exchange *nhe_data, > > + bool enabled) > > +{ > > + if (nhe_data->enabled == enabled) { > > + return; > > + } > > + > > + if (nhe_data->enabled && !enabled) { > > + nexthops_destroy(&nhe_data->nexthops); > > + } else if (!nhe_data->enabled && enabled) { > > + nhe_data->recompute = true; > > + } > > + > > + nhe_data->enabled = enabled; > > + ovn_netlink_update_notifier(OVN_NL_NOTIFIER_NEXTHOP, enabled); > > +} > > + > > struct ed_type_neighbor_exchange { > > /* Contains 'struct evpn_remote_vtep'. */ > > struct hmap remote_vteps; > > @@ -6389,6 +6467,8 @@ en_neighbor_exchange_run(struct engine_node *node, > void *data_) > > engine_get_input_data("neighbor", node); > > struct ed_type_neighbor_table_notify *nt_notify = > > engine_get_input_data("neighbor_table_notify", node); > > + struct ed_type_nexthop_exchange *nhe_data = > > + engine_get_input_data("nexthop_exchange", node); > > > > evpn_remote_vteps_clear(&data->remote_vteps); > > evpn_static_entries_clear(&data->static_fdbs); > > @@ -6407,6 +6487,7 @@ en_neighbor_exchange_run(struct engine_node *node, > void *data_) > > > > neighbor_exchange_run(&n_ctx_in, &n_ctx_out); > > neighbor_table_notify_update(&nt_notify->watches); > > + nexthop_exchange_update(nhe_data, > !vector_is_empty(&nt_notify->watches)); > > This reads a bit weird to be honest. 'nhe_data' is the data of the > "nexthop_exchange" node, which is an input of the "neighbor_exchange" > node whose run() function we're executing here. > > So we're changing the data owned by an input node inside a different > node's run callback. That is not really nice. > > OTOH, we already broke that pattern with 5ec13ae6eff6 ("controller: > Consolidate the netlink notifiers.") as we're doing the same thing > (changing input node data) for rt_notify. > > Let's keep it for now but maybe we should follow up with a finer grain > approach that doesn't require this kind of semantics? > This is a bit ugly but there is no other way to define a node that is leaf (root) that runs every single time and has dependencies. We already do this for aging and aging schedulers in northd. I'm not sure if there we be a better way than combining nodes. > > > > > return EN_UPDATED; > > } > > @@ -6864,6 +6945,7 @@ static ENGINE_NODE(neighbor); > > static ENGINE_NODE(neighbor_table_notify); > > static ENGINE_NODE(neighbor_exchange); > > static ENGINE_NODE(neighbor_exchange_status); > > +static ENGINE_NODE(nexthop_exchange); > > static ENGINE_NODE(evpn_vtep_binding, CLEAR_TRACKED_DATA); > > static ENGINE_NODE(evpn_fdb, CLEAR_TRACKED_DATA); > > static ENGINE_NODE(evpn_arp, CLEAR_TRACKED_DATA); > > @@ -7117,6 +7199,10 @@ inc_proc_ovn_controller_init( > > engine_add_input(&en_neighbor_exchange, &en_neighbor_table_notify, > NULL); > > engine_add_input(&en_neighbor_exchange, > &en_neighbor_exchange_status, > > NULL); > > + /* We just need to enable/disable the nexthop exchange based on > > + * the neighbor status. */ > > + engine_add_input(&en_neighbor_exchange, &en_nexthop_exchange, > > + engine_noop_handler); > > > > engine_add_input(&en_evpn_vtep_binding, &en_ovs_open_vswitch, NULL); > > engine_add_input(&en_evpn_vtep_binding, &en_ovs_bridge, NULL); > > @@ -7133,6 +7219,9 @@ inc_proc_ovn_controller_init( > > engine_add_input(&en_evpn_fdb, &en_neighbor_exchange, NULL); > > engine_add_input(&en_evpn_fdb, &en_evpn_vtep_binding, > > evpn_fdb_vtep_binding_handler); > > + /* XXX: This is just a place holder and it will be updated later > on. */ > > + engine_add_input(&en_evpn_fdb, &en_nexthop_exchange, > > + engine_noop_handler); > > > > engine_add_input(&en_evpn_arp, &en_neighbor_exchange, NULL); > > engine_add_input(&en_evpn_arp, &en_evpn_vtep_binding, > > diff --git a/tests/ovn-inc-proc-graph-dump.at b/tests/ > ovn-inc-proc-graph-dump.at > > index 178310978..6b4d94835 100644 > > --- a/tests/ovn-inc-proc-graph-dump.at > > +++ b/tests/ovn-inc-proc-graph-dump.at > > @@ -401,11 +401,13 @@ digraph "Incremental-Processing-Engine" { > > host_if_monitor [[style=filled, shape=box, fillcolor=white, > label="host_if_monitor"]]; > > neighbor_table_notify [[style=filled, shape=box, fillcolor=white, > label="neighbor_table_notify"]]; > > neighbor_exchange_status [[style=filled, shape=box, > fillcolor=white, label="neighbor_exchange_status"]]; > > + nexthop_exchange [[style=filled, shape=box, fillcolor=white, > label="nexthop_exchange"]]; > > neighbor_exchange [[style=filled, shape=box, fillcolor=white, > label="neighbor_exchange"]]; > > neighbor -> neighbor_exchange [[label=""]]; > > host_if_monitor -> neighbor_exchange [[label=""]]; > > neighbor_table_notify -> neighbor_exchange [[label=""]]; > > neighbor_exchange_status -> neighbor_exchange [[label=""]]; > > + nexthop_exchange -> neighbor_exchange > [[label="engine_noop_handler"]]; > > evpn_vtep_binding [[style=filled, shape=box, fillcolor=white, > label="evpn_vtep_binding"]]; > > OVS_open_vswitch -> evpn_vtep_binding [[label=""]]; > > OVS_bridge -> evpn_vtep_binding [[label=""]]; > > @@ -416,6 +418,7 @@ digraph "Incremental-Processing-Engine" { > > evpn_fdb [[style=filled, shape=box, fillcolor=white, > label="evpn_fdb"]]; > > neighbor_exchange -> evpn_fdb [[label=""]]; > > evpn_vtep_binding -> evpn_fdb > [[label="evpn_fdb_vtep_binding_handler"]]; > > + nexthop_exchange -> evpn_fdb [[label="engine_noop_handler"]]; > > evpn_arp [[style=filled, shape=box, fillcolor=white, > label="evpn_arp"]]; > > neighbor_exchange -> evpn_arp [[label=""]]; > > evpn_vtep_binding -> evpn_arp > [[label="evpn_arp_vtep_binding_handler"]]; > > Applied to main, thanks! > > Regards, > Dumitru > > Thanks, Ales _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
