> On Tue, Jul 23, 2024 at 10:17 AM Lorenzo Bianconi <
> [email protected]> wrote:
>
> > Introduce ECMP_Nexthop table in the SB db in order to track active
> > ecmp-symmetric-reply connections and flush stale ones.
> >
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> >
>
> Hi Lorenzo,
>
> thank you for the patch, I have one comment down below.
Hi Ales,
thx for the review.
>
> northd/en-northd.c | 35 ++++++++++++++++
> > northd/en-northd.h | 4 ++
> > northd/inc-proc-northd.c | 7 +++-
> > northd/northd.c | 87 ++++++++++++++++++++++++++++++++++++++++
> > northd/northd.h | 10 +++++
> > ovn-sb.ovsschema | 18 ++++++++-
> > ovn-sb.xml | 31 ++++++++++++++
> > tests/ovn-northd.at | 4 ++
> > 8 files changed, 193 insertions(+), 3 deletions(-)
> >
> > diff --git a/northd/en-northd.c b/northd/en-northd.c
> > index 817a4736c..0d86fcb60 100644
> > --- a/northd/en-northd.c
> > +++ b/northd/en-northd.c
> > @@ -402,6 +402,25 @@ en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
> > engine_set_node_state(node, EN_UPDATED);
> > }
> >
> > +void
> > +en_ecmp_nexthop_run(struct engine_node *node, void *data)
> > +{
> > + const struct engine_context *eng_ctx = engine_get_context();
> > + struct static_routes_data *static_routes_data =
> > + engine_get_input_data("static_routes", node);
> > + struct ecmp_nexthop_data *enh_data = data;
> > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> > + EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> > +
> > + ecmp_nexthop_destroy(data);
> > + ecmp_nexthop_init(data);
> > + build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> > + &static_routes_data->parsed_routes,
> > + &enh_data->nexthops,
> > + sbrec_ecmp_nexthop_table);
> > + engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> > void
> > *en_northd_init(struct engine_node *node OVS_UNUSED,
> > struct engine_arg *arg OVS_UNUSED)
> > @@ -449,6 +468,16 @@ void *en_bfd_sync_init(struct engine_node *node
> > OVS_UNUSED,
> > return NULL;
> > }
> >
> > +void
> > +*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > + struct engine_arg *arg OVS_UNUSED)
> > +{
> > + struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
> > +
> > + ecmp_nexthop_init(data);
> > + return data;
> > +}
> > +
> > void
> > en_northd_cleanup(void *data)
> > {
> > @@ -484,3 +513,9 @@ void
> > en_bfd_sync_cleanup(void *data OVS_UNUSED)
> > {
> > }
> > +
> > +void
> > +en_ecmp_nexthop_cleanup(void *data)
> > +{
> > + ecmp_nexthop_destroy(data);
> > +}
> > diff --git a/northd/en-northd.h b/northd/en-northd.h
> > index 68f7755f8..4f30ac335 100644
> > --- a/northd/en-northd.h
> > +++ b/northd/en-northd.h
> > @@ -42,5 +42,9 @@ void *en_bfd_sync_init(struct engine_node *node
> > OVS_UNUSED,
> > void en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
> > void *data OVS_UNUSED);
> > void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> > +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> > +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > + struct engine_arg *arg OVS_UNUSED);
> > +void en_ecmp_nexthop_cleanup(void *data);
> >
> > #endif /* EN_NORTHD_H */
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index b3baecaff..7416b1c43 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -101,7 +101,8 @@ static unixctl_cb_func chassis_features_list;
> > SB_NODE(fdb, "fdb") \
> > SB_NODE(static_mac_binding, "static_mac_binding") \
> > SB_NODE(chassis_template_var, "chassis_template_var") \
> > - SB_NODE(logical_dp_group, "logical_dp_group")
> > + SB_NODE(logical_dp_group, "logical_dp_group") \
> > + SB_NODE(ecmp_nexthop, "ecmp_nexthop")
> >
> > enum sb_engine_node {
> > #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> > @@ -159,6 +160,7 @@ static ENGINE_NODE(route_policies, "route_policies");
> > static ENGINE_NODE(static_routes, "static_routes");
> > static ENGINE_NODE(bfd, "bfd");
> > static ENGINE_NODE(bfd_sync, "bfd_sync");
> > +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
> >
> > void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> > struct ovsdb_idl_loop *sb)
> > @@ -258,6 +260,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> > engine_add_input(&en_bfd_sync, &en_static_routes, NULL);
> > engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
> >
> > + engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> > + engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> > +
> > engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
> > engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
> > engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 70bfb144e..b5b332536 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -9998,6 +9998,81 @@ build_bfd_map(const struct nbrec_bfd_table
> > *nbrec_bfd_table,
> > }
> > }
> >
> > +#define NEXTHOP_IDS_LEN 65535
> > +void
> > +build_ecmp_nexthop_table(
> > + struct ovsdb_idl_txn *ovnsb_txn,
> > + struct hmap *routes,
> > + struct simap *nexthops,
> > + const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> > +{
> > + unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> > +
> > + if (!ovnsb_txn) {
> > + return;
> > + }
> > +
> > + const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> > + sbrec_ecmp_nexthop_table) {
> > + simap_put(nexthops, sb_ecmp_nexthop->nexthop,
> > + sb_ecmp_nexthop->id);
> > + bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
> > + }
> > +
> > + struct parsed_route *pr;
> > + HMAP_FOR_EACH (pr, key_node, routes) {
> > + if (!pr->ecmp_symmetric_reply) {
> > + continue;
> > + }
> > +
> > + const struct nbrec_logical_router_static_route *r = pr->route;
> > + if (!simap_contains(nexthops, r->nexthop)) {
> > + int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
> > + if (id == NEXTHOP_IDS_LEN) {
> > + static struct vlog_rate_limit rl =
> > VLOG_RATE_LIMIT_INIT(1, 1);
> > + VLOG_WARN_RL(&rl, "nexthop id address space is
> > exhausted");
> > + continue;
> > + }
> > + bitmap_set1(nexthop_ids, id);
> > + simap_put(nexthops, r->nexthop, id);
> > +
> > + sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> > + sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> > + sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
> > + }
> > + }
> > +
> > + struct simap_node *n;
> > + SIMAP_FOR_EACH_SAFE (n, nexthops) {
> > + bool stale = true;
> > + HMAP_FOR_EACH (pr, key_node, routes) {
> > + if (!pr->ecmp_symmetric_reply) {
> > + continue;
> > + }
> > +
> > + const struct nbrec_logical_router_static_route *r = pr->route;
> > + if (!strcmp(r->nexthop, n->name)) {
> > + stale = false;
> > + break;
> > + }
> > + }
> > +
> > + if (stale) {
> > + simap_delete(nexthops, n);
> > + }
> > + }
> > +
> > + SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
> > + sbrec_ecmp_nexthop_table) {
> > + if (!simap_contains(nexthops, sb_ecmp_nexthop->nexthop)) {
> > + sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
> > + }
> > + }
> > +
> > + bitmap_free(nexthop_ids);
> > +}
> >
>
> The amount of loops in this could be greatly reduced by using sset as
> representation what is in sync or not.
>
> HMAP_FOR_EACH (pr, key_node, routes) would add everything into sset.
> Then you could walk all SB entries, what is in sset would end up in the
> simap
> and bitmap. If it's not present in the sset we can remove it from the DB.
> Last step would be to walk the sset and add an SB entry for everything that
> is
> in the sset but not in the simap WDYT?
ack, I guess we can get rid of the third loop here (the one to look for stale
nexthops entries). I will fix it in the next version.
Regards,
Lorenzo
>
>
> > +
> > /* Returns a string of the IP address of the router port 'op' that
> > * overlaps with 'ip_s". If one is not found, returns NULL.
> > *
> > @@ -17791,6 +17866,12 @@ bfd_init(struct bfd_data *data)
> > hmap_init(&data->bfd_connections);
> > }
> >
> > +void
> > +ecmp_nexthop_init(struct ecmp_nexthop_data *data)
> > +{
> > + simap_init(&data->nexthops);
> > +}
> > +
> > void
> > northd_destroy(struct northd_data *data)
> > {
> > @@ -17872,6 +17953,12 @@ static_routes_destroy(struct static_routes_data
> > *data)
> > __bfd_destroy(&data->bfd_active_connections);
> > }
> >
> > +void
> > +ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
> > +{
> > + simap_destroy(&data->nexthops);
> > +}
> > +
> > void
> > ovnnb_db_run(struct northd_input *input_data,
> > struct northd_data *data,
> > diff --git a/northd/northd.h b/northd/northd.h
> > index 7ddb72eb7..205793e56 100644
> > --- a/northd/northd.h
> > +++ b/northd/northd.h
> > @@ -185,6 +185,10 @@ struct bfd_data {
> > struct hmap bfd_connections;
> > };
> >
> > +struct ecmp_nexthop_data {
> > + struct simap nexthops;
> > +};
> > +
> > struct lr_nat_table;
> >
> > struct lflow_input {
> > @@ -724,6 +728,12 @@ void static_routes_destroy(struct static_routes_data
> > *);
> > void bfd_init(struct bfd_data *);
> > void bfd_destroy(struct bfd_data *);
> >
> > +void build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
> > + struct hmap *, struct simap *,
> > + const struct sbrec_ecmp_nexthop_table *);
> > +void ecmp_nexthop_init(struct ecmp_nexthop_data *);
> > +void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
> > +
> > struct lflow_table;
> > struct lr_stateful_tracked_data;
> > struct ls_stateful_tracked_data;
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index b6c051ae6..a4d1630b0 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> > {
> > "name": "OVN_Southbound",
> > - "version": "20.34.0",
> > - "cksum": "2786607656 31376",
> > + "version": "20.35.0",
> > + "cksum": "1887835491 32037",
> > "tables": {
> > "SB_Global": {
> > "columns": {
> > @@ -608,6 +608,20 @@
> > "refTable":
> > "Datapath_Binding"}}}},
> > "indexes": [["logical_port", "ip"]],
> > "isRoot": true},
> > + "ECMP_Nexthop": {
> > + "columns": {
> > + "nexthop": {"type": "string"},
> > + "id": {"type": {"key": {"type": "integer",
> > + "minInteger": 0,
> > + "maxInteger": 65535}}},
> > + "external_ids": {
> > + "type": {"key": "string", "value": "string",
> > + "min": 0, "max": "unlimited"}},
> > + "options": {
> > + "type": {"key": "string", "value": "string",
> > + "min": 0, "max": "unlimited"}}},
> > + "indexes": [["nexthop"]],
> > + "isRoot": true},
> > "Chassis_Template_Var": {
> > "columns": {
> > "chassis": {"type": "string"},
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index 73a1be5ed..d84e956c5 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -5179,4 +5179,35 @@ tcp.flags = RST;
> > The set of variable values for a given chassis.
> > </column>
> > </table>
> > +
> > + <table name="ECMP_Nexthop">
> > + <p>
> > + Each record in this table represents an active ECMP route committed
> > by
> > + <code>ovn-northd</code> to <code>ovs</code> connection-tracking
> > table.
> > + <code>ECMP_Nexthop</code> table is used by
> > <code>ovn-controller</code>
> > + to track active ct entries and to flush stale ones.
> > + </p>
> > + <column name="nexthop">
> > + <p>
> > + Nexthop IP address for this ECMP route. Nexthop IP address should
> > + be the IP address of a connected router port or the IP address of
> > + an external device used as nexthop for the given destination.
> > + </p>
> > + </column>
> > +
> > + <column name="id">
> > + <p>
> > + Nexthop unique identifier. Nexthop ID is used to track active
> > + ecmp-symmetric-reply connections and flush stale ones.
> > + </p>
> > + </column>
> > +
> > + <column name="options">
> > + Reserved for future use.
> > + </column>
> > +
> > + <column name="external_ids">
> > + See <em>External IDs</em> at the beginning of this document.
> > + </column>
> > + </table>
> > </database>
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index a84fb6fab..b815eb321 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -6775,6 +6775,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
> > check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
> >
> > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.0.10
> > +check_row_count ECMP_Nexthop 1
> >
> > ovn-sbctl dump-flows lr0 > lr0flows
> >
> > @@ -6786,6 +6787,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> > ovn_strip_lflows], [0], [dn
> > ])
> >
> > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.0.20
> > +check_row_count ECMP_Nexthop 2
> >
> > ovn-sbctl dump-flows lr0 > lr0flows
> > AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> > ovn_strip_lflows], [0], [dnl
> > @@ -6822,6 +6824,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows
> > | ovn_strip_lflows], [0], [
> >
> > # add ecmp route with wrong nexthop
> > check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.1.20
> > +check_row_count ECMP_Nexthop 2
> >
> > ovn-sbctl dump-flows lr0 > lr0flows
> > AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> > ovn_strip_lflows], [0], [dnl
> > @@ -6836,6 +6839,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> > sed 's/192\.168\.0\..0/192.
> >
> > check ovn-nbctl lr-route-del lr0
> > wait_row_count nb:Logical_Router_Static_Route 0
> > +check_row_count ECMP_Nexthop 0
> >
> > check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> > ovn-sbctl dump-flows lr0 > lr0flows
> > --
> > 2.45.2
> >
> >
> Thanks,
> Ales
>
> --
>
> Ales Musil
>
> Senior Software Engineer - OVN Core
>
> Red Hat EMEA <https://www.redhat.com>
>
> [email protected]
> <https://red.ht/sig>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev