> Hi Lorenzo,

Hi Mark,

> 
> I have some comments below.

thx for the review.

> 
> On 3/7/24 08:19, Lorenzo Bianconi wrote:
> > Introduce ECMP_Nexthop table in the SB db in order to track active
> > ecmp-symmetric-reply connections and flush stale ones.
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com>
> > ---
> >   northd/en-northd.c       |  4 ++
> >   northd/inc-proc-northd.c |  8 +++-
> >   northd/northd.c          | 98 ++++++++++++++++++++++++++++++++++++++++
> >   northd/northd.h          |  3 ++
> >   ovn-sb.ovsschema         | 15 +++++-
> >   ovn-sb.xml               | 19 ++++++++
> >   tests/ovn-northd.at      |  4 ++
> >   7 files changed, 147 insertions(+), 4 deletions(-)
> > 
> > diff --git a/northd/en-northd.c b/northd/en-northd.c
> > index 4479b4aff..8d2ab481f 100644
> > --- a/northd/en-northd.c
> > +++ b/northd/en-northd.c
> > @@ -76,6 +76,8 @@ northd_get_input_data(struct engine_node *node,
> >           EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node));
> >       input_data->nbrec_mirror_table =
> >           EN_OVSDB_GET(engine_get_input("NB_mirror", node));
> > +    input_data->nbrec_static_route_table =
> > +        EN_OVSDB_GET(engine_get_input("NB_logical_router_static_route", 
> > node));
> >       input_data->sbrec_datapath_binding_table =
> >           EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> > @@ -101,6 +103,8 @@ northd_get_input_data(struct engine_node *node,
> >           EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node));
> >       input_data->sbrec_mirror_table =
> >           EN_OVSDB_GET(engine_get_input("SB_mirror", node));
> > +    input_data->sbrec_ecmp_nexthop_table =
> > +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> >       struct ed_type_lb_data *lb_data =
> >           engine_get_input_data("lb_data", node);
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index e1073812c..1c58da0bf 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -61,7 +61,8 @@ static unixctl_cb_func chassis_features_list;
> >       NB_NODE(meter, "meter") \
> >       NB_NODE(bfd, "bfd") \
> >       NB_NODE(static_mac_binding, "static_mac_binding") \
> > -    NB_NODE(chassis_template_var, "chassis_template_var")
> > +    NB_NODE(chassis_template_var, "chassis_template_var") \
> > +    NB_NODE(logical_router_static_route, "logical_router_static_route")
> >       enum nb_engine_node {
> >   #define NB_NODE(NAME, NAME_STR) NB_##NAME,
> > @@ -101,7 +102,8 @@ static unixctl_cb_func chassis_features_list;
> >       SB_NODE(fdb, "fdb") \
> >       SB_NODE(static_mac_binding, "static_mac_binding") \
> >       SB_NODE(chassis_template_var, "chassis_template_var") \
> > -    SB_NODE(logical_dp_group, "logical_dp_group")
> > +    SB_NODE(logical_dp_group, "logical_dp_group") \
> > +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
> >   enum sb_engine_node {
> >   #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> > @@ -180,6 +182,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >       engine_add_input(&en_northd, &en_nb_mirror, NULL);
> >       engine_add_input(&en_northd, &en_nb_static_mac_binding, NULL);
> >       engine_add_input(&en_northd, &en_nb_chassis_template_var, NULL);
> > +    engine_add_input(&en_northd, &en_nb_logical_router_static_route, NULL);
> >       engine_add_input(&en_northd, &en_sb_chassis, NULL);
> >       engine_add_input(&en_northd, &en_sb_mirror, NULL);
> > @@ -192,6 +195,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >       engine_add_input(&en_northd, &en_sb_fdb, NULL);
> >       engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL);
> >       engine_add_input(&en_northd, &en_sb_chassis_template_var, NULL);
> > +    engine_add_input(&en_northd, &en_sb_ecmp_nexthop, NULL);
> >       engine_add_input(&en_northd, &en_global_config,
> >                        northd_global_config_handler);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 4b39137e7..3770f9f94 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -16654,6 +16654,101 @@ sync_mirrors(struct ovsdb_idl_txn *ovnsb_txn,
> >       shash_destroy(&sb_mirrors);
> >   }
> > +struct sb_ecmp_nexthop_entry {
> > +    struct hmap_node hmap_node;
> > +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> > +};
> > +
> > +static struct sb_ecmp_nexthop_entry *
> > +sb_ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop)
> > +{
> > +    uint32_t hash = hash_string(nexthop, 0);
> > +    struct sb_ecmp_nexthop_entry *enh_e;
> > +
> > +    HMAP_FOR_EACH_WITH_HASH (enh_e, hmap_node, hash, map) {
> > +        if (!strcmp(enh_e->sb_ecmp_nexthop->nexthop, nexthop)) {
> > +            return enh_e;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +#define NEXTHOP_IDS_LEN    65535
> > +static void
> > +sync_ecmp_symmetric_reply_nexthop(struct ovsdb_idl_txn *ovnsb_txn,
> > +        const struct nbrec_logical_router_static_route_table 
> > *nbrec_sr_table,
> > +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nextop_table)
> > +{
> > +    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> > +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> > +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> > +    struct sb_ecmp_nexthop_entry *enh_e;
> > +    int id;
> > +
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> > +                                       sbrec_ecmp_nextop_table) {
> > +        uint32_t hash = hash_string(sb_ecmp_nexthop->nexthop, 0);
> > +        enh_e = xmalloc(sizeof *enh_e);
> > +        enh_e->sb_ecmp_nexthop = sb_ecmp_nexthop;
> > +        hmap_insert(&sb_only, &enh_e->hmap_node, hash);
> > +    }
> > +
> > +    const struct nbrec_logical_router_static_route *r;
> > +    NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) {
> > +        if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) {
> > +            continue;
> > +        }
> > +
> > +        id = smap_get_int(&r->options, "nexthop-id", -1);
> 
> I think having options:nexthop-id in the NB DB can cause problems. Clearly
> it's only intended for northd to set, but an admin also has access to it.
> Let's say that a user has two static routes in the NB DB with the same
> "nexthop" value. These should have the same nexthop-id associated with them.
> But let's imagine an admin sets the two static routes to have two different
> options:nexthop-id values. northd will end up creating one SB ECMP_Nexthop
> entry, but it will be unpredictable which of the two nexthop-ids will be set
> on this entry. Since patch 2 uses the NB nexthop-id in the ct_label, it
> means that one of these two static routes will end up setting a nonsense ID
> in the ct_label.
> 
> I think that the nexthop-id should only exist on the SB ECMP_Nexthop, and
> not in the NB Logical_Router_Static_Route table. In patch 2, OVN should use
> the SB ECMP_Nexthop nexthop-id when setting the ct_label value. This will
> ensure that all ECMP routes with the same nexthop will have the same
> nexthop-id set in the ct_label.

ack, fine. I will fix it in v2.

> 
> > +        if (id < 0) {
> > +            continue;
> > +        }
> > +        bitmap_set1(nexthop_ids, id);
> > +    }
> > +    NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) {
> > +        if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) {
> > +            continue;
> > +        }
> > +
> > +        id = smap_get_int(&r->options, "nexthop-id", -1);
> > +        if (id < 0) {
> > +            id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
> > +            if (id == NEXTHOP_IDS_LEN) {
> > +                continue;
> > +            }
> > +            bitmap_set1(nexthop_ids, id);
> > +
> > +            struct smap options = SMAP_INITIALIZER(&options);
> > +            smap_clone(&options, &r->options);
> > +            smap_add_format(&options, "nexthop-id", "%d", id);
> > +            nbrec_logical_router_static_route_set_options(r, &options);
> > +            smap_destroy(&options);
> > +        }
> > +
> > +        enh_e = sb_ecmp_nexthop_lookup(&sb_only, r->nexthop);
> > +        if (!enh_e) {
> > +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> > +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> > +
> > +            struct smap options = SMAP_INITIALIZER(&options);
> > +            smap_add_format(&options, "nexthop-id", "%d", id);
> > +            sbrec_ecmp_nexthop_set_options(sb_ecmp_nexthop, &options);
> > +            smap_destroy(&options);
> > +        } else {
> > +            hmap_remove(&sb_only, &enh_e->hmap_node);
> > +            free(enh_e);
> > +        }
> > +    }
> > +
> > +    HMAP_FOR_EACH_POP (enh_e, hmap_node, &sb_only) {
> > +        sbrec_ecmp_nexthop_delete(enh_e->sb_ecmp_nexthop);
> > +        free(enh_e);
> > +    }
> > +    hmap_destroy(&sb_only);
> > +
> > +    bitmap_free(nexthop_ids);
> > +}
> > +
> >   /*
> >    * struct 'dns_info' is used to sync the DNS records between OVN 
> > Northbound db
> >    * and Southbound db.
> > @@ -17334,6 +17429,9 @@ ovnnb_db_run(struct northd_input *input_data,
> >                        &data->ls_datapaths.datapaths);
> >       sync_template_vars(ovnsb_txn, 
> > input_data->nbrec_chassis_template_var_table,
> >                          input_data->sbrec_chassis_template_var_table);
> > +    sync_ecmp_symmetric_reply_nexthop(ovnsb_txn,
> > +                                      input_data->nbrec_static_route_table,
> > +                                      
> > input_data->sbrec_ecmp_nexthop_table);
> >       cleanup_stale_fdb_entries(input_data->sbrec_fdb_table,
> >                                 &data->ls_datapaths.datapaths);
> > diff --git a/northd/northd.h b/northd/northd.h
> > index 3f1cd8341..48b38b542 100644
> > --- a/northd/northd.h
> > +++ b/northd/northd.h
> > @@ -34,6 +34,8 @@ struct northd_input {
> >       const struct nbrec_chassis_template_var_table
> >           *nbrec_chassis_template_var_table;
> >       const struct nbrec_mirror_table *nbrec_mirror_table;
> > +    const struct nbrec_logical_router_static_route_table
> > +        *nbrec_static_route_table;
> >       /* Southbound table references */
> >       const struct sbrec_datapath_binding_table 
> > *sbrec_datapath_binding_table;
> > @@ -50,6 +52,7 @@ struct northd_input {
> >       const struct sbrec_chassis_template_var_table
> >           *sbrec_chassis_template_var_table;
> >       const struct sbrec_mirror_table *sbrec_mirror_table;
> > +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table;
> >       /* Northd lb data node inputs*/
> >       const struct hmap *lbs;
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index 84ae09515..fe32dd030 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> >   {
> >       "name": "OVN_Southbound",
> > -    "version": "20.33.0",
> > -    "cksum": "4076371179 31328",
> > +    "version": "20.34.0",
> > +    "cksum": "1685306144 31809",
> >       "tables": {
> >           "SB_Global": {
> >               "columns": {
> > @@ -607,6 +607,17 @@
> >                                             "refTable": 
> > "Datapath_Binding"}}}},
> >               "indexes": [["logical_port", "ip"]],
> >               "isRoot": true},
> > +        "ECMP_Nexthop": {
> > +            "columns": {
> > +                "nexthop": {"type": "string"},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> 
> I think that the nexthop-id should not be in the "options' of ECMP_Nexthop.
> It's not an optional thing and so it should have its own column.

ack, fine. I will fix it in v2.

> 
> > +            "indexes": [["nexthop"]],
> > +            "isRoot": true},
> >           "Chassis_Template_Var": {
> >               "columns": {
> >                   "chassis": {"type": "string"},
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index ac4e585f2..5f5e2401b 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -5095,4 +5095,23 @@ tcp.flags = RST;
> >         The set of variable values for a given chassis.
> >       </column>
> >     </table>
> > +
> > +  <table name="ECMP_Nexthop">
> > +    <column name="nexthop">
> > +      <p>
> > +        Nexthop IP address for this route.  Nexthop IP address should be 
> > the IP
> > +        address of a connected router port or the IP address of a logical 
> > port
> > +        or can be set to <code>discard</code> for dropping packets which 
> > match
> > +        the given route.
> > +      </p>
> > +    </column>
> > +
> > +    <column name="options">
> > +      Reserved for future use.
> > +    </column>
> 
> "options:nexthop-id" is not documented here (or in the NB
> Logical_Router_Static_Route table, but as I mentioned earlier, I don't think
> it should be there anyway)

ditto.

Regards,
Lorenzo

> 
> > +
> > +    <column name="external_ids">
> > +      See <em>External IDs</em> at the beginning of this document.
> > +    </column>
> > +  </table>
> >   </database>
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index 89aed5adc..2160e8de7 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -6542,6 +6542,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
> >   check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
> >   check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> > 192.168.0.10
> > +check_row_count ECMP_Nexthop 1
> >   ovn-sbctl dump-flows lr0 > lr0flows
> > @@ -6553,6 +6554,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | 
> > ovn_strip_lflows], [0], [dn
> >   ])
> >   check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> > 192.168.0.20
> > +check_row_count ECMP_Nexthop 2
> >   ovn-sbctl dump-flows lr0 > lr0flows
> >   AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | 
> > ovn_strip_lflows], [0], [dnl
> > @@ -6589,6 +6591,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows 
> > | ovn_strip_lflows], [0], [
> >   # add ecmp route with wrong nexthop
> >   check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> > 192.168.1.20
> > +check_row_count ECMP_Nexthop 3
> >   ovn-sbctl dump-flows lr0 > lr0flows
> >   AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | 
> > ovn_strip_lflows], [0], [dnl
> > @@ -6603,6 +6606,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | 
> > sed 's/192\.168\.0\..0/192.
> >   check ovn-nbctl lr-route-del lr0
> >   wait_row_count nb:Logical_Router_Static_Route 0
> > +check_row_count ECMP_Nexthop 0
> >   check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> >   ovn-sbctl dump-flows lr0 > lr0flows
> 
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to