Hi Felix,

On 2/6/25 3:19 PM, Felix Huettner via dev wrote:
> Previously we just assumed that if a LR had multiple LRPs bound to the
> local chassis that all routes where valid for all of these LRPs.
> This commit handles a previous option in ovn-nb that allowes the user to

Typo: allowes

> specify the interface name that a route needs to use to be acceptable
> for a given LRP.
> The user specifies a port name in the northbound database. If this port
> is bound locally we use the interface name associated with it for
> filtering routes.
> Additionally users can overwrite that interface name to also support
> cases where there is no referencable port name in the northbound.

Typo: referencable

> 
> This can e.g. be used if a chassis has multiple uplinks. Each of these
> uplinks could be assigned to a different LRP of the same LR connected to
> the same localnet.
> The option allows then to map e.g. a learned default route to each
> individual LRP. If one of the links would go down just one of the
> learned routes would be removed, causing ECMP across multiple chassis to
> be correctly weighted.
> 
> Signed-off-by: Felix Huettner <[email protected]>
> ---

The changes look correct but I have some comments on the way we handle
the "dynamic-routing-port-mapping" external ID you added in v7..

First of all it needs to be documented in the ovn-controller man page.

> v6->v7:
>   * Reworked implementation based on review comments.
>     We now have a layer of indirection to allow different chassis to
>     have different interface names.
> v5->v6:
>   * addressed review comments
> v3->v4:
>   - addressed review comments.
>   - updated commit message to be more descriptive.
> 
>  controller/ovn-controller.c         |  7 +++
>  controller/route-exchange-netlink.c |  2 +
>  controller/route-exchange-netlink.h |  3 ++
>  controller/route-exchange.c         | 20 ++++++---
>  controller/route.c                  | 68 ++++++++++++++++++++++++++---
>  controller/route.h                  |  8 +++-
>  tests/system-ovn.at                 | 62 +++++++++++++++++++++++++-
>  7 files changed, 156 insertions(+), 14 deletions(-)
> 
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 0cf931986..1a09af5ed 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -4978,13 +4978,20 @@ en_route_run(struct engine_node *node, void *data)
>      const struct sbrec_advertised_route_table *advertised_route_table =
>          EN_OVSDB_GET(engine_get_input("SB_advertised_route", node));
>  
> +    const struct ovsrec_open_vswitch *cfg
> +        = ovsrec_open_vswitch_table_first(ovs_table);
> +    const char *dynamic_routing_port_mapping = smap_get(&cfg->external_ids,
> +          "dynamic-routing-port-mapping");

Nit: I think this looks a bit better:

    const char *dynamic_routing_port_mapping =
        smap_get(&cfg->external_ids, "dynamic-routing-port-mapping");

> +
>      struct route_ctx_in r_ctx_in = {
>          .advertised_route_table = advertised_route_table,
>          .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
>          .chassis = chassis,
> +        .dynamic_routing_port_mapping = dynamic_routing_port_mapping,
>          .active_tunnels = &rt_data->active_tunnels,
>          .local_datapaths = &rt_data->local_datapaths,
>          .local_lports = &rt_data->local_lports,
> +        .local_bindings = &rt_data->lbinding_data.bindings,
>      };
>  
>      struct route_ctx_out r_ctx_out = {
> diff --git a/controller/route-exchange-netlink.c 
> b/controller/route-exchange-netlink.c
> index 01f252d11..661e10614 100644
> --- a/controller/route-exchange-netlink.c
> +++ b/controller/route-exchange-netlink.c
> @@ -228,6 +228,8 @@ handle_route_msg(const struct route_table_msg *msg, void 
> *data)
>              rr->prefix = rd->rta_dst;
>              rr->plen = rd->rtm_dst_len;
>              rr->nexthop = nexthop->addr;
> +            memcpy(rr->ifname, nexthop->ifname, IFNAMSIZ);
> +            rr->ifname[IFNAMSIZ] = 0;
>              ovs_list_push_back(handle_data->learned_routes, &rr->list_node);
>          }
>          return;
> diff --git a/controller/route-exchange-netlink.h 
> b/controller/route-exchange-netlink.h
> index 6b26103d5..e46ae048e 100644
> --- a/controller/route-exchange-netlink.h
> +++ b/controller/route-exchange-netlink.h
> @@ -21,6 +21,7 @@
>  #include <stdint.h>
>  #include "openvswitch/list.h"
>  #include <netinet/in.h>
> +#include <net/if.h>
>  
>  /* This value is arbitrary but currently unused.
>   * See the kernel rtnetlink UAPI at
> @@ -37,6 +38,8 @@ struct re_nl_received_route_node {
>      struct in6_addr prefix;
>      unsigned int plen;
>      struct in6_addr nexthop;
> +    /* Adding 1 to this to be sure we actually have a terminating '\0' */
> +    char ifname[IFNAMSIZ + 1];
>  };
>  
>  int re_nl_create_vrf(const char *ifname, uint32_t table_id);
> diff --git a/controller/route-exchange.c b/controller/route-exchange.c
> index b0ebc4fda..7b91d3adb 100644
> --- a/controller/route-exchange.c
> +++ b/controller/route-exchange.c
> @@ -94,7 +94,7 @@ route_lookup(struct hmap *route_map,
>  static void
>  sb_sync_learned_routes(const struct ovs_list *learned_routes,
>                         const struct sbrec_datapath_binding *datapath,
> -                       const struct sset *bound_ports,
> +                       const struct smap *bound_ports,
>                         struct ovsdb_idl_txn *ovnsb_idl_txn,
>                         struct ovsdb_idl_index *sbrec_port_binding_by_name,
>                         struct ovsdb_idl_index 
> *sbrec_learned_route_by_datapath)
> @@ -109,8 +109,9 @@ sb_sync_learned_routes(const struct ovs_list 
> *learned_routes,
>      SBREC_LEARNED_ROUTE_FOR_EACH_EQUAL (sb_route, filter,
>                                          sbrec_learned_route_by_datapath) {
>          /* If the port is not local we don't care about it.
> -         * Some other ovn-controller will handle it. */
> -        if (!sset_contains(bound_ports,
> +         * Some other ovn-controller will handle it.
> +         * We may not use smap_get since the value might be validly NULL. */
> +        if (!smap_get_node(bound_ports,
>                             sb_route->logical_port->logical_port)) {
>              continue;
>          }
> @@ -124,11 +125,18 @@ sb_sync_learned_routes(const struct ovs_list 
> *learned_routes,
>                                                 learned_route->plen);
>          char *nexthop = normalize_v46(&learned_route->nexthop);
>  
> -        const char *logical_port_name;
> -        SSET_FOR_EACH (logical_port_name, bound_ports) {
> +        struct smap_node *port_node;
> +        SMAP_FOR_EACH (port_node, bound_ports) {
> +            /* The user specified an ifname, but we learned it on a different
> +             * port. */
> +            if (port_node->value && strcmp(port_node->value,
> +                                           learned_route->ifname)) {
> +                continue;
> +            }
> +
>              const struct sbrec_port_binding *logical_port =
>                  lport_lookup_by_name(sbrec_port_binding_by_name,
> -                                     logical_port_name);
> +                                     port_node->key);
>              if (!logical_port) {
>                  continue;
>              }
> diff --git a/controller/route.c b/controller/route.c
> index 108435bad..2e9e705c9 100644
> --- a/controller/route.c
> +++ b/controller/route.c
> @@ -19,6 +19,7 @@
>  
>  #include <net/if.h>
>  
> +#include "vswitch-idl.h"
>  #include "openvswitch/hmap.h"
>  #include "openvswitch/vlog.h"
>  
> @@ -30,7 +31,6 @@
>  #include "route.h"
>  
>  VLOG_DEFINE_THIS_MODULE(exchange);
> -static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>  
>  bool
>  route_exchange_relevant_port(const struct sbrec_port_binding *pb)
> @@ -73,6 +73,56 @@ find_route_exchange_pb(struct ovsdb_idl_index 
> *sbrec_port_binding_by_name,
>      return NULL;
>  }
>  
> +static char *
> +ifname_from_port_name(const char *port_mapping, struct shash *local_bindings,
> +                      const struct sbrec_chassis *chassis,
> +                      const char *port_name)
> +{
> +    if (!port_name) {
> +        return NULL;
> +    }
> +
> +    if (port_mapping) {
> +        char *save_ptr = NULL;
> +        char *tokstr = xstrdup(port_mapping);
> +
> +        for (char *token = strtok_r(tokstr, ";", &save_ptr);
> +             token != NULL;
> +             token = strtok_r(NULL, ";", &save_ptr)) {
> +
> +            char *key, *value;
> +            key = value = xstrdup(token);
> +            strsep(&value, "=");
> +

strtok_r and strsep in the same function. :)

Joke aside, I asked Ilya if OVS has helper functions to parse strings
into smap and apparently there's no explicit helper (there is one for
ssets but we need a smap here).

He did point me to ofputil_parse_key_value() though - this could be used
to iterate through the key/value pairs embedded in the input string and
allow us to build a smap without custom parsing.

> +            if (!value) {
> +              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +              VLOG_WARN_RL(&rl, "dynamic-routing-port-mapping values '%s' is 
> "
> +                                "not valid.", token);
> +              free(key);
> +              free(tokstr);
> +              break;
> +            }
> +
> +            if (!strcmp(key, port_name)) {
> +                char *out = xstrdup(value);
> +                free(key);
> +                free(tokstr);
> +                return out;
> +            }
> +            free(key);
> +        }
> +        free(tokstr);
> +    }
> +
> +    if (!local_binding_is_up(local_bindings, port_name, chassis)) {
> +        return xstrdup("thisinterfacenameistoolongsowillnevertrigger");

:)

But this is too hacky.  I'd say we should find a different way of
handling this.

> +    }
> +
> +    struct local_binding *binding = local_binding_find(local_bindings,
> +                                                       port_name);
> +    return xstrdup(binding->iface->name);
> +}
> +
>  static void
>  advertise_datapath_cleanup(struct advertise_datapath_entry *ad)
>  {
> @@ -82,7 +132,7 @@ advertise_datapath_cleanup(struct advertise_datapath_entry 
> *ad)
>          free(ar);
>      }
>      hmap_destroy(&ad->routes);
> -    sset_destroy(&ad->bound_ports);
> +    smap_destroy(&ad->bound_ports);
>      free(ad);
>  }
>  
> @@ -114,7 +164,7 @@ route_run(struct route_ctx_in *r_ctx_in,
>          ad = xzalloc(sizeof(*ad));
>          ad->db = ld->datapath;
>          hmap_init(&ad->routes);
> -        sset_init(&ad->bound_ports);
> +        smap_init(&ad->bound_ports);
>  
>          /* This is a LR datapath, find LRPs with route exchange options
>           * that are bound locally. */
> @@ -132,10 +182,17 @@ route_run(struct route_ctx_in *r_ctx_in,
>  
>              ad->maintain_vrf |= smap_get_bool(
>                  &repb->options, "dynamic-routing-maintain-vrf", false);
> -            sset_add(&ad->bound_ports, local_peer->logical_port);
> +
> +            const char *port_name = smap_get(&repb->options,
> +                                            "dynamic-routing-port-name");
> +            char *ifname = ifname_from_port_name(
> +                r_ctx_in->dynamic_routing_port_mapping,

We keep reparsing the dynamic_routing_port_mapping for each datapath.
Maybe we should build a smap out of this value at the beginning of this
function and pass that as argument to ifname_from_port_name()?

> +                r_ctx_in->local_bindings, r_ctx_in->chassis, port_name);
> +            smap_add_nocopy(&ad->bound_ports,
> +                            xstrdup(local_peer->logical_port), ifname);
>          }
>  
> -        if (sset_is_empty(&ad->bound_ports)) {
> +        if (smap_is_empty(&ad->bound_ports)) {
>              advertise_datapath_cleanup(ad);
>              continue;
>          }
> @@ -157,6 +214,7 @@ route_run(struct route_ctx_in *r_ctx_in,
>          struct in6_addr prefix;
>          unsigned int plen;
>          if (!ip46_parse_cidr(route->ip_prefix, &prefix, &plen)) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>              VLOG_WARN_RL(&rl, "bad 'ip_prefix' %s in route "
>                           UUID_FMT, route->ip_prefix,
>                           UUID_ARGS(&route->header_.uuid));
> diff --git a/controller/route.h b/controller/route.h
> index 448643ead..9115ba438 100644
> --- a/controller/route.h
> +++ b/controller/route.h
> @@ -22,6 +22,7 @@
>  #include <netinet/in.h>
>  #include "openvswitch/hmap.h"
>  #include "sset.h"
> +#include "smap.h"
>  
>  struct hmap;
>  struct ovsdb_idl_index;
> @@ -32,9 +33,11 @@ struct route_ctx_in {
>      const struct sbrec_advertised_route_table *advertised_route_table;
>      struct ovsdb_idl_index *sbrec_port_binding_by_name;
>      const struct sbrec_chassis *chassis;
> +    const char *dynamic_routing_port_mapping;
>      const struct sset *active_tunnels;
>      const struct hmap *local_datapaths;
>      const struct sset *local_lports;
> +    struct shash *local_bindings;
>  };
>  
>  struct route_ctx_out {
> @@ -50,8 +53,9 @@ struct advertise_datapath_entry {
>      struct hmap routes;
>  
>      /* The name of the port bindings locally bound for this datapath and
> -     * running route exchange logic. */
> -    struct sset bound_ports;
> +     * running route exchange logic.
> +     * The key is the port name and the value is the ifname if set. */
> +    struct smap bound_ports;
>  };
>  
>  struct advertise_route_entry {
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index abd940029..ca5c2a5c8 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -15660,6 +15660,35 @@ check_row_count Learned_Route 1
>  lp=$(fetch_column port_binding _uuid logical_port=internet-phys)
>  check_row_count Learned_Route 1 logical_port=$lp ip_prefix=233.252.0.0/24 
> nexthop=192.168.10.10
>  
> +# By setting a learning interface filter we will now forget about this route.
> +# The Port referenced by the name does not even exist.
> +check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \
> +      options:dynamic-routing-port-name=thisportdoesnotexist
> +check_row_count Learned_Route 0
> +
> +# Setting the local ovsdb to map this port to "lo" will make route learning
> +# work again.
> +check ovs-vsctl set Open_vSwitch . \
> +    
> external-ids:dynamic-routing-port-mapping="thisisirrelevant=andjustfortesting;thisportdoesnotexist=lo"
> +check ovn-nbctl --wait=hv sync
> +check_row_count Learned_Route 1
> +#
> +# Now we try the interface filter with an existing port.
> +check ovn-nbctl lsp-add phys mylearninglsp
> +check ovs-vsctl -- add-port br-int hv1-mll -- \
> +    set interface hv1-mll type=internal external-ids:iface-id=mylearninglsp
> +check ip link set hv1-mll up
> +wait_for_ports_up mylearninglsp
> +
> +check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \
> +      options:dynamic-routing-port-name=mylearninglsp
> +
> +check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf 
> ovnvrf1337
> +# For now we trigger a recompute as route watching is not yet implemented.
> +check ovn-appctl -t ovn-controller inc-engine/recompute
> +check ovn-nbctl --wait=hv sync
> +check_row_count Learned_Route 1 ip_prefix=233.253.0.0/24 
> nexthop=192.168.20.20
> +
>  OVS_APP_EXIT_AND_WAIT([ovn-controller])
>  
>  as ovn-sb
> @@ -15869,10 +15898,41 @@ check ip route add 233.252.0.0/24 via 192.168.10.10 
> dev lo onlink vrf ovnvrf1337
>  # For now we trigger a recompute as route watching is not yet implemented.
>  check ovn-appctl -t ovn-controller inc-engine/recompute
>  check ovn-nbctl --wait=hv sync
> -check_row_count Learned_Route 2
> +check_row_count Learned_Route 1
>  lp=$(fetch_column port_binding _uuid logical_port=internet-phys)
>  check_row_count Learned_Route 1 logical_port=$lp ip_prefix=233.252.0.0/24 
> nexthop=192.168.10.10
>  
> +# By setting a learning interface filter we will now forget about this route.
> +# The Port referenced by the name does not even exist.
> +check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \
> +      options:dynamic-routing-port-name=thisportdoesnotexist
> +check_row_count Learned_Route 0
> +
> +# Setting the local ovsdb to map this port to "lo" will make route learning
> +# work again.
> +check ovs-vsctl set Open_vSwitch . \
> +    
> external-ids:dynamic-routing-port-mapping="thisisirrelevant=andjustfortesting;thisportdoesnotexist=lo"
> +check ovn-nbctl --wait=hv sync
> +check_row_count Learned_Route 1
> +
> +# Now we try the interface filter with an existing port.
> +check ovn-nbctl lsp-add phys mylearninglsp
> +check ovs-vsctl -- add-port br-int hv1-mll -- \
> +    set interface hv1-mll type=internal external-ids:iface-id=mylearninglsp
> +check ip link set hv1-mll up
> +wait_for_ports_up mylearninglsp
> +
> +check ovn-nbctl --wait=hv set Logical_Router_Port internet-phys \
> +      options:dynamic-routing-port-name=mylearninglsp
> +
> +check ip route add 233.253.0.0/24 via 192.168.20.20 dev hv1-mll onlink vrf 
> ovnvrf1337
> +# For now we trigger a recompute as route watching is not yet implemented.
> +check ovn-appctl -t ovn-controller inc-engine/recompute
> +check ovn-nbctl --wait=hv sync
> +check_row_count Learned_Route 1 ip_prefix=233.253.0.0/24 
> nexthop=192.168.20.20
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
>  as ovn-sb
>  OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>  

Regards,
Dumitru


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to