On 1/28/25 11:17 AM, Felix Huettner wrote:
> On Thu, Jan 16, 2025 at 11:55:21AM +0100, Dumitru Ceara wrote:
>> Hi Felix,
> 
> Hi Dumitru,
> thanks for the review.
> 

Hi Felix,

>>
>> On 1/2/25 4:19 PM, Felix Huettner via dev wrote:
>>> we now learn all routes inside the vrfs we also advertise routes on.
>>
>> Nit: We
>>
>>> The routes are then placed in the southbound database for processing by
>>> northd.
>>>
>>> Routes are only selected if matching the following rules:
>>> 1. must not be a route advertised by us
>>> 2. must not be a local connected route (as we want to not learn transfer
>>>    networks)
>>> 3. the prefix must not be a link local address
>>>
>>> However we can not reliably determine over which link we learned the
>>> route in case we have two LRPs of the same LR on the same chassis.
>>> For now we just assume the routes on both links are identical.
>>> Future commits will refine this.
>>>
>>> Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud>
>>> ---
>>> v2->v3:
>>>  * Set monitor conditions on sb Learned_Route table.
>>>  * Do not learn routes if Learned_Route table does not exist (upgrades).
>>>
>>>  controller/ovn-controller.c         |  50 ++++++++-
>>>  controller/route-exchange-netlink.c |  42 +++++++-
>>>  controller/route-exchange-netlink.h |  13 ++-
>>>  controller/route-exchange.c         | 157 +++++++++++++++++++++++++++-
>>>  controller/route-exchange.h         |   5 +
>>>  lib/ovn-util.c                      |  10 ++
>>>  lib/ovn-util.h                      |   1 +
>>>  tests/system-ovn.at                 |  28 +++++
>>>  8 files changed, 296 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
>>> index 623a70614..77c85086d 100644
>>> --- a/controller/ovn-controller.c
>>> +++ b/controller/ovn-controller.c
>>> @@ -233,7 +233,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>>>       *
>>>       * Monitor Template_Var for local chassis.
>>>       *
>>> -     * Monitor Advertised_Route for local datapaths.
>>> +     * Monitor Advertised/Learned_Route for local datapaths.
>>>       *
>>>       * We always monitor patch ports because they allow us to see the 
>>> linkages
>>>       * between related logical datapaths.  That way, when we know that we 
>>> have
>>> @@ -252,6 +252,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>>>      struct ovsdb_idl_condition chprv = OVSDB_IDL_CONDITION_INIT(&chprv);
>>>      struct ovsdb_idl_condition tv = OVSDB_IDL_CONDITION_INIT(&tv);
>>>      struct ovsdb_idl_condition ar = OVSDB_IDL_CONDITION_INIT(&ar);
>>> +    struct ovsdb_idl_condition lr = OVSDB_IDL_CONDITION_INIT(&lr);
>>>  
>>>      /* Always monitor all logical datapath groups. Otherwise, DPG updates 
>>> may
>>>       * be received *after* the lflows using it are seen by ovn-controller.
>>> @@ -272,6 +273,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>>>          ovsdb_idl_condition_add_clause_true(&chprv);
>>>          ovsdb_idl_condition_add_clause_true(&tv);
>>>          ovsdb_idl_condition_add_clause_true(&ar);
>>> +        ovsdb_idl_condition_add_clause_true(&lr);
>>>          goto out;
>>>      }
>>>  
>>> @@ -361,6 +363,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>>>              sbrec_ip_multicast_add_clause_datapath(&ip_mcast, OVSDB_F_EQ,
>>>                                                     uuid);
>>>              sbrec_advertised_route_add_clause_datapath(&ar, OVSDB_F_EQ, 
>>> uuid);
>>> +            sbrec_learned_route_add_clause_datapath(&lr, OVSDB_F_EQ, uuid);
>>>          }
>>>  
>>>          /* Datapath groups are immutable, which means a new group record is
>>> @@ -389,6 +392,7 @@ out:;
>>>          sb_table_set_req_mon_condition(ovnsb_idl, chassis_private, &chprv),
>>>          sb_table_set_opt_mon_condition(ovnsb_idl, chassis_template_var, 
>>> &tv),
>>>          sb_table_set_opt_mon_condition(ovnsb_idl, advertised_route, &ar),
>>> +        sb_table_set_opt_mon_condition(ovnsb_idl, learned_route, &lr),
>>>      };
>>>  
>>>      unsigned int expected_cond_seqno = 0;
>>> @@ -409,6 +413,7 @@ out:;
>>>      ovsdb_idl_condition_destroy(&chprv);
>>>      ovsdb_idl_condition_destroy(&tv);
>>>      ovsdb_idl_condition_destroy(&ar);
>>> +    ovsdb_idl_condition_destroy(&lr);
>>>      return expected_cond_seqno;
>>>  }
>>>  
>>> @@ -874,7 +879,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
>>>      SB_NODE(meter, "meter") \
>>>      SB_NODE(static_mac_binding, "static_mac_binding") \
>>>      SB_NODE(chassis_template_var, "chassis_template_var") \
>>> -    SB_NODE(advertised_route, "advertised_route")
>>> +    SB_NODE(advertised_route, "advertised_route") \
>>> +    SB_NODE(learned_route, "learned_route")
>>>  
>>>  enum sb_engine_node {
>>>  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
>>> @@ -4991,13 +4997,34 @@ route_sb_advertised_route_data_handler(struct 
>>> engine_node *node, void *data)
>>>      return true;
>>>  }
>>>  
>>> +struct ed_type_route_exchange {
>>> +    /* We need the idl to check if a table exists. */
>>> +    struct ovsdb_idl *sb_idl;
>>> +};
>>> +
>>>  static void
>>> -en_route_exchange_run(struct engine_node *node, void *data OVS_UNUSED)
>>> +en_route_exchange_run(struct engine_node *node, void *data)
>>>  {
>>> +    struct ed_type_route_exchange *re = data;
>>> +
>>> +    struct ovsdb_idl_index *sbrec_learned_route_by_datapath =
>>> +        engine_ovsdb_node_get_index(
>>> +            engine_get_input("SB_learned_route", node),
>>> +            "datapath");
>>> +
>>> +    struct ovsdb_idl_index *sbrec_port_binding_by_name =
>>> +        engine_ovsdb_node_get_index(
>>> +                engine_get_input("SB_port_binding", node),
>>> +                "name");
>>> +
>>>      struct ed_type_route *route_data =
>>>          engine_get_input_data("route", node);
>>>  
>>>      struct route_exchange_ctx_in r_ctx_in = {
>>> +        .ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn,
>>> +        .ovnsb_idl = re->sb_idl,
>>> +        .sbrec_learned_route_by_datapath = sbrec_learned_route_by_datapath,
>>> +        .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
>>>          .announce_routes = &route_data->announce_routes,
>>>      };
>>>  
>>> @@ -5012,9 +5039,11 @@ en_route_exchange_run(struct engine_node *node, void 
>>> *data OVS_UNUSED)
>>>  
>>>  static void *
>>>  en_route_exchange_init(struct engine_node *node OVS_UNUSED,
>>> -                       struct engine_arg *arg OVS_UNUSED)
>>> +                       struct engine_arg *arg)
>>>  {
>>> -    return NULL;
>>> +    struct ed_type_route_exchange *re = xzalloc(sizeof(*re));
>>> +    re->sb_idl = arg->sb_idl;
>>
>> Nit: add a newline.
>>
>>> +    return re;
>>>  }
>>>  
>>>  static void
>>> @@ -5232,6 +5261,9 @@ main(int argc, char *argv[])
>>>      struct ovsdb_idl_index *sbrec_advertised_route_index_by_datapath
>>>          = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
>>>                                    &sbrec_advertised_route_col_datapath);
>>> +    struct ovsdb_idl_index *sbrec_learned_route_index_by_datapath
>>> +        = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
>>> +                                  &sbrec_learned_route_col_datapath);
>>
>> We might not need this index at all if we sync all learned routes at
>> once for all datapaths.
> 
> I think we still need it, but see below.
> 
>>
>>>  
>>>      ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
>>>      ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
>>> @@ -5258,6 +5290,8 @@ main(int argc, char *argv[])
>>>                     &sbrec_ha_chassis_group_col_external_ids);
>>>      ovsdb_idl_omit(ovnsb_idl_loop.idl,
>>>                     &sbrec_advertised_route_col_external_ids);
>>> +    ovsdb_idl_omit(ovnsb_idl_loop.idl,
>>> +                   &sbrec_learned_route_col_external_ids);
>>>  
>>>      /* We don't want to monitor Connection table at all. So omit all the
>>>       * columns. */
>>> @@ -5351,6 +5385,10 @@ main(int argc, char *argv[])
>>>                       route_sb_advertised_route_data_handler);
>>>  
>>>      engine_add_input(&en_route_exchange, &en_route, NULL);
>>> +    engine_add_input(&en_route_exchange, &en_sb_learned_route,
>>> +                     engine_noop_handler);
>>> +    engine_add_input(&en_route_exchange, &en_sb_port_binding,
>>> +                     engine_noop_handler);
>>>  
>>>      engine_add_input(&en_addr_sets, &en_sb_address_set,
>>>                       addr_sets_sb_address_set_handler);
>>> @@ -5571,6 +5609,8 @@ main(int argc, char *argv[])
>>>                                  
>>> sbrec_chassis_template_var_index_by_chassis);
>>>      engine_ovsdb_node_add_index(&en_sb_advertised_route, "datapath",
>>>                                  sbrec_advertised_route_index_by_datapath);
>>> +    engine_ovsdb_node_add_index(&en_sb_learned_route, "datapath",
>>> +                                sbrec_learned_route_index_by_datapath);
>>>      engine_ovsdb_node_add_index(&en_ovs_flow_sample_collector_set, "id",
>>>                                  ovsrec_flow_sample_collector_set_by_id);
>>>      engine_ovsdb_node_add_index(&en_ovs_port, "qos", ovsrec_port_by_qos);
>>> diff --git a/controller/route-exchange-netlink.c 
>>> b/controller/route-exchange-netlink.c
>>> index e065c49c1..c2a7551f3 100644
>>> --- a/controller/route-exchange-netlink.c
>>> +++ b/controller/route-exchange-netlink.c
>>> @@ -26,6 +26,7 @@
>>>  #include "openvswitch/ofpbuf.h"
>>>  #include "openvswitch/vlog.h"
>>>  #include "packets.h"
>>> +#include "ovn-util.h"
>>>  #include "route-table.h"
>>>  #include "route.h"
>>>  
>>> @@ -171,8 +172,27 @@ re_nl_delete_route(uint32_t table_id, const struct 
>>> in6_addr *dst,
>>>      return modify_route(RTM_DELROUTE, 0, table_id, dst, plen);
>>>  }
>>>  
>>> +static uint32_t
>>> +route_hash(const struct in6_addr *dst, unsigned int plen)
>>
>> We might not need this, see my comment below about using a list instead.
> 
> I'll switch to a list.
> 
>>
>>> +{
>>> +    uint32_t hash = hash_bytes(dst->s6_addr, 16, 0);
>>> +    return hash_int(plen, hash);
>>> +}
>>> +
>>> +void
>>> +re_nl_received_routes_destroy(struct hmap *host_routes)
>>
>> Nit: they're not really host_routes, are they?  Should we rename this to
>> "routes"?
>>
>>> +{
>>> +    struct re_nl_received_route_node *rr;
>>> +    HMAP_FOR_EACH_SAFE (rr, hmap_node, host_routes) {
>>
>> HMAP_FOR_EACH_POP() would make things a bit easier.
>>
>>> +        hmap_remove(host_routes, &rr->hmap_node);
>>> +        free(rr);
>>> +    }
>>> +    hmap_destroy(host_routes);
>>> +}
>>> +
>>>  struct route_msg_handle_data {
>>>      const struct hmap *routes;
>>> +    struct hmap *learned_routes;
>>
>> Do we really need learned_routes to be a hash map?  I don't see any
>> place where we do lookups into the map.  We just always add to the map
>> or iterate through all the elements of the map.  Can't it just be a list
>> of routes instead?
>>
>>>  };
>>>  
>>>  static void
>>> @@ -184,8 +204,25 @@ handle_route_msg_delete_routes(const struct 
>>> route_table_msg *msg, void *data)
>>
>> This function now does more than just deleting routes.  Now it also
>> learns non-ovn-created routes.  Should we just call it
>> handle_route_msg() instead?  We could do that directly in the patch that
>> introduced it, I guess.
> 
> Yea will be fixed.
> 
>>
>>>      struct advertise_route_entry *ar;
>>>      int err;
>>>  
>>> -    /* This route is not from us, we should not touch it. */
>>> +    /* This route is not from us, so we learn it. */
>>>      if (rd->rtm_protocol != RTPROT_OVN) {
>>> +        if (prefix_is_link_local(&rd->rta_dst, rd->rtm_dst_len)) {
>>> +            return;
>>> +        }
>>> +        struct route_data_nexthop *nexthop;
>>> +        LIST_FOR_EACH (nexthop, nexthop_node, &rd->nexthops) {
>>> +            if (ipv6_is_zero(&nexthop->addr)) {
>>> +                /* This is most likely an address on the local link.
>>> +                 * As we just want to learn remote routes we do not need 
>>> it.*/
>>> +                continue;
>>> +            }
>>> +            struct re_nl_received_route_node *rr = xzalloc(sizeof *rr);
>>> +            hmap_insert(handle_data->learned_routes, &rr->hmap_node,
>>> +                        route_hash(&rd->rta_dst, rd->rtm_dst_len));
>>> +            rr->addr = rd->rta_dst;
>>> +            rr->plen = rd->rtm_dst_len;
>>> +            rr->nexthop = nexthop->addr;
>>> +        }
>>>          return;
>>>      }
>>>  
>>> @@ -212,7 +249,7 @@ handle_route_msg_delete_routes(const struct 
>>> route_table_msg *msg, void *data)
>>>  
>>>  void
>>>  re_nl_sync_routes(uint32_t table_id,
>>> -                  const struct hmap *routes)
>>> +                  const struct hmap *routes, struct hmap *learned_routes)
>>>  {
>>>      struct advertise_route_entry *ar;
>>>      HMAP_FOR_EACH (ar, node, routes) {
>>> @@ -224,6 +261,7 @@ re_nl_sync_routes(uint32_t table_id,
>>>       * in the system. */
>>>      struct route_msg_handle_data data = {
>>>          .routes = routes,
>>> +        .learned_routes = learned_routes,
>>>      };
>>>      route_table_dump_one_table(table_id, handle_route_msg_delete_routes,
>>>                                 &data);
>>> diff --git a/controller/route-exchange-netlink.h 
>>> b/controller/route-exchange-netlink.h
>>> index f87ebd75d..566b38fde 100644
>>> --- a/controller/route-exchange-netlink.h
>>> +++ b/controller/route-exchange-netlink.h
>>> @@ -16,6 +16,8 @@
>>>  #define ROUTE_EXCHANGE_NETLINK_H 1
>>>  
>>>  #include <stdint.h>
>>> +#include "openvswitch/hmap.h"
>>> +#include <netinet/in.h>
>>>  
>>>  /* This value is arbitrary but currently unused.
>>>   * See 
>>> https://github.com/iproute2/iproute2/blob/main/etc/iproute2/rt_protos
>>> @@ -24,6 +26,13 @@
>>>  struct in6_addr;
>>>  struct hmap;
>>>  
>>> +struct re_nl_received_route_node {
>>> +    struct hmap_node hmap_node;
>>> +    struct in6_addr addr;
>>> +    unsigned int plen;
>>> +    struct in6_addr nexthop;
>>> +};
>>> +
>>>  int re_nl_create_vrf(const char *ifname, uint32_t table_id);
>>>  int re_nl_delete_vrf(const char *ifname);
>>>  
>>> @@ -34,7 +43,9 @@ int re_nl_delete_route(uint32_t table_id, const struct 
>>> in6_addr *dst,
>>>  
>>>  void re_nl_dump(uint32_t table_id);
>>>  
>>> +void re_nl_received_routes_destroy(struct hmap *);
>>
>> Nit: Raw hmaps make it less clear what the parameter actually is.  I'd
>> explicitly write the parameter name here:
>>
>> void re_nl_received_routes_destroy(struct hmap *routes);
>>
>>>  void re_nl_sync_routes(uint32_t table_id,
>>> -                       const struct hmap *host_routes);
>>> +                       const struct hmap *host_routes,
>>> +                       struct hmap *learned_routes);
>>>  
>>>  #endif /* route-exchange-netlink.h */
>>> diff --git a/controller/route-exchange.c b/controller/route-exchange.c
>>> index 90144f75f..febe13a8e 100644
>>> --- a/controller/route-exchange.c
>>> +++ b/controller/route-exchange.c
>>> @@ -34,6 +34,148 @@ static struct vlog_rate_limit rl = 
>>> VLOG_RATE_LIMIT_INIT(5, 20);
>>>  
>>>  static struct sset _maintained_vrfs = SSET_INITIALIZER(&_maintained_vrfs);
>>>  
>>> +struct route_entry {
>>> +    struct hmap_node hmap_node;
>>> +
>>> +    const struct sbrec_learned_route *sb_route;
>>> +
>>> +    const struct sbrec_datapath_binding *sb_db;
>>> +    const struct sbrec_port_binding *logical_port;
>>> +    char *ip_prefix;
>>> +    char *nexthop;
>>
>> We strdup ip_prefix and nexthop twice for learned routes that are not in
>> the SB yet.  We could avoid that by transferring ownership of the
>> ip_prefix/nexthop values when we call route_alloc_entry().  We'd have to
>> still strdup when we call the function when iterating on learned routes
>> in the SB.
>>
>> However, maybe we don't need all this at all if we change the sync
>> routine as I describe below.
>>
>>> +    bool stale;
>>> +};
>>> +
>>> +static struct route_entry *
>>> +route_alloc_entry(struct hmap *routes,
>>> +                  const struct sbrec_datapath_binding *sb_db,
>>> +                  const struct sbrec_port_binding *logical_port,
>>> +                  const char *ip_prefix, const char *nexthop)
>>> +{
>>> +    struct route_entry *route_e = xzalloc(sizeof *route_e);
>>> +
>>> +    route_e->sb_db = sb_db;
>>> +    route_e->logical_port = logical_port;
>>> +    route_e->ip_prefix = xstrdup(ip_prefix);
>>> +    route_e->nexthop = xstrdup(nexthop);
>>> +    route_e->stale = false;
>>> +    uint32_t hash = uuid_hash(&sb_db->header_.uuid);
>>> +    hash = hash_string(logical_port->logical_port, hash);
>>> +    hash = hash_string(ip_prefix, hash);
>>> +    hmap_insert(routes, &route_e->hmap_node, hash);
>>> +
>>> +    return route_e;
>>> +}
>>> +
>>> +static struct route_entry *
>>> +route_lookup_or_add(struct hmap *route_map,
>>> +                    const struct sbrec_datapath_binding *sb_db,
>>> +                    const struct sbrec_port_binding *logical_port,
>>> +                    const char *ip_prefix, const char *nexthop)
>>> +{
>>> +    struct route_entry *route_e;
>>> +    uint32_t hash;
>>> +
>>> +    hash = uuid_hash(&sb_db->header_.uuid);
>>> +    hash = hash_string(logical_port->logical_port, hash);
>>> +    hash = hash_string(ip_prefix, hash);
>>> +    HMAP_FOR_EACH_WITH_HASH (route_e, hmap_node, hash, route_map) {
>>> +        if (!strcmp(route_e->nexthop, nexthop)) {
>>
>> This is not enough.  Two _different_ route prefixes might hash to the
>> same value so they might end up in the same bucket.  We need to do a
>> full comparison here, that is, compare both prefix and nexthop.
> 
> Thanks, fixed.
> 
>>
>>> +            return route_e;
>>> +        }
>>> +    }
>>> +
>>> +    route_e = route_alloc_entry(route_map, sb_db,
>>> +                                logical_port, ip_prefix, nexthop);
>>
>> Nit: We hash again inside route_alloc_entry().
>>
>>> +    return route_e;
>>> +}
>>> +
>>> +static void
>>> +route_erase_entry(struct route_entry *route_e)
>>> +{
>>> +    free(route_e->ip_prefix);
>>> +    free(route_e->nexthop);
>>> +    free(route_e);
>>> +}
>>> +
>>> +static void
>>> +sb_sync_learned_routes(const struct sbrec_datapath_binding *datapath,
>>> +                       const struct hmap *learned_routes,
>>> +                       const struct sset *bound_ports,
>>> +                       struct ovsdb_idl *ovnsb_idl,
>>> +                       struct ovsdb_idl_txn *ovnsb_idl_txn,
>>> +                       struct ovsdb_idl_index 
>>> *sbrec_learned_route_by_datapath,
>>> +                       struct ovsdb_idl_index *sbrec_port_binding_by_name)
>>> +{
>>> +    if (!sbrec_server_has_learned_route_table(ovnsb_idl)) {
>>> +        return;
>>> +    }
>>
>> It's probably cleaner to move this check higher in the call chain.  Just
>> not even try to sync learned routes if the table doesn't exist.
> 
> Yea, i now moved it to en_route_exchange_run.
> As the Advertised_Route and the Learned_Route tables are created in the same
> commit we can skip this completely if Learned_Route does not exist.
> 
>>
>>> +
>>> +    struct hmap sync_routes = HMAP_INITIALIZER(&sync_routes);
>>> +    struct route_entry *route_e;
>>> +    const struct sbrec_learned_route *sb_route;
>>> +
>>> +    struct sbrec_learned_route *filter =
>>> +        
>>> sbrec_learned_route_index_init_row(sbrec_learned_route_by_datapath);
>>> +    sbrec_learned_route_index_set_datapath(filter, datapath);
>>> +    SBREC_LEARNED_ROUTE_FOR_EACH_EQUAL (sb_route, filter,
>>> +                                        sbrec_learned_route_by_datapath) {
>>
>> What if the datapath used to be local and now is not anymore?  Who takes
>> care of cleanup in that case?
> 
> Ok we have 3 cases here and one seems like it is actually broken:
> 
> Case 1: The port is now bound by a different ovn-controller.
> In this case the new ovn-controller will handle any routes for this port
> from now on. This potentially includes deleting all routes.
> 

How are the routes deleted on the hypervisor (let's call it hv1) that
used to bind the port?  If that was the only port of the logical
datapath (let's call it DP1) that was bound on hv1, after the port
"moves" to hv2, hv1 doesn't consider DP1 as "local" anymore.  We're only
syncing routes for "local" datapaths so we won't sync routes for DP1 on
hv1.  Will we end up with stale routes in the VRF?

> Case 2: The port has been deleted.
> In this case northd cleans up the port and all associated routes.
> 
> Case 3: The port is unbound from any chassis OR the single chassis
> hosting the port goes down.
> In this case the route is currently still existing and not removed.
> I don't think this can be fixed on the ovn-controller side in any way,
> as then we would not handle crashes.
> So the only location that could handle it would be northd. We could
> there remove all Learned_Routes that are learned on ports that are down.
> 
> What do you think of this option?
> 

I think I agree that it makes sense to remove all learned routes in
northd for ports that are down.

>>
>> Indexes incur some memory/cpu costs.  I'd avoid this one if possible.
>> Why do we need to sb_sync_learned_routes() per datapath?  Can't we do it
>> for all datapaths at once?
> 
> There are two things that for me speak against this:
> First we still need per datapath information to actually announce these
> routes. This especially means the bound_ports sset. We could pass this
> through re_nl_sync_routes and then pass it back via the 
> re_nl_received_route_node, but that seems hard to understand.
> 
> The other thing is that we could easily in the future use the route
> table watches (that are part of a later patch) to only update the
> learned routes for a single datapath. In that case we need the same per
> datapath logic anyway.
> 
> However there is nothing that would actually prevent merging this, but i
> feel like it would be harder to understand.
> 

OK, that makes sense.  Let's keep it as is.

>>
>>> +        /* If the port is not local we don't care about it.
>>> +         * Some other ovn-controller will handle it. */
>>> +        if (!sset_contains(bound_ports,
>>> +                           sb_route->logical_port->logical_port)) {
>>> +            continue;
>>> +        }
>>> +        route_e = route_alloc_entry(&sync_routes,
>>> +                                    sb_route->datapath,
>>> +                                    sb_route->logical_port,
>>> +                                    sb_route->ip_prefix,
>>> +                                    sb_route->nexthop);
>>> +        route_e->stale = true;
>>
>> We can avoid the "stale" column if we:
>>
>> 1. build a temporary map with all learned routes that already exist in
>> the SB.
>> 2. iterate through all re_nl_received_route_node entries:
>> - if there's a corresponding (matching) learned route in the temporary
>> map then remove it from the temporary map (it's still up to date)
>> - if not, insert a new SB.Learned_Route
>> 3. whatever is left in the temporary map is just stale SB.Learned_Route
>> records that need to be removed from the SB.
> 
> Yea, fixed.
> 
>>
>>> +        route_e->sb_route = sb_route;
>>> +    }
>>> +    sbrec_learned_route_index_destroy_row(filter);
>>> +
>>> +    struct re_nl_received_route_node *learned_route;
>>> +    HMAP_FOR_EACH (learned_route, hmap_node, learned_routes) {
>>> +        char *ip_prefix = normalize_v46_prefix(&learned_route->addr,
>>> +                                               learned_route->plen);
>>> +        char *nexthop = normalize_v46(&learned_route->nexthop);
>>> +
>>> +        const char *logical_port_name;
>>> +        SSET_FOR_EACH (logical_port_name, bound_ports) {
>>> +            const struct sbrec_port_binding *logical_port =
>>> +                lport_lookup_by_name(sbrec_port_binding_by_name,
>>> +                                     logical_port_name);
>>> +            if (!logical_port) {
>>> +                continue;
>>> +            }
>>> +            route_e = route_lookup_or_add(&sync_routes,
>>> +                datapath,
>>> +                logical_port, ip_prefix, nexthop);
>>> +            route_e->stale = false;
>>> +            if (!route_e->sb_route) {
>>> +                sb_route = sbrec_learned_route_insert(ovnsb_idl_txn);
>>> +                sbrec_learned_route_set_datapath(sb_route, datapath);
>>> +                sbrec_learned_route_set_logical_port(sb_route, 
>>> logical_port);
>>> +                sbrec_learned_route_set_ip_prefix(sb_route, ip_prefix);
>>> +                sbrec_learned_route_set_nexthop(sb_route, nexthop);
>>> +                route_e->sb_route = sb_route;
>>> +            }
>>> +        }
>>> +        free(ip_prefix);
>>> +        free(nexthop);
>>> +    }
>>> +
>>> +    HMAP_FOR_EACH_POP (route_e, hmap_node, &sync_routes) {
>>> +        if (route_e->stale) {
>>> +            sbrec_learned_route_delete(route_e->sb_route);
>>> +        }
>>> +        route_erase_entry(route_e);
>>> +    }
>>> +    hmap_destroy(&sync_routes);
>>> +}
>>> +
>>>  void
>>>  route_exchange_run(struct route_exchange_ctx_in *r_ctx_in,
>>>                     struct route_exchange_ctx_out *r_ctx_out OVS_UNUSED)
>>> @@ -57,7 +199,7 @@ route_exchange_run(struct route_exchange_ctx_in 
>>> *r_ctx_in,
>>>                               "%"PRId64": %s.",
>>>                               vrf_name, ad->key,
>>>                               ovs_strerror(error));
>>> -                continue;
>>> +                goto out;
>>
>> We can avoid using goto by moving the received_routes hmap
>> initialization just before re_nl_sync_routes().  We don't need the map
>> until then.
>>
>>>              }
>>>              sset_add(&_maintained_vrfs, vrf_name);
>>>          } else {
>>> @@ -67,7 +209,18 @@ route_exchange_run(struct route_exchange_ctx_in 
>>> *r_ctx_in,
>>>              sset_find_and_delete(&old_maintained_vrfs, vrf_name);
>>>          }
>>>  
>>> -        re_nl_sync_routes(ad->key, &ad->routes);
>>> +        re_nl_sync_routes(ad->key, &ad->routes,
>>> +                          &received_routes);
>>> +
>>> +        sb_sync_learned_routes(ad->db, &received_routes,
>>> +                               &ad->bound_ports,
>>> +                               r_ctx_in->ovnsb_idl,
>>> +                               r_ctx_in->ovnsb_idl_txn,
>>> +                               r_ctx_in->sbrec_learned_route_by_datapath,
>>> +                               r_ctx_in->sbrec_port_binding_by_name);
>>> +
>>> +out:
>>> +        re_nl_received_routes_destroy(&received_routes);
>>>      }
>>>  
>>>      /* Remove VRFs previously maintained by us not found in the above 
>>> loop. */
>>> diff --git a/controller/route-exchange.h b/controller/route-exchange.h
>>> index 2c2a9ab84..d51fba598 100644
>>> --- a/controller/route-exchange.h
>>> +++ b/controller/route-exchange.h
>>> @@ -18,6 +18,11 @@
>>>  #include <stdbool.h>
>>>  
>>>  struct route_exchange_ctx_in {
>>> +    /* We need the idl to check if a table exists. */
>>
>> Nit: s/if a table/if the Learned_Route table/
>>
>>> +    struct ovsdb_idl *ovnsb_idl;
>>> +    struct ovsdb_idl_txn *ovnsb_idl_txn;
>>> +    struct ovsdb_idl_index *sbrec_learned_route_by_datapath;
>>> +    struct ovsdb_idl_index *sbrec_port_binding_by_name;
>>>      /* Contains struct advertise_datapath_entry */
>>>      struct hmap *announce_routes;
>>>  };
>>> diff --git a/lib/ovn-util.c b/lib/ovn-util.c
>>> index ed847517a..507847280 100644
>>> --- a/lib/ovn-util.c
>>> +++ b/lib/ovn-util.c
>>> @@ -822,6 +822,16 @@ normalize_v46_prefix(const struct in6_addr *prefix, 
>>> unsigned int plen)
>>>      }
>>>  }
>>>  
>>> +char *
>>> +normalize_v46(const struct in6_addr *prefix)
>>> +{
>>> +    if (IN6_IS_ADDR_V4MAPPED(prefix)) {
>>> +        return normalize_ipv4_prefix(in6_addr_get_mapped_ipv4(prefix), 32);
>>> +    } else {
>>> +        return normalize_ipv6_prefix(prefix, 128);
>>> +    }
>>> +}
>>> +
>>>  char *
>>>  str_tolower(const char *orig)
>>>  {
>>> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
>>> index 31c2c68df..8d8fd989b 100644
>>> --- a/lib/ovn-util.h
>>> +++ b/lib/ovn-util.h
>>> @@ -207,6 +207,7 @@ bool ip46_parse(const char *ip_str, struct in6_addr 
>>> *ip);
>>>  char *normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen);
>>>  char *normalize_ipv6_prefix(const struct in6_addr *ipv6, unsigned int 
>>> plen);
>>>  char *normalize_v46_prefix(const struct in6_addr *prefix, unsigned int 
>>> plen);
>>> +char *normalize_v46(const struct in6_addr *prefix);
>>>  
>>>  /* Returns a lowercase copy of orig.
>>>   * Caller must free the returned string.
>>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
>>> index faf29f53d..02d5ce7e1 100644
>>> --- a/tests/system-ovn.at
>>> +++ b/tests/system-ovn.at
>>> @@ -14906,6 +14906,20 @@ blackhole 192.0.2.3 proto 84
>>>  blackhole 192.0.2.10 proto 84
>>>  blackhole 198.51.100.0/24 proto 84])
>>>  
>>> +# now we test route learning
>>
>> Nit: Comments should be sentences (applies to all comments in these tests).
>>
>>> +check_row_count Learned_Route 0
>>> +check ip route add 233.252.0.0/24 via 192.168.10.10 dev lo onlink vrf 
>>> ovnvrf1337
>>> +# for now we trigger a recompute as route watching is not yet implemented
>>> +check ovn-appctl -t ovn-controller inc-engine/recompute
>>> +check ovn-nbctl --wait=hv sync
>>> +check_row_count Learned_Route 1
>>> +lp=$(ovn-sbctl --bare --columns _uuid list port_binding internet-phys)
>>
>> fetch_column
>>
>>> +AT_CHECK_UNQUOTED([ovn-sbctl --columns ip_prefix,nexthop,logical_port 
>>> --bare find Learned_Route], [0], [dnl
>>
>> check_row_count() might be cleaner.
>>
>>> +233.252.0.0/24
>>> +192.168.10.10
>>> +$lp
>>> +])
>>> +
>>>  OVS_APP_EXIT_AND_WAIT([ovn-controller])
>>>  
>>>  as ovn-sb
>>> @@ -15110,6 +15124,20 @@ blackhole 192.0.2.3 proto 84
>>>  blackhole 192.0.2.10 proto 84
>>>  blackhole 198.51.100.0/24 proto 84])
>>>  
>>> +# now we test route learning
>>> +check_row_count Learned_Route 0
>>> +check ip route add 233.252.0.0/24 via 192.168.10.10 dev lo onlink vrf 
>>> ovnvrf1337
>>> +# for now we trigger a recompute as route watching is not yet implemented
>>> +check ovn-appctl -t ovn-controller inc-engine/recompute
>>> +check ovn-nbctl --wait=hv sync
>>> +check_row_count Learned_Route 2
>>> +lp=$(ovn-sbctl --bare --columns _uuid list port_binding internet-phys)
>>
>> fetch_column
>>
>>> +AT_CHECK_UNQUOTED([ovn-sbctl --columns ip_prefix,nexthop,logical_port 
>>> --bare find Learned_Route logical_port=$lp], [0], [dnl
>>> +233.252.0.0/24
>>> +192.168.10.10
>>> +$lp
>>> +])
>>
>> check_row_count() might be cleaner.
>>
>>> +
>>>  as ovn-sb
>>>  OVS_APP_EXIT_AND_WAIT([ovsdb-server])
>>>  
>>
> 
> Thanks a lot,
> Felix
> 

Regards,
Dumitru

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to