Hi Martin, Felix,
On 2/12/25 4:04 PM, [email protected] wrote:
> On Wed, 2025-02-12 at 15:20 +0100, Felix Huettner wrote:
>> On Wed, Feb 12, 2025 at 02:43:09AM +0100, Martin Kalcok wrote:
>>> This is a continuation of previous commit. It's separate
>>> for ease of review. It will be squshed together for the
>>> final version.
>>
>> Hi Martin,
>>
>> i took a look at the changes and i'll add my findings below.
>>
>> Note that i squashed part 3 and 4 together locally for reviewing.
>> I'll
>> try to put my comments to the appropriate patch, but i am already
>> sorry
>> for when i mess that up :)
>>
>>>
>>> Signed-off-by: Martin Kalcok <[email protected]>
>>> ---
>>> northd/en-advertised-route-sync.c | 20 ++-
>>> northd/en-learned-route-sync.c | 3 +-
>>> northd/northd.c | 217
>>> ++++++++++++++++++++++++++++--
>>> northd/northd.h | 11 +-
>>> 4 files changed, 231 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/northd/en-advertised-route-sync.c b/northd/en-
>>> advertised-route-sync.c
>>> index 9d4fb308d..e81c86afa 100644
>>> --- a/northd/en-advertised-route-sync.c
>>> +++ b/northd/en-advertised-route-sync.c
>>> @@ -25,7 +25,6 @@
>>> #include "openvswitch/hmap.h"
>>> #include "ovn-util.h"
>>>
>>> -
>>> static void
>>> advertised_route_table_sync(
>>> struct ovsdb_idl_txn *ovnsb_txn,
>>> @@ -205,9 +204,13 @@ en_dynamic_routes_run(struct engine_node
>>> *node, void *data)
>>> if (!od->dynamic_routing) {
>>> continue;
>>> }
>>> - build_lb_nat_parsed_routes(od, lr_stateful_rec->lrnat_rec,
>>> - &dynamic_routes_data-
>>>> parsed_routes);
>>> + build_nat_parsed_routes(od, lr_stateful_rec->lrnat_rec,
>>> + northd_data,
>>> + &dynamic_routes_data-
>>>> parsed_routes);
>>>
>>> + build_lb_parsed_routes(od, lr_stateful_rec->lb_ips,
>>> + northd_data,
>>> + &dynamic_routes_data-
>>>> parsed_routes);
>>> }
>>> engine_set_node_state(node, EN_UPDATED);
>>> }
>>> @@ -442,10 +445,19 @@ advertised_route_table_sync_route_add(
>>> if (route->source == ROUTE_SOURCE_NAT && (drr & DRRM_NAT) ==
>>> 0) {
>>> return;
>>> }
>>> + if (route->source == ROUTE_SOURCE_LB && (drr & DRRM_LB) == 0)
>>> {
>>> + return;
>>> + }
>>>
>>> + /* XXX: I'm not sure if normalize prefix is the best call
>>> here. It doesn't
>>> + * include "/plen" for host routes, so they get announced
>>> without it. */
>>
>> This is unfortunate but i don't think it would cause issues. In
>> route.c
>> we parse this value using ip46_parse_cidr and this will deduce it is
>> a
>> host route if there is no "/.*".
>>
>> Also this would probably also happen for now if we would specify a
>> host
>> route in a Logical_Router_Static_Route.
>
> You are right, this doesn't seem to be a big issue and the route is
> propagated to the VRF. I just thought that I'd comment on it, since we
> do have access to route->plen here.
>
>>
>>> char *ip_prefix = normalize_v46_prefix(&route->prefix, route-
>>>> plen);
>>> + const struct sbrec_port_binding *tracked_port = NULL;
>>> + if (route->tracked_port) {
>>> + tracked_port = route->tracked_port->sb;
>>> + }
>>> ar_add_entry(sync_routes, route->od->sb, route->out_port->sb,
>>> - ip_prefix, NULL);
>>> + ip_prefix, tracked_port);
>>> }
>>>
>>> static void
>>> diff --git a/northd/en-learned-route-sync.c b/northd/en-learned-
>>> route-sync.c
>>> index 406f1551f..4e87b3265 100644
>>> --- a/northd/en-learned-route-sync.c
>>> +++ b/northd/en-learned-route-sync.c
>>> @@ -181,7 +181,8 @@ parse_route_from_sbrec_route(struct hmap
>>> *parsed_routes_out,
>>>
>>> parsed_route_add(od, nexthop, &prefix, plen, false,
>>> lrp_addr_s,
>>> out_port, 0, false, false, NULL,
>>> - ROUTE_SOURCE_LEARNED, &route->header_,
>>> parsed_routes_out);
>>> + ROUTE_SOURCE_LEARNED, &route->header_, NULL,
>>> + parsed_routes_out);
>>> }
>>>
>>> static void
>>> diff --git a/northd/northd.c b/northd/northd.c
>>> index 4b5708059..c0953028a 100644
>>> --- a/northd/northd.c
>>> +++ b/northd/northd.c
>>> @@ -10992,6 +10992,7 @@ parsed_route_init(const struct ovn_datapath
>>> *od,
>>> bool ecmp_symmetric_reply,
>>> const struct sset *ecmp_selection_fields,
>>> enum route_source source,
>>> + const struct ovn_port *tracked_port,
>>> const struct ovsdb_idl_row *source_hint)
>>> {
>>>
>>> @@ -11007,6 +11008,7 @@ parsed_route_init(const struct ovn_datapath
>>> *od,
>>> new_pr->is_discard_route = is_discard_route;
>>> new_pr->lrp_addr_s = nullable_xstrdup(lrp_addr_s);
>>> new_pr->out_port = out_port;
>>> + new_pr->tracked_port = tracked_port;
>>> new_pr->source = source;
>>> if (ecmp_selection_fields) {
>>> sset_clone(&new_pr->ecmp_selection_fields,
>>> ecmp_selection_fields);
>>> @@ -11030,7 +11032,7 @@ parsed_route_clone(const struct
>>> parsed_route *pr)
>>> pr->od, nexthop, pr->prefix, pr->plen, pr-
>>>> is_discard_route,
>>> pr->lrp_addr_s, pr->out_port, pr->route_table_id, pr-
>>>> is_src_route,
>>> pr->ecmp_symmetric_reply, &pr->ecmp_selection_fields, pr-
>>>> source,
>>> - pr->source_hint);
>>> + pr->tracked_port, pr->source_hint);
>>>
>>> new_pr->hash = pr->hash;
>>> return new_pr;
>>> @@ -11069,13 +11071,14 @@ parsed_route_add(const struct
>>> ovn_datapath *od,
>>> const struct sset *ecmp_selection_fields,
>>> enum route_source source,
>>> const struct ovsdb_idl_row *source_hint,
>>> + const struct ovn_port *tracked_port,
>>> struct hmap *routes)
>>> {
>>>
>>> struct parsed_route *new_pr = parsed_route_init(
>>> od, nexthop, *prefix, plen, is_discard_route, lrp_addr_s,
>>> out_port,
>>> route_table_id, is_src_route, ecmp_symmetric_reply,
>>> - ecmp_selection_fields, source, source_hint);
>>> + ecmp_selection_fields, source, tracked_port, source_hint);
>>>
>>> new_pr->hash = route_hash(new_pr);
>>>
>>> @@ -11212,7 +11215,7 @@ parsed_routes_add_static(const struct
>>> ovn_datapath *od,
>>> parsed_route_add(od, nexthop, &prefix, plen, is_discard_route,
>>> lrp_addr_s,
>>> out_port, route_table_id, is_src_route,
>>> ecmp_symmetric_reply, &ecmp_selection_fields,
>>> source,
>>> - &route->header_, routes);
>>> + &route->header_, NULL, routes);
>>> sset_destroy(&ecmp_selection_fields);
>>> }
>>>
>>> @@ -11230,7 +11233,7 @@ parsed_routes_add_connected(const struct
>>> ovn_datapath *od,
>>> false, addr->addr_s, op,
>>> 0, false,
>>> false, NULL, ROUTE_SOURCE_CONNECTED,
>>> - &op->nbrp->header_, routes);
>>> + &op->nbrp->header_, NULL, routes);
>>> }
>>>
>>> for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>> @@ -11242,7 +11245,7 @@ parsed_routes_add_connected(const struct
>>> ovn_datapath *od,
>>> false, addr->addr_s, op,
>>> 0, false,
>>> false, NULL, ROUTE_SOURCE_CONNECTED,
>>> - &op->nbrp->header_, routes);
>>> + &op->nbrp->header_, NULL, routes);
>>> }
>>> }
>>>
>>> @@ -11280,10 +11283,153 @@ build_parsed_routes(const struct
>>> ovn_datapath *od, const struct hmap *lr_ports,
>>> }
>>> }
>>>
>>> +static int
>>> +lrouter_check_nat_entry(const struct ovn_datapath *od,
>>> + const struct ovn_nat *nat_entry,
>>> + const struct hmap *lr_ports, ovs_be32
>>> *mask,
>>> + bool *is_v6, int *cidr_bits, struct
>>> eth_addr *mac,
>>> + bool *distributed, struct ovn_port
>>> **nat_l3dgw_port);
>>> +
>>> +static const struct ovn_port *
>>> +get_nat_route_tracked_port(const struct ovn_port *op,
>>> + const struct ovn_nat *nat,
>>> + const struct northd_data *northd_data)
>>> +{
>>> + /* Ports on GW routers don't need tracked_port because all
>>> resources
>>> + * are located on the same chassis.*/
>>> + if (op->od->is_gw_router) {
>>> + return NULL;
>>> + }
>>> + struct eth_addr mac = eth_addr_broadcast;
>>> + bool is_v6, distributed_nat;
>>> + ovs_be32 mask;
>>> + int cidr_bits;
>>> + struct ovn_port *l3dgw_port;
>>> +
>>> + if (lrouter_check_nat_entry(op->od, nat, &northd_data-
>>>> lr_ports, &mask, &is_v6,
>>> + &cidr_bits,
>>> + &mac, &distributed_nat,
>>> &l3dgw_port) < 0) {
>>> + return NULL;
>>> + }
>>> +
>>> + /* For distributed NAT rules, we find ovn_port with name that
>>> matches
>>> + * logical_port value of the NAT entry, and use that as
>>> tracked_port. */
>>> + if (distributed_nat) {
>>> + return ovn_port_find(&northd_data->ls_ports, nat->nb-
>>>> logical_port);
>>> + /* For centralized NAT rules, we use designated DGP as a
>>> tracked port. */
>>> + } else {
>>> + return l3dgw_port;
>>> + }
>>> +
>>> + return NULL;
>>> +}
>>> +
>>> +static void
>>> +lport_addrs_to_parsed_routes(const struct ovn_port *out_port,
>>> + const struct ovn_port *tracked_port,
>>> + const struct lport_addresses *laddrs,
>>> + enum route_source route_type,
>>> + struct hmap *routes)
>>> +{
>>> + for (int i = 0; i < laddrs->n_ipv4_addrs; i++) {
>>> + struct ipv4_netaddr *addr = &laddrs->ipv4_addrs[i];
>>> + struct in6_addr prefix;
>>> + ip46_parse(addr->network_s, &prefix);
>>> + parsed_route_add(out_port->od, NULL, &prefix, addr->plen,
>>> + false, addr->addr_s, out_port,
>>> + 0, false,
>>> + false, NULL, route_type,
>>> + &out_port->nbrp->header_, tracked_port,
>>> routes);
>>> + }
>>> + for (int i = 0; i < laddrs->n_ipv6_addrs; i++) {
>>> + struct ipv6_netaddr *addr = &laddrs->ipv6_addrs[i];
>>> + parsed_route_add(out_port->od, NULL, &addr->addr, addr-
>>>> plen,
>>> + false, addr->addr_s, out_port,
>>> + 0, false,
>>> + false, NULL, route_type,
>>> + &out_port->nbrp->header_, tracked_port,
>>> routes);
>>> + }
>>> +}
>>> +
>>> +/* XXX: This function is, in nature, very similar to how
>>> + * publish_host_routes_lrp looks up neighboring host routes. I
>>> really wanted
>>> + * to reuse it, but it's designed to work with already existing
>>> parsed_routes
>>> + * when creating Advertised_Route records. And that doesn't work
>>> in following
>>> + * scenario:
>>> + *
>>> + * R1 --- ls_int --- R2
>>> + *
>>> + * If R1 and R2 don't have IPv4/6 configured on their LRPs plugged
>>> to
>>> + * ls_int, and you set "connected-as-host", no parsed_routes will
>>> be generated
>>> + * (makes sense). But as a consequence, publish_host_routes_lrp is
>>> never
>>> + * executed.
>>
>> there is a parsed_route generated for the link local address but we
>> have
>> been filtering it out before processing it further.
>> However in the case of connected-as-host i see no reason to keep this
>> limitation.
>>
>> I have built a small patch on top of current main to share this.
>> Not sure if that is exactly what you are looking for.
>> https://github.com/felixhuettner/ovn/commit/5326af4e0fb0b054a7f023ed8d15ae5b2969928a
>>
>
> Oh right, the filtering happens all the way down during the
> "publish_*_route" phase.
> In the meantime I came up with a way to publish both NAT/LBs that
> doesn't seem to be too costly. I'll take your change for a spin, but if
> your version work (and it looks like it does), the only benefit of
> keeping my approach would be more granular control over what gets
> advertised (NATs, LBs or NATs+LBS+LRP_IPs).
>
Actually, I don't think we need this (especially if it parses NAT/LBs
again). I think we already (almost) have all the NAT information in the
lr_stateful_record data (see below, [suggestion]).
>>> + * We would very much like this scenario to work. i.e. no explicit
>>> IP
>>> + * configuration on ls_int, but NATs/LBs on R2 are reachable from
>>> R1 over
>>> + * R2s IPv6 LLA. This function aims to solve it by generating
>>> + * advertised_routes for NATs that are on op's neighbors (either
>>> directly
>>> + * connected LRs or LRs connected to same LS as op). I tried to
>>> keep the
>>> + * overhead to minimum.
>>> + */
>>> +static void
>>> +build_connected_nat_routes(const struct ovn_port *op, struct hmap
>>> *routes)
>>> +{
>>> + if (!op->peer) {
>>> + return;
>>> + }
>>> + struct ovn_datapath *peer_od = op->peer->od;
>>> + if (!peer_od->nbs && !peer_od->nbr) {
>>> + return;
>>> + }
>>> +
>>> + const struct ovn_port *peer_port = NULL;
>>> + /* This is directly connected LR peer. */
>>> + if (peer_od->nbr) {
>>> + peer_port = op->peer;
>>> + size_t n_nats = 0;
>>> + char **nats = NULL;
>>> + nats = get_nat_addresses(peer_port, &n_nats, false, false,
>>> NULL, true);
>>> + for (size_t i = 0; i < n_nats; i++) {
>>> + /* XXX: This block should be probably squshed to a
>>> function. It's
>>> + * bit repetitive with the one bellow. */
>>> + struct lport_addresses laddrs;
>>> + int ofs = 0;
>>> + extract_addresses(nats[i], &laddrs, &ofs);
>>> + lport_addrs_to_parsed_routes(op, peer_port, &laddrs,
>>> ROUTE_SOURCE_NAT, routes);
>>> + free(nats[i]);
>>> + destroy_lport_addresses(&laddrs);
>>> + }
>>> + free(nats);
>>> + return;
>>> + }
>>> +
>>> + /* This peer is LSP, we need to check all connected router
>>> ports for NAT.*/
>>> + for (size_t i = 0; i < peer_od->n_router_ports; i++) {
>>> + peer_port = peer_od->router_ports[i]->peer;
>>> + if (peer_port == op) {
>>> + /* no need to check NAT addresses on ovn_port that
>>> initiated this
>>> + * function.*/
>>> + continue;
>>> + }
>>> + size_t n_nats = 0;
>>> + char **nats = NULL;
>>> + nats = get_nat_addresses(peer_port, &n_nats, false, false,
>>> NULL, true);
>>> + for (size_t j = 0; j < n_nats; j++) {
>>> + struct lport_addresses laddrs;
>>> + int ofs = 0;
>>> + extract_addresses(nats[j], &laddrs, &ofs);
>>> + lport_addrs_to_parsed_routes(op, peer_port, &laddrs,
>>> ROUTE_SOURCE_NAT, routes);
>>> + free(nats[j]);
>>> + destroy_lport_addresses(&laddrs);
>>> + }
>>> + free(nats);
>>> + }
>>> +}
>>> +
>>> void
>>> -build_lb_nat_parsed_routes(const struct ovn_datapath *od,
>>> - const struct lr_nat_record *lr_nat,
>>> - struct hmap *routes)
>>> +build_nat_parsed_routes(const struct ovn_datapath *od,
>>> + const struct lr_nat_record *lr_nat,
>>> + const struct northd_data *northd_data,
>>> + struct hmap *routes)
>>> {
>>> const struct ovn_port *op;
>>> HMAP_FOR_EACH (op, dp_node, &od->ports) {
>>> @@ -11291,19 +11437,62 @@ build_lb_nat_parsed_routes(const struct
>>> ovn_datapath *od,
>>> if (!(drrm & DRRM_NAT)) {
>>> continue;
>>> }
>>> - /* I'm thinking of extending parsed_route struct with
>>> "tracked_port".
>>> - * Since we are already parsing/iterating NATs here, it
>>> feels more
>>> - * efficinet to figure out the tracked_port here, rather
>>> than
>>> - * re-parsing NATs down the line in the
>>> advertised_route_table_sync
>>> - * function before calling "ar_add_entry".*/
>>> +
>>> for (size_t i = 0; i < lr_nat->n_nat_entries; i++) {
>>> const struct ovn_nat *nat = &lr_nat->nat_entries[i];
>>> int plen = nat_entry_is_v6(nat) ? 128 : 32;
>>> struct in6_addr prefix;
>>> ip46_parse(nat->nb->external_ip, &prefix);
>>> + const struct ovn_port *tracked_port =
>>> + get_nat_route_tracked_port(op, nat, northd_data);
>>> parsed_route_add(od, NULL, &prefix, plen, false,
>>> nat->nb->external_ip, op, 0, false,
>>> false,
>>> - NULL, ROUTE_SOURCE_NAT, &op->nbrp-
>>>> header_, routes);
>>> + NULL, ROUTE_SOURCE_NAT, &op->nbrp-
>>>> header_,
>>> + tracked_port, routes);
>>> + }
>>> +
>>> + /* XXX: This could be made optional by adding "nat-
>>> connected" option
>>> + * to dynamic-routing-redistribute. Similar to how
>>> connected and
>>> + * connected-as-host work.*/
>>> + build_connected_nat_routes(op, routes);
>>> + }
>>> +
>>> +}
>>> +
>>> +void
>>> +build_lb_parsed_routes(const struct ovn_datapath *od,
>>> + const struct ovn_lb_ip_set *lb_ips,
>>> + const struct northd_data *northd_data,
>>
>> This seems to be unused.
>
> Yeah, just something I expected to use for LB tracked port lookup. It
> won't be there in next version.
>
I actually think this is used in en_dynamic_routes_run() and, to be
honest, to me it seems like the right approach. Please see below in the
[suggestion] section.
>>
>>> + struct hmap *routes)
>>> +{
>>> + const struct ovn_port *op;
>>> + HMAP_FOR_EACH (op, dp_node, &od->ports) {
>>> + enum dynamic_routing_redistribute_mode drrm = op-
>>>> dynamic_routing_redistribute;
>>> + if (!(drrm & DRRM_LB)) {
>>> + continue;
>>> + }
>>> +
>>> + const char *ip_address;
>>> + SSET_FOR_EACH (ip_address, &lb_ips->ips_v4) {
>>> + struct in6_addr prefix;
>>> + ip46_parse(ip_address, &prefix);
>>> + /* XXX: Need to figure out tracked_port for LB without
>>> re-parsing
>>> + * ovn_datapath. lr_stateful_record doens't have as
>>> much data
>>> + * about LBs as it does about NATs. */
>>> + const struct ovn_port *tracked_port = NULL;
>>> + parsed_route_add(od, NULL, &prefix, 32, false,
>>> + ip_address, op, 0, false, false,
>>> + NULL, ROUTE_SOURCE_LB, &op->nbrp-
>>>> header_,
>>> + tracked_port, routes);
>>> + }
>>> + SSET_FOR_EACH (ip_address, &lb_ips->ips_v6) {
>>> + struct in6_addr prefix;
>>> + ip46_parse(ip_address, &prefix);
>>> + const struct ovn_port *tracked_port = NULL;
>>> + parsed_route_add(od, NULL, &prefix, 128, false,
>>> + ip_address, op, 0, false, false,
>>> + NULL, ROUTE_SOURCE_LB, &op->nbrp-
>>>> header_,
>>> + tracked_port, routes);
>>> }
>>> }
>>>
>>> diff --git a/northd/northd.h b/northd/northd.h
>>> index 95f2fe010..5c0f7bc52 100644
>>> --- a/northd/northd.h
>>> +++ b/northd/northd.h
>>> @@ -761,6 +761,7 @@ struct parsed_route {
>>> const struct ovsdb_idl_row *source_hint;
>>> char *lrp_addr_s;
>>> const struct ovn_port *out_port;
>>> + const struct ovn_port *tracked_port; /* May be NULL. */
>>
>> I am not sure if adding this here makes sense.
>> It seems to me that in the case that tracked_port is set then most
>> other
>> fields have empty/default values.
>> Therefore maybe it makes sense to create a separate struct just with
>> the
>> needed fields (datapath, port, prefix and tracked_port).
>
> I see one big benefit of keeping only parsed_route struct, and that is
> the fact that many existing useful functions already work with
> parsed_route. It keeps the syncing to SB relatively unified.
>
I guess we could do this as follow up. For now, me personally, I'm OK
with it being in parsed_route. Martin, could you please add a TODO/XXX
comment here?
Also, let's consolidate all TODOs this series adds into the "* Dynamic
Routing" section Felix started in TODO.rst.
[suggestion]
I wanted to try to reduce all the NAT parsing this version still has and
ended up with:
https://github.com/dceara/ovn/tree/refs/heads/tmp-bgp-lb-nat-advertise-v5
The first patch refactors how NATs are parsed in the incremental
processing engine in ovn-northd. We can preparse all the information
that's needed for both lflow generation and dynamic routes. It's still
WIP because I need to test this thoroughly.
In the second patch I used the already parsed NAT information (for LBs
it's even simpler because we only process LBs on chassis where the
distributed gateway ports are bound. It essentially implements the
approach I tried to describe here:
https://mail.openvswitch.org/pipermail/ovs-dev/2025-February/420789.html
I tried it out locally and in my tests the NAT advertisements seem to
work fine. However, Martin, I didn't adapt the system tests you had in
v3 so I didn't run those.
In any case, let me know what you guys think. If this looks OK to you,
maybe we can try to integrate it in the next iteration of the patchset.
Regards,
Dumitru
> Thanks for the feedback,
> Martin
>>
>> Thanks a lot,
>> Felix
>>
>>
>>> };
>>>
>>> /* Returns an independent clone of the provided parsed_route. The
>>> returned
>>> @@ -789,6 +790,7 @@ void parsed_route_add(const struct ovn_datapath
>>> *od,
>>> const struct sset *ecmp_selection_fields,
>>> enum route_source source,
>>> const struct ovsdb_idl_row *source_hint,
>>> + const struct ovn_port *tracked_port,
>>> struct hmap *routes);
>>>
>>> bool
>>> @@ -823,7 +825,14 @@ void route_policies_destroy(struct
>>> route_policies_data *);
>>> void build_parsed_routes(const struct ovn_datapath *, const struct
>>> hmap *,
>>> const struct hmap *, struct hmap *,
>>> struct simap *,
>>> struct hmap *);
>>> -void build_lb_nat_parsed_routes(const struct ovn_datapath *, const
>>> struct lr_nat_record *, struct hmap *);
>>> +void build_nat_parsed_routes(const struct ovn_datapath *,
>>> + const struct lr_nat_record *,
>>> + const struct northd_data *,
>>> + struct hmap *);
>>> +void build_lb_parsed_routes(const struct ovn_datapath *,
>>> + const struct ovn_lb_ip_set *,
>>> + const struct northd_data *,
>>> + struct hmap *);
>>> uint32_t get_route_table_id(struct simap *, const char *);
>>> void routes_init(struct routes_data *);
>>> void routes_destroy(struct routes_data *);
>>> --
>>> 2.43.0
>>>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev