We now learn all routes inside the VRFs in which we also advertise routes. The learned routes are then placed in the southbound database for processing by northd.
Routes are only selected if they match all of the following rules: 1. must not be a route advertised by us 2. must not be a locally connected route (as we do not want to learn transfer networks) 3. the prefix must not be a link-local address. However, we cannot reliably determine over which link we learned the route in case we have two LRPs of the same LR on the same chassis. For now we just assume the routes on both links are identical. Future commits will refine this. Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud> --- v3->v4: - addressed review comments. v2->v3: * Set monitor conditions on sb Learned_Route table. * Do not learn routes if Learned_Route table does not exist (upgrades). controller/ovn-controller.c | 64 +++++++++++-- controller/route-exchange-netlink.c | 38 +++++++- controller/route-exchange-netlink.h | 15 ++- controller/route-exchange.c | 138 +++++++++++++++++++++++++++- controller/route-exchange.h | 3 + lib/ovn-util.c | 10 ++ lib/ovn-util.h | 1 + tests/system-ovn.at | 21 +++++ 8 files changed, 277 insertions(+), 13 deletions(-) diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 1eb8d39d1..5b31f6fd2 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -233,7 +233,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, * * Monitor Template_Var for local chassis. * - * Monitor Advertised_Route for local datapaths. + * Monitor Advertised/Learned_Route for local datapaths. * * We always monitor patch ports because they allow us to see the linkages * between related logical datapaths. That way, when we know that we have @@ -252,6 +252,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, struct ovsdb_idl_condition chprv = OVSDB_IDL_CONDITION_INIT(&chprv); struct ovsdb_idl_condition tv = OVSDB_IDL_CONDITION_INIT(&tv); struct ovsdb_idl_condition ar = OVSDB_IDL_CONDITION_INIT(&ar); + struct ovsdb_idl_condition lr = OVSDB_IDL_CONDITION_INIT(&lr); /* Always monitor all logical datapath groups. 
Otherwise, DPG updates may * be received *after* the lflows using it are seen by ovn-controller. @@ -277,6 +278,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, ovsdb_idl_condition_add_clause_true(&chprv); ovsdb_idl_condition_add_clause_true(&tv); ovsdb_idl_condition_add_clause_true(&ar); + ovsdb_idl_condition_add_clause_true(&lr); goto out; } @@ -365,7 +367,6 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, sbrec_dns_add_clause_datapaths(&dns, OVSDB_F_INCLUDES, &uuid, 1); sbrec_ip_multicast_add_clause_datapath(&ip_mcast, OVSDB_F_EQ, uuid); - sbrec_advertised_route_add_clause_datapath(&ar, OVSDB_F_EQ, uuid); } /* Datapath groups are immutable, which means a new group record is @@ -379,6 +380,14 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, sbrec_logical_flow_add_clause_logical_dp_group(&lf, OVSDB_F_NE, NULL); } + /* When the ports are getting bound to the chassis e.g incase of + * restart, at that moment we don't have the local datapaths, to avoid + * removing the existing advertised routes from the vrf or removing + * learned routes to the SB, we set condition to monitor all. 
+ */ + ovsdb_idl_condition_add_clause_true(&ar); + ovsdb_idl_condition_add_clause_true(&lr); + out:; unsigned int cond_seqnos[] = { sb_table_set_req_mon_condition(ovnsb_idl, port_binding, &pb), @@ -394,6 +403,7 @@ out:; sb_table_set_req_mon_condition(ovnsb_idl, chassis_private, &chprv), sb_table_set_opt_mon_condition(ovnsb_idl, chassis_template_var, &tv), sb_table_set_opt_mon_condition(ovnsb_idl, advertised_route, &ar), + sb_table_set_opt_mon_condition(ovnsb_idl, learned_route, &lr), }; unsigned int expected_cond_seqno = 0; @@ -414,6 +424,7 @@ out:; ovsdb_idl_condition_destroy(&chprv); ovsdb_idl_condition_destroy(&tv); ovsdb_idl_condition_destroy(&ar); + ovsdb_idl_condition_destroy(&lr); return expected_cond_seqno; } @@ -880,7 +891,8 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl) SB_NODE(meter, "meter") \ SB_NODE(static_mac_binding, "static_mac_binding") \ SB_NODE(chassis_template_var, "chassis_template_var") \ - SB_NODE(advertised_route, "advertised_route") + SB_NODE(advertised_route, "advertised_route") \ + SB_NODE(learned_route, "learned_route") enum sb_engine_node { #define SB_NODE(NAME, NAME_STR) SB_##NAME, @@ -5001,13 +5013,40 @@ route_sb_advertised_route_data_handler(struct engine_node *node, void *data) return true; } +struct ed_type_route_exchange { + /* We need the idl to check if a table exists. 
*/ + struct ovsdb_idl *sb_idl; +}; + static void -en_route_exchange_run(struct engine_node *node, void *data OVS_UNUSED) +en_route_exchange_run(struct engine_node *node, void *data) { + struct ed_type_route_exchange *re = data; + + struct ovsdb_idl_index *sbrec_learned_route_by_datapath = + engine_ovsdb_node_get_index( + engine_get_input("SB_learned_route", node), + "datapath"); + + struct ovsdb_idl_index *sbrec_port_binding_by_name = + engine_ovsdb_node_get_index( + engine_get_input("SB_port_binding", node), + "name"); + struct ed_type_route *route_data = engine_get_input_data("route", node); + /* There can not actually be any routes to advertise unless we also have + * the Learned_Route table, since they where introduced in the same + * release. */ + if (!sbrec_server_has_learned_route_table(re->sb_idl)) { + return; + } + struct route_exchange_ctx_in r_ctx_in = { + .ovnsb_idl_txn = engine_get_context()->ovnsb_idl_txn, + .sbrec_learned_route_by_datapath = sbrec_learned_route_by_datapath, + .sbrec_port_binding_by_name = sbrec_port_binding_by_name, .announce_routes = &route_data->announce_routes, }; @@ -5022,9 +5061,11 @@ en_route_exchange_run(struct engine_node *node, void *data OVS_UNUSED) static void * en_route_exchange_init(struct engine_node *node OVS_UNUSED, - struct engine_arg *arg OVS_UNUSED) + struct engine_arg *arg) { - return NULL; + struct ed_type_route_exchange *re = xzalloc(sizeof(*re)); + re->sb_idl = arg->sb_idl; + return re; } static void @@ -5239,6 +5280,9 @@ main(int argc, char *argv[]) struct ovsdb_idl_index *sbrec_chassis_template_var_index_by_chassis = ovsdb_idl_index_create1(ovnsb_idl_loop.idl, &sbrec_chassis_template_var_col_chassis); + struct ovsdb_idl_index *sbrec_learned_route_index_by_datapath + = ovsdb_idl_index_create1(ovnsb_idl_loop.idl, + &sbrec_learned_route_col_datapath); ovsdb_idl_track_add_all(ovnsb_idl_loop.idl); ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, @@ -5265,6 +5309,8 @@ main(int argc, char *argv[]) 
&sbrec_ha_chassis_group_col_external_ids); ovsdb_idl_omit(ovnsb_idl_loop.idl, &sbrec_advertised_route_col_external_ids); + ovsdb_idl_omit(ovnsb_idl_loop.idl, + &sbrec_learned_route_col_external_ids); /* We don't want to monitor Connection table at all. So omit all the * columns. */ @@ -5358,6 +5404,10 @@ main(int argc, char *argv[]) route_sb_advertised_route_data_handler); engine_add_input(&en_route_exchange, &en_route, NULL); + engine_add_input(&en_route_exchange, &en_sb_learned_route, + engine_noop_handler); + engine_add_input(&en_route_exchange, &en_sb_port_binding, + engine_noop_handler); engine_add_input(&en_addr_sets, &en_sb_address_set, addr_sets_sb_address_set_handler); @@ -5576,6 +5626,8 @@ main(int argc, char *argv[]) sbrec_static_mac_binding_by_datapath); engine_ovsdb_node_add_index(&en_sb_chassis_template_var, "chassis", sbrec_chassis_template_var_index_by_chassis); + engine_ovsdb_node_add_index(&en_sb_learned_route, "datapath", + sbrec_learned_route_index_by_datapath); engine_ovsdb_node_add_index(&en_ovs_flow_sample_collector_set, "id", ovsrec_flow_sample_collector_set_by_id); engine_ovsdb_node_add_index(&en_ovs_port, "qos", ovsrec_port_by_qos); diff --git a/controller/route-exchange-netlink.c b/controller/route-exchange-netlink.c index 4ba21ecaa..74741a3fd 100644 --- a/controller/route-exchange-netlink.c +++ b/controller/route-exchange-netlink.c @@ -196,8 +196,19 @@ re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, return modify_route(RTM_DELROUTE, 0, table_id, dst, plen); } +void +re_nl_learned_routes_destroy(struct ovs_list *learned_routes) +{ + struct re_nl_received_route_node *rr; + LIST_FOR_EACH_POP (rr, list_node, learned_routes) { + free(rr); + } +} + struct route_msg_handle_data { struct hmapx *routes_to_advertise; + struct ovs_list *learned_routes; + const struct sbrec_datapath_binding *db; }; static void @@ -208,8 +219,25 @@ handle_route_msg(const struct route_table_msg *msg, void *data) struct advertise_route_entry *ar; int 
err; - /* This route is not from us, we should not touch it. */ + /* This route is not from us, so we learn it. */ if (rd->rtm_protocol != RTPROT_OVN) { + if (prefix_is_link_local(&rd->rta_dst, rd->rtm_dst_len)) { + return; + } + struct route_data_nexthop *nexthop; + LIST_FOR_EACH (nexthop, nexthop_node, &rd->nexthops) { + if (ipv6_is_zero(&nexthop->addr)) { + /* This is most likely an address on the local link. + * As we just want to learn remote routes we do not need it.*/ + continue; + } + struct re_nl_received_route_node *rr = xzalloc(sizeof *rr); + ovs_list_push_back(handle_data->learned_routes, &rr->list_node); + rr->db = handle_data->db; + rr->addr = rd->rta_dst; + rr->plen = rd->rtm_dst_len; + rr->nexthop = nexthop->addr; + } return; } @@ -236,7 +264,9 @@ handle_route_msg(const struct route_table_msg *msg, void *data) } void -re_nl_sync_routes(uint32_t table_id, const struct hmap *routes) +re_nl_sync_routes(uint32_t table_id, const struct hmap *routes, + struct ovs_list *learned_routes, + const struct sbrec_datapath_binding *db) { struct hmapx routes_to_advertise = HMAPX_INITIALIZER(&routes_to_advertise); struct advertise_route_entry *ar; @@ -249,11 +279,13 @@ re_nl_sync_routes(uint32_t table_id, const struct hmap *routes) * in the system. */ struct route_msg_handle_data data = { .routes_to_advertise = &routes_to_advertise, + .learned_routes = learned_routes, + .db = db, }; route_table_dump_one_table(table_id, handle_route_msg, &data); - /* Add any remaining routes in the host_routes hmap to the system routing + /* Add any remaining routes in the routes hmap to the system routing * table. 
*/ struct hmapx_node *hn; HMAPX_FOR_EACH (hn, &routes_to_advertise) { diff --git a/controller/route-exchange-netlink.h b/controller/route-exchange-netlink.h index 93b593ad2..bc77504ae 100644 --- a/controller/route-exchange-netlink.h +++ b/controller/route-exchange-netlink.h @@ -19,6 +19,8 @@ #define ROUTE_EXCHANGE_NETLINK_H 1 #include <stdint.h> +#include "openvswitch/list.h" +#include <netinet/in.h> /* This value is arbitrary but currently unused. * See https://github.com/iproute2/iproute2/blob/main/etc/iproute2/rt_protos */ @@ -27,6 +29,14 @@ struct in6_addr; struct hmap; +struct re_nl_received_route_node { + struct ovs_list list_node; + const struct sbrec_datapath_binding *db; + struct in6_addr addr; + unsigned int plen; + struct in6_addr nexthop; +}; + int re_nl_create_vrf(const char *ifname, uint32_t table_id); int re_nl_delete_vrf(const char *ifname); @@ -37,6 +47,9 @@ int re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, void re_nl_dump(uint32_t table_id); -void re_nl_sync_routes(uint32_t table_id, const struct hmap *routes); +void re_nl_learned_routes_destroy(struct ovs_list *learned_routes); +void re_nl_sync_routes(uint32_t table_id, const struct hmap *routes, + struct ovs_list *learned_routes, + const struct sbrec_datapath_binding *db); #endif /* route-exchange-netlink.h */ diff --git a/controller/route-exchange.c b/controller/route-exchange.c index 0942780e2..a163968a7 100644 --- a/controller/route-exchange.c +++ b/controller/route-exchange.c @@ -21,6 +21,7 @@ #include <net/if.h> #include "openvswitch/vlog.h" +#include "openvswitch/list.h" #include "lib/ovn-sb-idl.h" @@ -37,6 +38,127 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); static struct sset _maintained_vrfs = SSET_INITIALIZER(&_maintained_vrfs); +struct route_entry { + struct hmap_node hmap_node; + + const struct sbrec_learned_route *sb_route; +}; + +static struct route_entry * +route_alloc_entry(struct hmap *routes, + const struct sbrec_learned_route 
*sb_route) +{ + struct route_entry *route_e = xzalloc(sizeof *route_e); + + route_e->sb_route = sb_route; + uint32_t hash = uuid_hash(&sb_route->datapath->header_.uuid); + hash = hash_string(sb_route->logical_port->logical_port, hash); + hash = hash_string(sb_route->ip_prefix, hash); + hmap_insert(routes, &route_e->hmap_node, hash); + + return route_e; +} + +static struct route_entry * +route_lookup(struct hmap *route_map, + const struct sbrec_datapath_binding *sb_db, + const struct sbrec_port_binding *logical_port, + const char *ip_prefix, const char *nexthop) +{ + struct route_entry *route_e; + uint32_t hash; + + hash = uuid_hash(&sb_db->header_.uuid); + hash = hash_string(logical_port->logical_port, hash); + hash = hash_string(ip_prefix, hash); + HMAP_FOR_EACH_WITH_HASH (route_e, hmap_node, hash, route_map) { + if (route_e->sb_route->datapath != sb_db) { + continue; + } + + if (route_e->sb_route->logical_port != logical_port) { + continue; + } + + if (strcmp(route_e->sb_route->ip_prefix, ip_prefix)) { + continue; + } + + if (strcmp(route_e->sb_route->nexthop, nexthop)) { + continue; + } + + return route_e; + } + + return NULL; +} + +static void +sb_sync_learned_routes(const struct ovs_list *learned_routes, + const struct sbrec_datapath_binding *datapath, + const struct sset *bound_ports, + struct ovsdb_idl_txn *ovnsb_idl_txn, + struct ovsdb_idl_index *sbrec_port_binding_by_name, + struct ovsdb_idl_index *sbrec_learned_route_by_datapath) +{ + struct hmap sync_routes = HMAP_INITIALIZER(&sync_routes); + struct route_entry *route_e; + const struct sbrec_learned_route *sb_route; + + struct sbrec_learned_route *filter = + sbrec_learned_route_index_init_row(sbrec_learned_route_by_datapath); + sbrec_learned_route_index_set_datapath(filter, datapath); + SBREC_LEARNED_ROUTE_FOR_EACH_EQUAL (sb_route, filter, + sbrec_learned_route_by_datapath) { + /* If the port is not local we don't care about it. + * Some other ovn-controller will handle it. 
*/ + if (!sset_contains(bound_ports, + sb_route->logical_port->logical_port)) { + continue; + } + route_e = route_alloc_entry(&sync_routes, sb_route); + } + sbrec_learned_route_index_destroy_row(filter); + + struct re_nl_received_route_node *learned_route; + LIST_FOR_EACH (learned_route, list_node, learned_routes) { + char *ip_prefix = normalize_v46_prefix(&learned_route->addr, + learned_route->plen); + char *nexthop = normalize_v46(&learned_route->nexthop); + + const char *logical_port_name; + SSET_FOR_EACH (logical_port_name, bound_ports) { + const struct sbrec_port_binding *logical_port = + lport_lookup_by_name(sbrec_port_binding_by_name, + logical_port_name); + if (!logical_port) { + continue; + } + route_e = route_lookup(&sync_routes, datapath, + logical_port, ip_prefix, nexthop); + if (route_e) { + hmap_remove(&sync_routes, &route_e->hmap_node); + free(route_e); + } else { + sb_route = sbrec_learned_route_insert(ovnsb_idl_txn); + sbrec_learned_route_set_datapath(sb_route, datapath); + sbrec_learned_route_set_logical_port(sb_route, logical_port); + sbrec_learned_route_set_ip_prefix(sb_route, ip_prefix); + sbrec_learned_route_set_nexthop(sb_route, nexthop); + } + } + free(ip_prefix); + free(nexthop); + } + + HMAP_FOR_EACH_POP (route_e, hmap_node, &sync_routes) { + sbrec_learned_route_delete(route_e->sb_route); + free(route_e); + } + hmap_destroy(&sync_routes); +} + void route_exchange_run(struct route_exchange_ctx_in *r_ctx_in, struct route_exchange_ctx_out *r_ctx_out OVS_UNUSED) @@ -46,8 +168,6 @@ route_exchange_run(struct route_exchange_ctx_in *r_ctx_in, const struct advertise_datapath_entry *ad; HMAP_FOR_EACH (ad, node, r_ctx_in->announce_routes) { - struct hmap received_routes - = HMAP_INITIALIZER(&received_routes); uint32_t table_id = ad->db->tunnel_key; char vrf_name[IFNAMSIZ + 1]; snprintf(vrf_name, sizeof vrf_name, "ovnvrf%"PRIi32, table_id); @@ -72,9 +192,21 @@ route_exchange_run(struct route_exchange_ctx_in *r_ctx_in, 
sset_find_and_delete(&old_maintained_vrfs, vrf_name); } - re_nl_sync_routes(ad->db->tunnel_key, &ad->routes); + struct ovs_list received_routes = OVS_LIST_INITIALIZER( + &received_routes); + + re_nl_sync_routes(ad->db->tunnel_key, &ad->routes, + &received_routes, ad->db); + + sb_sync_learned_routes(&received_routes, ad->db, + &ad->bound_ports, r_ctx_in->ovnsb_idl_txn, + r_ctx_in->sbrec_port_binding_by_name, + r_ctx_in->sbrec_learned_route_by_datapath); + + re_nl_learned_routes_destroy(&received_routes); } + /* Remove VRFs previously maintained by us not found in the above loop. */ const char *vrf_name; SSET_FOR_EACH_SAFE (vrf_name, &old_maintained_vrfs) { diff --git a/controller/route-exchange.h b/controller/route-exchange.h index 65520242b..d23bb37a2 100644 --- a/controller/route-exchange.h +++ b/controller/route-exchange.h @@ -19,6 +19,9 @@ #define ROUTE_EXCHANGE_H 1 struct route_exchange_ctx_in { + struct ovsdb_idl_txn *ovnsb_idl_txn; + struct ovsdb_idl_index *sbrec_port_binding_by_name; + struct ovsdb_idl_index *sbrec_learned_route_by_datapath; /* Contains struct advertise_datapath_entry */ const struct hmap *announce_routes; }; diff --git a/lib/ovn-util.c b/lib/ovn-util.c index ed847517a..507847280 100644 --- a/lib/ovn-util.c +++ b/lib/ovn-util.c @@ -822,6 +822,16 @@ normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen) } } +char * +normalize_v46(const struct in6_addr *prefix) +{ + if (IN6_IS_ADDR_V4MAPPED(prefix)) { + return normalize_ipv4_prefix(in6_addr_get_mapped_ipv4(prefix), 32); + } else { + return normalize_ipv6_prefix(prefix, 128); + } +} + char * str_tolower(const char *orig) { diff --git a/lib/ovn-util.h b/lib/ovn-util.h index 31c2c68df..8d8fd989b 100644 --- a/lib/ovn-util.h +++ b/lib/ovn-util.h @@ -207,6 +207,7 @@ bool ip46_parse(const char *ip_str, struct in6_addr *ip); char *normalize_ipv4_prefix(ovs_be32 ipv4, unsigned int plen); char *normalize_ipv6_prefix(const struct in6_addr *ipv6, unsigned int plen); char 
*normalize_v46_prefix(const struct in6_addr *prefix, unsigned int plen); +char *normalize_v46(const struct in6_addr *prefix); /* Returns a lowercase copy of orig. * Caller must free the returned string. diff --git a/tests/system-ovn.at b/tests/system-ovn.at index 760c97a5d..dc99d4c57 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -15048,6 +15048,16 @@ blackhole 192.0.2.3 proto 84 blackhole 192.0.2.10 proto 84 blackhole 198.51.100.0/24 proto 84]) +# Now we test route learning. +check_row_count Learned_Route 0 +check ip route add 233.252.0.0/24 via 192.168.10.10 dev lo onlink vrf ovnvrf1337 +# For now we trigger a recompute as route watching is not yet implemented. +check ovn-appctl -t ovn-controller inc-engine/recompute +check ovn-nbctl --wait=hv sync +check_row_count Learned_Route 1 +lp=$(fetch_column port_binding _uuid logical_port=internet-phys) +check_row_count Learned_Route 1 logical_port=$lp ip_prefix=233.252.0.0/24 nexthop=192.168.10.10 + OVS_APP_EXIT_AND_WAIT([ovn-controller]) as ovn-sb @@ -15209,6 +15219,7 @@ check ovn-nbctl lr-nat-add pr1 dnat_and_snat 192.0.2.10 10.0.0.2 check ovn-nbctl lsp-add p2 vif2 \ -- lsp-set-addresses vif2 "00:00:ff:ff:ff:02 198.51.100.10" check ovn-nbctl lr-route-add internet 198.51.100.0/24 192.0.2.3 + .ovnsb_idl = re->sb_idl, # Configure external connectivity. check ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext @@ -15251,6 +15262,16 @@ blackhole 192.0.2.3 proto 84 blackhole 192.0.2.10 proto 84 blackhole 198.51.100.0/24 proto 84]) +# Now we test route learning. +check_row_count Learned_Route 0 +check ip route add 233.252.0.0/24 via 192.168.10.10 dev lo onlink vrf ovnvrf1337 +# For now we trigger a recompute as route watching is not yet implemented. 
+check ovn-appctl -t ovn-controller inc-engine/recompute +check ovn-nbctl --wait=hv sync +check_row_count Learned_Route 2 +lp=$(fetch_column port_binding _uuid logical_port=internet-phys) +check_row_count Learned_Route 1 logical_port=$lp ip_prefix=233.252.0.0/24 nexthop=192.168.10.10 + as ovn-sb OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -- 2.47.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev