From: Frode Nordahl <fnord...@ubuntu.com> Introduce route-exchange-netlink module which implements interface for maintaining VRFs [0] and routes through Netlink.
There is a desire to do this without having to (re-)implement routing protocol state machines in OVN, and to accomplish this we make use of Netlink. Netlink was chosen because: * Its ubiquitous nature with availability on any Linux system as as well other platforms. * Presence of a very good Netlink library implementation in our sibling project and library, Open vSwitch. * Popular routing protocol software conveniently already have support for redistributing routes to/from Netlink. * Support for interacting with Virtual Routing and Forwarding domains [0], allowing full isolation between virtual network resources defined within OVN and the hosting system while retaining access to all system network interfaces. It is important to note that the purpose of this integration is generic exchange of control plane information, while allowing to keep the datapath in OVS/OVN, enabling users to leverage its full range of user-, kernel- and mixed- space datapath implementations. 0: https://docs.kernel.org/networking/vrf.html Acked-by: Dumitru Ceara <dce...@redhat.com> Co-Authored-by: Felix Huettner <felix.huettner@stackit.cloud> Signed-off-by: Felix Huettner <felix.huettner@stackit.cloud> Signed-off-by: Frode Nordahl <fnord...@ubuntu.com> --- v7->v8: * addressed review comments v6->v7: * addressed review comments v5->v6: * addressed review comments v4->v5: fix compile error v3->v4: - addressed review comments. - fix authorship configure.ac | 2 + controller/automake.mk | 6 + controller/route-exchange-netlink.c | 267 ++++++++++++++++++++++++++++ controller/route-exchange-netlink.h | 44 +++++ controller/route.c | 2 +- controller/route.h | 1 + m4/ovn.m4 | 25 +++ tests/automake.mk | 5 + tests/system-common-macros.at | 12 ++ 9 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 controller/route-exchange-netlink.c create mode 100644 controller/route-exchange-netlink.h diff --git a/configure.ac b/configure.ac index 4a63b84d4..cf9b4a6fd 100644 --- a/configure.ac +++ b/configure.ac @@ -87,6 +87,8 @@ OVS_CHECK_WIN32 OVS_CHECK_VISUAL_STUDIO_DDK OVN_CHECK_COVERAGE OVS_CHECK_NDEBUG +OVS_CHECK_NETLINK +OVS_CHECK_LINUX_NETLINK OVS_CHECK_OPENSSL OVN_CHECK_LOGDIR OVN_CHECK_PYTHON3 diff --git a/controller/automake.mk b/controller/automake.mk index 452dd0c6d..a8b8dde44 100644 --- a/controller/automake.mk +++ b/controller/automake.mk @@ -59,6 +59,12 @@ controller_ovn_controller_SOURCES = \ controller/route.h \ controller/route.c +if HAVE_NETLINK +controller_ovn_controller_SOURCES += \ + controller/route-exchange-netlink.h \ + controller/route-exchange-netlink.c +endif + controller_ovn_controller_LDADD = lib/libovn.la $(OVS_LIBDIR)/libopenvswitch.la man_MANS += controller/ovn-controller.8 EXTRA_DIST += controller/ovn-controller.8.xml diff --git a/controller/route-exchange-netlink.c b/controller/route-exchange-netlink.c new file mode 100644 index 000000000..97e90135c --- /dev/null +++ b/controller/route-exchange-netlink.c @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2025 Canonical, Ltd. + * Copyright (c) 2025, STACKIT GmbH & Co. KG + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +#include <errno.h> +#include <inttypes.h> +#include <linux/rtnetlink.h> +#include <net/if.h> +#include <netinet/in.h> + +#include "netlink-socket.h" +#include "openvswitch/hmap.h" +#include "hmapx.h" +#include "openvswitch/ofpbuf.h" +#include "openvswitch/vlog.h" +#include "ovn-util.h" +#include "route-table.h" +#include "route.h" + +#include "route-exchange-netlink.h" + +VLOG_DEFINE_THIS_MODULE(route_exchange_netlink); + +#define TABLE_ID_VALID(table_id) (table_id != RT_TABLE_UNSPEC && \ + table_id != RT_TABLE_COMPAT && \ + table_id != RT_TABLE_DEFAULT && \ + table_id != RT_TABLE_MAIN && \ + table_id != RT_TABLE_LOCAL && \ + table_id != RT_TABLE_MAX) + +int +re_nl_create_vrf(const char *ifname, uint32_t table_id) +{ + if (!TABLE_ID_VALID(table_id)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + VLOG_WARN_RL(&rl, + "attempt to create VRF using invalid table id %"PRIu32, + table_id); + return EINVAL; + } + + size_t linkinfo_off, infodata_off; + struct ifinfomsg *ifinfo; + struct ofpbuf request; + int err; + + ofpbuf_init(&request, 0); + nl_msg_put_nlmsghdr(&request, 0, RTM_NEWLINK, + NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL); + ifinfo = ofpbuf_put_zeros(&request, sizeof *ifinfo); + nl_msg_put_string(&request, IFLA_IFNAME, ifname); + + ifinfo->ifi_change = ifinfo->ifi_flags = IFF_UP; + linkinfo_off = nl_msg_start_nested(&request, IFLA_LINKINFO); + nl_msg_put_string(&request, IFLA_INFO_KIND, "vrf"); + infodata_off = nl_msg_start_nested(&request, IFLA_INFO_DATA); + nl_msg_put_u32(&request, IFLA_VRF_TABLE, table_id); + nl_msg_end_nested(&request, infodata_off); + nl_msg_end_nested(&request, linkinfo_off); + + err = nl_transact(NETLINK_ROUTE, &request, NULL); + + ofpbuf_uninit(&request); + return err; +} + +int +re_nl_delete_vrf(const char *ifname) +{ + struct ifinfomsg *ifinfo; + struct ofpbuf request; + int err; + + ofpbuf_init(&request, 0); + nl_msg_put_nlmsghdr(&request, 0, RTM_DELLINK, NLM_F_REQUEST | NLM_F_ACK); + ifinfo = ofpbuf_put_zeros(&request, sizeof *ifinfo); + nl_msg_put_string(&request, IFLA_IFNAME, ifname); + err = nl_transact(NETLINK_ROUTE, &request, NULL); + + ofpbuf_uninit(&request); + return err; +} + +static int +modify_route(uint32_t type, uint32_t flags_arg, uint32_t table_id, + const struct in6_addr *dst, unsigned int plen) +{ + uint32_t flags = NLM_F_REQUEST | NLM_F_ACK; + bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(dst); + struct ofpbuf request; + struct rtmsg *rt; + int err; + + flags |= flags_arg; + + ofpbuf_init(&request, 0); + nl_msg_put_nlmsghdr(&request, 0, type, flags); + rt = ofpbuf_put_zeros(&request, sizeof *rt); + rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; + rt->rtm_table = RT_TABLE_UNSPEC; /* RTA_TABLE attribute allows id > 256 */ + /* Manage only OVN routes */ + rt->rtm_protocol = RTPROT_OVN; + rt->rtm_type = RTN_BLACKHOLE; + rt->rtm_scope = RT_SCOPE_UNIVERSE; + rt->rtm_dst_len = plen; + + nl_msg_put_u32(&request, RTA_TABLE, table_id); + + if (is_ipv4) { + nl_msg_put_be32(&request, RTA_DST, in6_addr_get_mapped_ipv4(dst)); + } else { + nl_msg_put_in6_addr(&request, RTA_DST, dst); + } + + if (VLOG_IS_DBG_ENABLED()) { + struct ds msg = DS_EMPTY_INITIALIZER; + + if (type == RTM_DELROUTE) { + ds_put_cstr(&msg, "Removing blackhole route from "); + } else { + ds_put_cstr(&msg, "Adding blackhole route to "); + } + + ds_put_format(&msg, "table %"PRIu32 " for prefix ", table_id); + if (IN6_IS_ADDR_V4MAPPED(dst)) { + ds_put_format(&msg, IP_FMT, + IP_ARGS(in6_addr_get_mapped_ipv4(dst))); + } else { + ipv6_format_addr(dst, &msg); + } + ds_put_format(&msg, "/%u", plen); + + VLOG_DBG("%s", ds_cstr(&msg)); + ds_destroy(&msg); + } + + err = nl_transact(NETLINK_ROUTE, &request, NULL); + + ofpbuf_uninit(&request); + return err; +} + +int +re_nl_add_route(uint32_t table_id, const struct in6_addr *dst, + unsigned int plen) +{ + if (!TABLE_ID_VALID(table_id)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + VLOG_WARN_RL(&rl, + "attempt to add route using invalid table id %"PRIu32, + table_id); + return EINVAL; + } + + return modify_route(RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL, table_id, + dst, plen); +} + +int +re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, + unsigned int plen) +{ + if (!TABLE_ID_VALID(table_id)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + VLOG_WARN_RL(&rl, + "attempt to delete route using invalid table id %"PRIu32, + table_id); + return EINVAL; + } + + return modify_route(RTM_DELROUTE, 0, table_id, dst, plen); +} + +struct route_msg_handle_data { + struct hmapx *routes_to_advertise; + const struct hmap *routes; +}; + +static void +handle_route_msg(const struct route_table_msg *msg, void *data) +{ + struct route_msg_handle_data *handle_data = data; + const struct route_data *rd = &msg->rd; + struct advertise_route_entry *ar; + int err; + + /* This route is not from us, we should not touch it. */ + if (rd->rtm_protocol != RTPROT_OVN) { + return; + } + + uint32_t hash = advertise_route_hash(&rd->rta_dst, rd->rtm_dst_len); + HMAP_FOR_EACH_WITH_HASH (ar, node, hash, handle_data->routes) { + if (ipv6_addr_equals(&ar->addr, &rd->rta_dst) + && ar->plen == rd->rtm_dst_len) { + hmapx_find_and_delete(handle_data->routes_to_advertise, ar); + return; + } + } + err = re_nl_delete_route(rd->rta_table_id, &rd->rta_dst, + rd->rtm_dst_len); + if (err) { + char addr_s[INET6_ADDRSTRLEN + 1]; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + VLOG_WARN_RL(&rl, "Delete route table_id=%"PRIu32" dst=%s plen=%d " + "failed: %s", rd->rta_table_id, + ipv6_string_mapped( + addr_s, &rd->rta_dst) ? addr_s : "(invalid)", + rd->rtm_dst_len, + ovs_strerror(err)); + } +} + +void +re_nl_sync_routes(uint32_t table_id, const struct hmap *routes) +{ + struct hmapx routes_to_advertise = HMAPX_INITIALIZER(&routes_to_advertise); + struct advertise_route_entry *ar; + + HMAP_FOR_EACH (ar, node, routes) { + hmapx_add(&routes_to_advertise, ar); + } + + /* Remove routes from the system that are not in the routes hmap and + * remove entries from routes hmap that match routes already installed + * in the system. */ + struct route_msg_handle_data data = { + .routes = routes, + .routes_to_advertise = &routes_to_advertise, + }; + route_table_dump_one_table(table_id, handle_route_msg, &data); + + /* Add any remaining routes in the routes_to_advertise hmapx to the + * system routing table. */ + struct hmapx_node *hn; + HMAPX_FOR_EACH (hn, &routes_to_advertise) { + ar = hn->data; + int err = re_nl_add_route(table_id, &ar->addr, ar->plen); + if (err) { + char addr_s[INET6_ADDRSTRLEN + 1]; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + VLOG_WARN_RL(&rl, "Add route table_id=%"PRIu32" dst=%s " + "plen=%d: %s", + table_id, + ipv6_string_mapped( + addr_s, &ar->addr) ? addr_s : "(invalid)", + ar->plen, + ovs_strerror(err)); + } + } + hmapx_destroy(&routes_to_advertise); +} diff --git a/controller/route-exchange-netlink.h b/controller/route-exchange-netlink.h new file mode 100644 index 000000000..056273627 --- /dev/null +++ b/controller/route-exchange-netlink.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2025 Canonical, Ltd. + * Copyright (c) 2025, STACKIT GmbH & Co. KG + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ROUTE_EXCHANGE_NETLINK_H +#define ROUTE_EXCHANGE_NETLINK_H 1 + +#include <stdint.h> + +/* This value is arbitrary but currently unused. + * See the kernel rtnetlink UAPI at + * https://github.com/torvalds/linux/blob/master/include/uapi/linux/rtnetlink.h + * */ +#define RTPROT_OVN 84 + +struct in6_addr; +struct hmap; + +int re_nl_create_vrf(const char *ifname, uint32_t table_id); +int re_nl_delete_vrf(const char *ifname); + +int re_nl_add_route(uint32_t table_id, const struct in6_addr *dst, + unsigned int plen); +int re_nl_delete_route(uint32_t table_id, const struct in6_addr *dst, + unsigned int plen); + +void re_nl_dump(uint32_t table_id); + +void re_nl_sync_routes(uint32_t table_id, const struct hmap *routes); + +#endif /* route-exchange-netlink.h */ diff --git a/controller/route.c b/controller/route.c index dccebe35f..108435bad 100644 --- a/controller/route.c +++ b/controller/route.c @@ -38,7 +38,7 @@ route_exchange_relevant_port(const struct sbrec_port_binding *pb) return pb && smap_get_bool(&pb->options, "dynamic-routing", false); } -static uint32_t +uint32_t advertise_route_hash(const struct in6_addr *dst, unsigned int plen) { uint32_t hash = hash_bytes(dst->s6_addr, 16, 0); diff --git a/controller/route.h b/controller/route.h index b38f62e5a..448643ead 100644 --- a/controller/route.h +++ b/controller/route.h @@ -61,6 +61,7 @@ struct advertise_route_entry { }; bool route_exchange_relevant_port(const struct sbrec_port_binding *); +uint32_t advertise_route_hash(const struct in6_addr *dst, unsigned int plen); void route_run(struct route_ctx_in *, struct route_ctx_out *); void route_cleanup(struct hmap *announce_routes); diff --git a/m4/ovn.m4 b/m4/ovn.m4 index 8f36b75fc..8ccc1629e 100644 --- a/m4/ovn.m4 +++ b/m4/ovn.m4 @@ -560,3 +560,28 @@ AC_DEFUN([OVN_CHECK_UNBOUND], fi AM_CONDITIONAL([HAVE_UNBOUND], [test "$HAVE_UNBOUND" = yes]) AC_SUBST([HAVE_UNBOUND])]) + +dnl Checks for Netlink support. +AC_DEFUN([OVS_CHECK_NETLINK], + [AC_CHECK_HEADER([linux/netlink.h], + [HAVE_NETLINK=yes], + [HAVE_NETLINK=no], + [#include <sys/socket.h> + ]) + AM_CONDITIONAL([HAVE_NETLINK], [test "$HAVE_NETLINK" = yes]) + if test "$HAVE_NETLINK" = yes; then + AC_DEFINE([HAVE_NETLINK], [1], + [Define to 1 if Netlink protocol is available.]) + fi]) + +dnl OVS_CHECK_LINUX_NETLINK +dnl +dnl Configure Linux netlink compat. +AC_DEFUN([OVS_CHECK_LINUX_NETLINK], [ + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([#include <linux/netlink.h>], [ + struct nla_bitfield32 x = { 0 }; + ])], + [AC_DEFINE([HAVE_NLA_BITFIELD32], [1], + [Define to 1 if struct nla_bitfield32 is available.])]) +]) diff --git a/tests/automake.mk b/tests/automake.mk index 8c0040b6d..2458c8cc1 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -308,6 +308,11 @@ tests_ovstest_LDADD = $(OVS_LIBDIR)/daemon.lo \ controller/vif-plug.$(OBJEXT) \ northd/ipam.$(OBJEXT) +if HAVE_NETLINK +tests_ovstest_LDADD += \ + controller/route-exchange-netlink.$(OBJEXT) +endif + # Python tests. CHECK_PYFILES = \ tests/test-l7.py \ diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at index 968c4028b..bba2597cf 100644 --- a/tests/system-common-macros.at +++ b/tests/system-common-macros.at @@ -530,3 +530,15 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d /failed to query port patch-.*/d /.*terminating with signal 15.*/d"]) ])) + +# CHECK_VRF() +# +# Perform a requirements check for running VRF tests. +# +m4_define([CHECK_VRF], +[ + rc=0 + modprobe vrf || rc=$? + AT_SKIP_IF([test $rc -ne 0]) + on_exit 'modprobe -r vrf' +]) -- 2.47.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev