In the future we need to access routing table entries in network
namespaces.
As netlink has no easy way to get a socket in a network namespace we
copy the logic of frr where we:
1. enter the target netns
2. open a socket
3. jump back to our original netns

the socket will keep working between these jumps.

Tests are added as part of the system testsuite as we need root
permissions to switch namespaces.

Signed-off-by: Felix Huettner <[email protected]>
---
v1->v2: reduced changes in other files

 .github/workflows/build-and-test.yml |   2 +-
 NEWS                                 |   3 +
 lib/netlink-socket.c                 | 122 ++++++++++++++++++++++++---
 lib/netlink-socket.h                 |   8 +-
 lib/route-table.c                    |   6 +-
 tests/automake.mk                    |   4 +-
 tests/system-library.at              |  10 +++
 tests/system-userspace-testsuite.at  |   2 +
 tests/test-netlink-socket.c          |  47 +++++++++++
 9 files changed, 184 insertions(+), 20 deletions(-)
 create mode 100644 tests/system-library.at
 create mode 100644 tests/test-netlink-socket.c

diff --git a/.github/workflows/build-and-test.yml 
b/.github/workflows/build-and-test.yml
index 13a6bf3f2..637813144 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -80,7 +80,7 @@ jobs:
       dependencies: |
         automake libtool gcc bc libjemalloc2 libjemalloc-dev libssl-dev \
         llvm-dev libnuma-dev libpcap-dev selinux-policy-dev libbpf-dev \
-        lftp libreswan
+        lftp libreswan linux-modules-extra-$(uname -r)
       CC:          ${{ matrix.compiler }}
       DPDK:        ${{ matrix.dpdk }}
       DPDK_SHARED: ${{ matrix.dpdk_shared }}
diff --git a/NEWS b/NEWS
index 7a9626bf4..12aae088d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,8 @@
 Post-v3.4.0
 --------------------
+   - libopenvswitch API change:
+     * Add nl_ns_transact, nl_ns_transact_multiple and nl_ns_dump_start to
+       open netlink sockets in other network namespaces.
 
 
 v3.4.0 - 15 Aug 2024
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 976ed15e8..56d29faf5 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -17,6 +17,7 @@
 #include <config.h>
 #include "netlink-socket.h"
 #include <errno.h>
+#include <fcntl.h>
 #include <inttypes.h>
 #include <stdlib.h>
 #include <sys/socket.h>
@@ -25,6 +26,7 @@
 #include <unistd.h>
 #include "coverage.h"
 #include "openvswitch/dynamic-string.h"
+#include "openvswitch/shash.h"
 #include "hash.h"
 #include "openvswitch/hmap.h"
 #include "netlink.h"
@@ -94,6 +96,7 @@ struct nl_sock {
     uint32_t pid;
     int protocol;
     unsigned int rcvbuf;        /* Receive buffer size (SO_RCVBUF). */
+    char *netns;
 };
 
 /* Compile-time limit on iovecs, so that we can allocate a maximum-size array
@@ -106,7 +109,7 @@ struct nl_sock {
  * Initialized by nl_sock_create(). */
 static int max_iovs;
 
-static int nl_pool_alloc(int protocol, struct nl_sock **sockp);
+static int nl_pool_alloc(const char *, int protocol, struct nl_sock **sockp);
 static void nl_pool_release(struct nl_sock *);
 
 /* Creates a new netlink socket for the given netlink 'protocol'
@@ -145,6 +148,7 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
 
     *sockp = NULL;
     sock = xmalloc(sizeof *sock);
+    sock->netns = NULL;
 
 #ifdef _WIN32
     sock->overlapped.hEvent = NULL;
@@ -294,6 +298,7 @@ nl_sock_destroy(struct nl_sock *sock)
 #else
         close(sock->fd);
 #endif
+        free(sock->netns);
         free(sock);
     }
 }
@@ -1152,6 +1157,9 @@ nl_sock_drain(struct nl_sock *sock)
  * Netlink socket created with the given 'protocol', and initializes 'dump' to
  * reflect the state of the operation.
  *
+ * The dump runs in the specified 'netns'. If 'netns' is NULL the current will
+ * be used.
+ *
  * 'request' must contain a Netlink message.  Before sending the message,
  * nlmsg_len will be finalized to match request->size, and nlmsg_pid will be
  * set to the Netlink socket's pid.  NLM_F_DUMP and NLM_F_ACK will be set in
@@ -1165,13 +1173,21 @@ nl_sock_drain(struct nl_sock *sock)
  * The caller must eventually destroy 'request'.
  */
 void
-nl_dump_start(struct nl_dump *dump, int protocol, const struct ofpbuf *request)
+nl_dump_start(struct nl_dump *dump, int protocol,
+                 const struct ofpbuf *request)
+{
+    nl_ns_dump_start(NULL, dump, protocol, request);
+}
+
+void
+nl_ns_dump_start(const char *netns, struct nl_dump *dump, int protocol,
+              const struct ofpbuf *request)
 {
     nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
 
     ovs_mutex_init(&dump->mutex);
     ovs_mutex_lock(&dump->mutex);
-    dump->status = nl_pool_alloc(protocol, &dump->sock);
+    dump->status = nl_pool_alloc(netns, protocol, &dump->sock);
     if (!dump->status) {
         dump->status = nl_sock_send__(dump->sock, request,
                                       nl_sock_allocate_seq(dump->sock, 1),
@@ -1725,39 +1741,100 @@ struct nl_pool {
     int n;
 };
 
+struct nl_ns_pool {
+    struct nl_pool pools[MAX_LINKS];
+};
+
 static struct ovs_mutex pool_mutex = OVS_MUTEX_INITIALIZER;
-static struct nl_pool pools[MAX_LINKS] OVS_GUARDED_BY(pool_mutex);
+static struct shash pools OVS_GUARDED_BY(pool_mutex) =
+                                SHASH_INITIALIZER(&pools);
 
 static int
-nl_pool_alloc(int protocol, struct nl_sock **sockp)
+nl_sock_ns_create(const char *netns, int protocol, struct nl_sock **sockp) {
+#ifndef __linux__
+    if (netns) {
+        return -EOPNOTSUPP;
+    }
+#endif
+
+    int ret, ns_fd, ns_default_fd, err;
+    if (netns) {
+        ns_default_fd = open("/proc/self/ns/net", O_RDONLY);
+        if (ns_default_fd < 0) {
+            VLOG_ERR("something wrong when opening self net fd, %d\n",
+                     errno);
+            return -errno;
+        }
+        char *netns_path = xasprintf("/var/run/netns/%s", netns);
+        ns_fd = open(netns_path, O_RDONLY);
+        if (ns_fd < 0) {
+            VLOG_ERR("something wrong when opening other net fd %s,"
+                    " %d\n", netns_path, errno);
+            return -errno;
+        }
+        err = setns(ns_fd, CLONE_NEWNET);
+        if (err < 0) {
+            VLOG_ERR("something wrong during setns to target %s, %d\n",
+                     netns_path, errno);
+        }
+        free(netns_path);
+        close(ns_fd);
+    }
+    ret = nl_sock_create(protocol, sockp);
+    if (netns) {
+        err = setns(ns_default_fd, CLONE_NEWNET);
+        if (err < 0) {
+            VLOG_ABORT("something wrong during setns to home, %d\n",
+                       errno);
+        }
+        close(ns_default_fd);
+        if (*sockp) {
+            (*sockp)->netns = xstrdup(netns);
+        }
+    }
+    return ret;
+}
+
+static int
+nl_pool_alloc(const char *netns, int protocol, struct nl_sock **sockp)
 {
+    const char * netns_name = netns ? netns : "";
+    struct nl_ns_pool *ns_pools;
     struct nl_sock *sock = NULL;
     struct nl_pool *pool;
 
-    ovs_assert(protocol >= 0 && protocol < ARRAY_SIZE(pools));
+    ovs_assert(protocol >= 0 && protocol < ARRAY_SIZE(ns_pools->pools));
 
     ovs_mutex_lock(&pool_mutex);
-    pool = &pools[protocol];
-    if (pool->n > 0) {
-        sock = pool->socks[--pool->n];
+    ns_pools = shash_find_data(&pools, netns_name);
+    if (ns_pools) {
+        pool = &ns_pools->pools[protocol];
+        if (pool->n > 0) {
+            sock = pool->socks[--pool->n];
+        }
     }
     ovs_mutex_unlock(&pool_mutex);
 
     if (sock) {
         *sockp = sock;
         return 0;
-    } else {
-        return nl_sock_create(protocol, sockp);
     }
+    return nl_sock_ns_create(netns, protocol, sockp);
 }
 
 static void
 nl_pool_release(struct nl_sock *sock)
 {
     if (sock) {
-        struct nl_pool *pool = &pools[sock->protocol];
+        const char * netns_name = sock->netns ? sock->netns : "";
 
         ovs_mutex_lock(&pool_mutex);
+        struct nl_ns_pool *ns_pools = shash_find_data(&pools, netns_name);
+        if (!ns_pools) {
+            ns_pools = xzalloc(sizeof(*ns_pools));
+            shash_add(&pools, netns_name, ns_pools);
+        }
+        struct nl_pool *pool = &ns_pools->pools[sock->protocol];
         if (pool->n < ARRAY_SIZE(pool->socks)) {
             pool->socks[pool->n++] = sock;
             sock = NULL;
@@ -1772,6 +1849,9 @@ nl_pool_release(struct nl_sock *sock)
  * (e.g. NETLINK_ROUTE or NETLINK_GENERIC) and waits for a response.  If
  * successful, returns 0.  On failure, returns a positive errno value.
  *
+ * The 'request' is send in the specified netns. If 'netns' is NULL the current
+ * namespace is used.
+ *
  * If 'replyp' is nonnull, then on success '*replyp' is set to the kernel's
  * reply, which the caller is responsible for freeing with ofpbuf_delete(), and
  * on failure '*replyp' is set to NULL.  If 'replyp' is null, then the kernel's
@@ -1812,11 +1892,18 @@ nl_pool_release(struct nl_sock *sock)
 int
 nl_transact(int protocol, const struct ofpbuf *request,
             struct ofpbuf **replyp)
+{
+    return nl_ns_transact(NULL, protocol, request, replyp);
+}
+
+int
+nl_ns_transact(const char *netns, int protocol, const struct ofpbuf *request,
+               struct ofpbuf **replyp)
 {
     struct nl_sock *sock;
     int error;
 
-    error = nl_pool_alloc(protocol, &sock);
+    error = nl_pool_alloc(netns, protocol, &sock);
     if (error) {
         if (replyp) {
             *replyp = NULL;
@@ -1853,11 +1940,18 @@ nl_transact(int protocol, const struct ofpbuf *request,
 void
 nl_transact_multiple(int protocol,
                      struct nl_transaction **transactions, size_t n)
+{
+    nl_ns_transact_multiple(NULL, protocol, transactions, n);
+}
+
+void
+nl_ns_transact_multiple(const char *netns, int protocol,
+                        struct nl_transaction **transactions, size_t n)
 {
     struct nl_sock *sock;
     int error;
 
-    error = nl_pool_alloc(protocol, &sock);
+    error = nl_pool_alloc(netns, protocol, &sock);
     if (!error) {
         nl_sock_transact_multiple(sock, transactions, n);
         nl_pool_release(sock);
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index 7852ad052..fb53284a6 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -253,7 +253,11 @@ struct nl_transaction {
 /* Transactions without an allocated socket. */
 int nl_transact(int protocol, const struct ofpbuf *request,
                 struct ofpbuf **replyp);
+int nl_ns_transact(const char *netns, int protocol,
+                   const struct ofpbuf *request, struct ofpbuf **replyp);
 void nl_transact_multiple(int protocol, struct nl_transaction **, size_t n);
+void nl_ns_transact_multiple(const char *netns, int protocol,
+                             struct nl_transaction **, size_t n);
 
 /* Table dumping. */
 #define NL_DUMP_BUFSIZE         4096
@@ -270,8 +274,10 @@ struct nl_dump {
     struct ovs_mutex mutex;     /* Protects 'status', synchronizes recv(). */
 };
 
-void nl_dump_start(struct nl_dump *, int protocol,
+void nl_dump_start(struct nl_dump *dump, int protocol,
                    const struct ofpbuf *request);
+void nl_ns_dump_start(const char *netns, struct nl_dump *dump, int protocol,
+                      const struct ofpbuf *request);
 bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply, struct ofpbuf *buf);
 int nl_dump_done(struct nl_dump *);
 
diff --git a/lib/route-table.c b/lib/route-table.c
index c6cb21394..bce29ebb2 100644
--- a/lib/route-table.c
+++ b/lib/route-table.c
@@ -156,7 +156,7 @@ route_table_wait(void)
 }
 
 static bool
-route_table_dump_one_table(unsigned char id)
+route_table_dump_one_table(const char *netns, unsigned char id)
 {
     uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
     struct ofpbuf request, reply, buf;
@@ -172,7 +172,7 @@ route_table_dump_one_table(unsigned char id)
     rq_msg->rtm_family = AF_UNSPEC;
     rq_msg->rtm_table = id;
 
-    nl_dump_start(&dump, NETLINK_ROUTE, &request);
+    nl_ns_dump_start(netns, &dump, NETLINK_ROUTE, &request);
     ofpbuf_uninit(&request);
 
     ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
@@ -212,7 +212,7 @@ route_table_reset(void)
     COVERAGE_INC(route_table_dump);
 
     for (size_t i = 0; i < ARRAY_SIZE(tables); i++) {
-        if (!route_table_dump_one_table(tables[i])) {
+        if (!route_table_dump_one_table(NULL, tables[i])) {
             /* Got unfiltered reply, no need to dump further. */
             break;
         }
diff --git a/tests/automake.mk b/tests/automake.mk
index edfc2cb33..abb0d352c 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -178,7 +178,8 @@ SYSTEM_TESTSUITE_AT = \
        tests/system-layer3-tunnels.at \
        tests/system-traffic.at \
        tests/system-ipsec.at \
-       tests/system-interface.at
+       tests/system-interface.at \
+       tests/system-library.at
 
 SYSTEM_OFFLOADS_TESTSUITE_AT = \
        tests/system-common-macros.at \
@@ -500,6 +501,7 @@ if LINUX
 tests_ovstest_SOURCES += \
        tests/test-netlink-conntrack.c \
        tests/test-netlink-policy.c \
+       tests/test-netlink-socket.c \
        tests/test-psample.c
 endif
 
diff --git a/tests/system-library.at b/tests/system-library.at
new file mode 100644
index 000000000..de7f2178f
--- /dev/null
+++ b/tests/system-library.at
@@ -0,0 +1,10 @@
+AT_BANNER([system-library])
+
+AT_SETUP([netlink socket])
+AT_SKIP_IF([test "$IS_WIN32" = "yes"])
+AT_SKIP_IF([test "$IS_BSD" = "yes"])
+ADD_NAMESPACES([ns1337])
+NS_EXEC([ns1337], [ip link add vrf1337 type vrf table 1337])
+AT_CHECK([ovstest test-netlink-socket], [0])
+AT_CLEANUP
+
diff --git a/tests/system-userspace-testsuite.at 
b/tests/system-userspace-testsuite.at
index 2e9659a67..6e456cb09 100644
--- a/tests/system-userspace-testsuite.at
+++ b/tests/system-userspace-testsuite.at
@@ -27,3 +27,5 @@ m4_include([tests/system-layer3-tunnels.at])
 m4_include([tests/system-interface.at])
 m4_include([tests/system-userspace-packet-type-aware.at])
 m4_include([tests/system-route.at])
+
+m4_include([tests/system-library.at])
diff --git a/tests/test-netlink-socket.c b/tests/test-netlink-socket.c
new file mode 100644
index 000000000..b4597880f
--- /dev/null
+++ b/tests/test-netlink-socket.c
@@ -0,0 +1,47 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/if.h>
+
+#include "netlink.h"
+#include "netlink-socket.h"
+#include "ovstest.h"
+
+static int
+try_find_link(const char *netns) {
+    struct ofpbuf request, *reply;
+    int err;
+    ofpbuf_init(&request, 0);
+    nl_msg_put_nlmsghdr(&request, 0, RTM_GETLINK, NLM_F_REQUEST);
+    ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg));
+    nl_msg_put_string(&request, IFLA_IFNAME, "vrf1337");
+    err = nl_ns_transact(netns, NETLINK_ROUTE, &request, &reply);
+    ofpbuf_uninit(&request);
+    ofpbuf_delete(reply);
+    return err;
+}
+
+static void
+test_netlink_socket_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
+{
+    ovs_assert(try_find_link(NULL) != 0);
+    ovs_assert(try_find_link("ns1337") == 0);
+}
+
+OVSTEST_REGISTER("test-netlink-socket", test_netlink_socket_main);
-- 
2.47.0


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to