The netlink notification's ancillary data contains the network
namespace id (netnsid) needed to identify the device correctly.

Signed-off-by: Flavio Leitner <f...@redhat.com>
---
 configure.ac           |   3 +-
 lib/automake.mk        |   1 +
 lib/dpif-netlink.c     |   6 +--
 lib/netdev-linux.c     |   2 +-
 lib/netlink-notifier.c |   2 +-
 lib/netlink-socket.c   |  53 ++++++++++++++++---
 lib/netlink-socket.h   |   2 +-
 lib/netnsid.h          | 139 +++++++++++++++++++++++++++++++++++++++++++++++++
 utilities/nlmon.c      |   2 +-
 9 files changed, 196 insertions(+), 14 deletions(-)
 create mode 100644 lib/netnsid.h

diff --git a/configure.ac b/configure.ac
index 9940a1a45..f0e4b5127 100644
--- a/configure.ac
+++ b/configure.ac
@@ -108,7 +108,8 @@ AC_CHECK_MEMBERS([struct sockaddr_in6.sin6_scope_id], [], [],
 #include <sys/types.h>
 #include <netinet/in.h>]])
 AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r sendmmsg clock_gettime])
-AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h stdatomic.h])
+AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h])
+AC_CHECK_HEADERS([linux/net_namespace.h stdatomic.h])
 AC_CHECK_HEADERS([net/if_mib.h], [], [], [[#include <sys/types.h>
 #include <net/if.h>]])
 
diff --git a/lib/automake.mk b/lib/automake.mk
index c7eda6e31..dcf90899f 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -143,6 +143,7 @@ lib_libopenvswitch_la_SOURCES = \
        lib/netflow.h \
        lib/netlink.c \
        lib/netlink.h \
+       lib/netnsid.h \
        lib/nx-match.c \
        lib/nx-match.h \
        lib/object-collection.c \
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 8543a2bbe..2ba05d7b1 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1288,7 +1288,7 @@ dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep)
         int error;
 
         ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
-        error = nl_sock_recv(dpif->port_notifier, &buf, false);
+        error = nl_sock_recv(dpif->port_notifier, &buf, NULL, false);
         if (!error) {
             struct dpif_netlink_vport vport;
 
@@ -2622,7 +2622,7 @@ dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id,
                 return EAGAIN;
             }
 
-            error = nl_sock_recv(sock_pool[i].nl_sock, buf, false);
+            error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false);
             if (error == ENOBUFS) {
                 /* ENOBUFS typically means that we've received so many
                  * packets that the buffer overflowed.  Try again
@@ -2697,7 +2697,7 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
                 return EAGAIN;
             }
 
-            error = nl_sock_recv(ch->sock, buf, false);
+            error = nl_sock_recv(ch->sock, buf, NULL, false);
             if (error == ENOBUFS) {
                 /* ENOBUFS typically means that we've received so many
                  * packets that the buffer overflowed.  Try again
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 7ea40a891..1f95c6d1e 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -639,7 +639,7 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
         struct ofpbuf buf;
 
         ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
-        error = nl_sock_recv(sock, &buf, false);
+        error = nl_sock_recv(sock, &buf, NULL, false);
         if (!error) {
             struct rtnetlink_change change;
 
diff --git a/lib/netlink-notifier.c b/lib/netlink-notifier.c
index 7d8cfffa2..dfecb9778 100644
--- a/lib/netlink-notifier.c
+++ b/lib/netlink-notifier.c
@@ -185,7 +185,7 @@ nln_run(struct nln *nln)
         int error;
 
         ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
-        error = nl_sock_recv(nln->notify_sock, &buf, false);
+        error = nl_sock_recv(nln->notify_sock, &buf, NULL, false);
         if (!error) {
             int group = nln->parse(&buf, nln->change);
 
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 317bf907f..f68ca860d 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -19,6 +19,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <stdlib.h>
+#include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/uio.h>
 #include <unistd.h>
@@ -28,6 +29,7 @@
 #include "openvswitch/hmap.h"
 #include "netlink.h"
 #include "netlink-protocol.h"
+#include "netnsid.h"
 #include "odp-netlink.h"
 #include "openvswitch/ofpbuf.h"
 #include "ovs-thread.h"
@@ -607,7 +609,7 @@ nl_sock_send_seq(struct nl_sock *sock, const struct ofpbuf *msg,
 }
 
 static int
-nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
+nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, int *nsid, bool wait)
 {
     /* We can't accurately predict the size of the data to be received.  The
      * caller is supposed to have allocated enough space in 'buf' to handle the
@@ -618,7 +620,10 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
     uint8_t tail[65536];
     struct iovec iov[2];
     struct msghdr msg;
+    uint8_t msgctrl[64];
+    struct cmsghdr *cmsg;
     ssize_t retval;
+    int *ptr;
     int error;
 
     ovs_assert(buf->allocated >= sizeof *nlmsghdr);
@@ -632,6 +637,8 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
     memset(&msg, 0, sizeof msg);
     msg.msg_iov = iov;
     msg.msg_iovlen = 2;
+    msg.msg_control = msgctrl;
+    msg.msg_controllen = sizeof msgctrl;
 
     /* Receive a Netlink message from the kernel.
      *
@@ -706,6 +713,39 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
     }
 #endif
 
+    if (nsid) {
+        /* The network namespace id from which the message was sent comes
+         * as ancillary data. For older kernels, this data is either not
+         * available or it might be -1, so it falls back to local network
+         * namespace (no id). Latest kernels return a valid ID only if
+         * available or nothing. */
+        netnsid_set_local(nsid);
+        cmsg = CMSG_FIRSTHDR(&msg);
+        while (cmsg != NULL) {
+            if (cmsg->cmsg_level == SOL_NETLINK
+                && cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID) {
+                ptr = ALIGNED_CAST(int *, CMSG_DATA(cmsg));
+                netnsid_set(nsid, *ptr);
+            }
+            if (cmsg->cmsg_level == SOL_SOCKET
+                && cmsg->cmsg_type == SCM_RIGHTS) {
+                /* This is unexpected and unwanted, close all fds */
+                int nfds;
+                int i;
+                nfds = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))
+                       / sizeof(int);
+                ptr = ALIGNED_CAST(int *, CMSG_DATA(cmsg));
+                for (i = 0; i < nfds; i++) {
+                    VLOG_ERR_RL(&rl, "closing unexpected received fd (%d).",
+                                ptr[i]);
+                    close(ptr[i]);
+                }
+            }
+
+            cmsg = CMSG_NXTHDR(&msg, cmsg);
+        }
+    }
+
     log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
     COVERAGE_INC(netlink_received);
 
@@ -714,7 +754,8 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
 
 /* Tries to receive a Netlink message from the kernel on 'sock' into 'buf'.  If
  * 'wait' is true, waits for a message to be ready.  Otherwise, fails with
- * EAGAIN if the 'sock' receive buffer is empty.
+ * EAGAIN if the 'sock' receive buffer is empty.  If 'nsid' is nonnull, stores
+ * in '*nsid' the ID of the network namespace the message was sent from.
  *
  * The caller must have initialized 'buf' with an allocation of at least
  * NLMSG_HDRLEN bytes.  For best performance, the caller should allocate enough
@@ -730,9 +771,9 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
  * Regardless of success or failure, this function resets 'buf''s headroom to
  * 0. */
 int
-nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
+nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, int *nsid, bool wait)
 {
-    return nl_sock_recv__(sock, buf, wait);
+    return nl_sock_recv__(sock, buf, nsid, wait);
 }
 
 static void
@@ -821,7 +862,7 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
         }
 
         /* Receive a reply. */
-        error = nl_sock_recv__(sock, buf_txn->reply, false);
+        error = nl_sock_recv__(sock, buf_txn->reply, NULL, false);
         if (error) {
             if (error == EAGAIN) {
                 nl_sock_record_errors__(transactions, n, 0);
@@ -1101,7 +1142,7 @@ nl_dump_refill(struct nl_dump *dump, struct ofpbuf *buffer)
     int error;
 
     while (!buffer->size) {
-        error = nl_sock_recv__(dump->sock, buffer, false);
+        error = nl_sock_recv__(dump->sock, buffer, NULL, false);
         if (error) {
             /* The kernel never blocks providing the results of a dump, so
              * error == EAGAIN means that we've read the whole thing, and
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index d3cc64288..98f6554fa 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -221,7 +221,7 @@ int nl_sock_unsubscribe_packets(struct nl_sock *sock);
 int nl_sock_send(struct nl_sock *, const struct ofpbuf *, bool wait);
 int nl_sock_send_seq(struct nl_sock *, const struct ofpbuf *,
                      uint32_t nlmsg_seq, bool wait);
-int nl_sock_recv(struct nl_sock *, struct ofpbuf *, bool wait);
+int nl_sock_recv(struct nl_sock *, struct ofpbuf *, int *nsid, bool wait);
 
 int nl_sock_drain(struct nl_sock *);
 
diff --git a/lib/netnsid.h b/lib/netnsid.h
new file mode 100644
index 000000000..1d5ab83c5
--- /dev/null
+++ b/lib/netnsid.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2017 Red Hat Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NETNSID_H
+#define NETNSID_H 1
+
+#include <stdbool.h>
+
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+#include <linux/net_namespace.h>
+#endif
+
+/*
+ * The network namespace ID is a non-negative number that identifies the
+ * namespace from which a netlink message was sent.  It is used to determine
+ * whether a received message belongs to a port attached to the bridge.
+ *
+ * There are three port states listed below:
+ * UNSET: A port in this state means that it could be either in same network
+ * namespace as the daemon (LOCAL) or in another namespace (ID).  Any operation
+ * on a port in this state that requires the ID will trigger a query to the
+ * kernel to find out in which namespace the port currently is.
+ *
+ * LOCAL: A port in this state is in the same network namespace as the
+ * daemon.
+ *
+ * ID: A port that is neither LOCAL nor UNSET has a valid non-negative (zero
+ * included) remote namespace ID.
+ *
+ * Possible state changes:
+ *
+ * Initial port's state: UNSET.
+ *
+ * UNSET -> LOCAL: The daemon queries the kernel and finds that the port is
+ * in the daemon's own network namespace, or the API is not available (older
+ * kernels).
+ *
+ * LOCAL -> UNSET: The kernel sends a deregistering netlink message which
+ * unsets the port. It happens when the port is removed (or moved to another
+ * network namespace).
+ *
+ * UNSET -> ID: The daemon queries the kernel and finds that the port is
+ * in a specific network namespace with ID assigned.
+ *
+ * ID -> UNSET: The daemon receives a deregistering netlink message from that
+ * namespace indicating the device is being removed (or moved to another
+ * network namespace).
+ */
+
+#ifdef NETNSA_NSID_NOT_ASSIGNED
+#define NETNSID_LOCAL NETNSA_NSID_NOT_ASSIGNED
+#else
+#define NETNSID_LOCAL -1
+#endif
+#define NETNSID_UNSET (NETNSID_LOCAL - 1)
+
+/* Prototypes */
+static inline void netnsid_set_local(int *nsid);
+static inline bool netnsid_is_local(int nsid);
+static inline void netnsid_unset(int *nsid);
+static inline bool netnsid_is_unset(int nsid);
+static inline bool netnsid_is_remote(int nsid);
+static inline void netnsid_set(int *nsid, int id);
+static inline bool netnsid_eq(int nsid1, int nsid2);
+
+/* Functions */
+
+/* Marks '*nsid' as LOCAL, i.e. the same network namespace as the daemon. */
+static inline void
+netnsid_set_local(int *nsid)
+{
+    *nsid = NETNSID_LOCAL;
+}
+
+/* Returns true if 'nsid' is in the LOCAL state. */
+static inline bool
+netnsid_is_local(int nsid)
+{
+    return nsid == NETNSID_LOCAL;
+}
+
+/* Marks '*nsid' as UNSET, i.e. its namespace is not currently known. */
+static inline void
+netnsid_unset(int *nsid)
+{
+    *nsid = NETNSID_UNSET;
+}
+
+/* Returns true if 'nsid' is in the UNSET state. */
+static inline bool
+netnsid_is_unset(int nsid)
+{
+    return nsid == NETNSID_UNSET;
+}
+
+/* Returns true if 'nsid' is neither UNSET nor LOCAL, that is, if it holds
+ * the ID of a remote network namespace. */
+static inline bool
+netnsid_is_remote(int nsid)
+{
+    return !(netnsid_is_unset(nsid) || netnsid_is_local(nsid));
+}
+
+/* Stores 'id' in '*nsid'.  'id' must be either NETNSID_LOCAL or a
+ * non-negative namespace ID, which is all the kernel is expected to send. */
+static inline void
+netnsid_set(int *nsid, int id)
+{
+    if (id != NETNSID_LOCAL) {
+        ovs_assert(id >= 0);
+    }
+
+    *nsid = id;
+}
+
+/* Returns true if 'nsid1' and 'nsid2' are the same and neither is UNSET.
+ * An UNSET value never compares equal, not even to another UNSET value. */
+static inline bool
+netnsid_eq(int nsid1, int nsid2)
+{
+    return !netnsid_is_unset(nsid1)
+           && !netnsid_is_unset(nsid2)
+           && nsid1 == nsid2;
+}
+
+#endif
diff --git a/utilities/nlmon.c b/utilities/nlmon.c
index b91fa09b3..d38a70b6f 100644
--- a/utilities/nlmon.c
+++ b/utilities/nlmon.c
@@ -59,7 +59,7 @@ main(int argc OVS_UNUSED, char *argv[])
 
     ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
     for (;;) {
-        error = nl_sock_recv(sock, &buf, false);
+        error = nl_sock_recv(sock, &buf, NULL, false);
         if (error == EAGAIN) {
             /* Nothing to do. */
         } else if (error == ENOBUFS) {
-- 
2.14.3

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to