RTM_GETLINK commands can block for a considerable time if RTNL lock is
very contended, slowing down the entire main thread. The way
netdev-linux currently updates the state of each netdevs is by polling,
i.e: sending a request for each netdev on every poll loop.

A more efficient approach would be to use netlink's convenient
notification mechanism, registering callbacks to update the netdev's
internal state.

There are currently two mechanisms available: netlink-notifier (nln) and
rtnetlink notifiers which rely on the former and only subscribe to the
RTNLGRP_LINK mcast group. Given that netdev-linux needs to join also
inet-related mcast groups, we could very well use netlink-notifier for
all notifications.

However, this comes with a slight inconvenience. Other subsystems
already rely on rtnetlink to know when interfaces have changed (e.g:
iface-notifier, route-table). When this happens, they typically trigger
some kind of reconfiguration which is very likely to query the actual
current state of netdevs. But if netdev_linux has not yet updated its
internal state (because "netdev_linux_run()" has not run yet) and we want
netdev internal state to be cached for performance, these subsystems
will reconfigure themselves using known-to-be-dirty netdev cache.

If we want to both cache netdev internal state (e.g: flags, ether_addr) and
use a callback-based approach effectively, we need to use the same
callback chain, i.e: we need to use rtnetlink to update netdev-linux
netdevs as well.

The order of callbacks cannot be guaranteed so it could still be possible to
have another subsystem be notified  before netdev-linux has been given a chance
to update its state. However, subsystems would likely want to aggregate
reconfigurations and will likely have their own "_run()" function to operate.

Therefore, it could be reasonalbe to assume the other subsystems would use the
callback to set a flag that is then used to trigger reconfigurations in their
own periodic "_run()" functions (this is exactly the case of the two existing
users: iface-notifier, route-table).

This patch makes netdev-linux use rtnetlink notifier for interface
updates and independent nln notifiers for address changes.

Signed-off-by: Adrian Moreno <[email protected]>
---
 lib/netdev-linux.c     | 211 ++++++++++++++++++++---------------------
 lib/netdev-linux.h     |   3 +
 lib/netlink-notifier.h |   4 +-
 lib/rtnetlink.c        |  12 ++-
 4 files changed, 118 insertions(+), 112 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index f1a9e5fcd..c49bef829 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -297,6 +297,10 @@ static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER;
 static struct shash lag_shash OVS_GUARDED_BY(lag_mutex)
     = SHASH_INITIALIZER(&lag_shash);
 
+static struct rtnetlink_change netlink_change;
+static struct nln *nln = NULL;
+static struct nln_notifier *notifiers[4] = {NULL, NULL, NULL, NULL};
+
 /* Traffic control. */
 
 /* An instance of a traffic control class.  Always associated with a particular
@@ -647,40 +651,6 @@ static void netdev_linux_changed(struct netdev_linux 
*netdev,
                                  unsigned int ifi_flags, unsigned int mask)
     OVS_REQUIRES(netdev->mutex);
 
-/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK,
- * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR changes, or NULL
- * if no such socket could be created. */
-static struct nl_sock *
-netdev_linux_notify_sock(void)
-{
-    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
-    static struct nl_sock *sock;
-    unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR,
-                                RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO};
-
-    if (ovsthread_once_start(&once)) {
-        int error;
-
-        error = nl_sock_create(NETLINK_ROUTE, &sock);
-        if (!error) {
-            size_t i;
-
-            nl_sock_listen_all_nsid(sock, true);
-            for (i = 0; i < ARRAY_SIZE(mcgroups); i++) {
-                error = nl_sock_join_mcgroup(sock, mcgroups[i]);
-                if (error) {
-                    nl_sock_destroy(sock);
-                    sock = NULL;
-                    break;
-                }
-            }
-        }
-        ovsthread_once_done(&once);
-    }
-
-    return sock;
-}
-
 static bool
 netdev_linux_miimon_enabled(void)
 {
@@ -698,7 +668,7 @@ netdev_linux_kind_is_lag(const char *kind)
 }
 
 static void
-netdev_linux_update_lag(struct rtnetlink_change *change)
+netdev_linux_update_lag(const struct rtnetlink_change *change)
     OVS_REQUIRES(lag_mutex)
 {
     struct linux_lag_member *lag;
@@ -763,100 +733,127 @@ netdev_linux_update_lag(struct rtnetlink_change *change)
 }
 
 void
-netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
+netdev_linux_rtnetlink_update(const struct rtnetlink_change *change, int nsid,
+                              void *aux OVS_UNUSED)
 {
-    struct nl_sock *sock;
-    int error;
+    struct netdev *netdev_ = NULL;
 
-    if (netdev_linux_miimon_enabled()) {
-        netdev_linux_miimon_run();
-    }
+    if (change && change->irrelevant) {
+        return;
+    } else if (!change) {
+        struct shash device_shash;
+        struct shash_node *node;
 
-    sock = netdev_linux_notify_sock();
-    if (!sock) {
+        shash_init(&device_shash);
+        netdev_get_devices(&netdev_linux_class, &device_shash);
+        SHASH_FOR_EACH (node, &device_shash) {
+            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+            netdev_ = node->data;
+            int error;
+
+            ovs_mutex_lock(&netdev->mutex);
+            error = update_flags_local(netdev);
+            netdev_linux_changed(netdev, netdev->ifi_flags,
+                                 error ? 0 : VALID_FLAGS);
+            ovs_mutex_unlock(&netdev->mutex);
+
+            netdev_close(netdev_);
+        }
+        shash_destroy(&device_shash);
         return;
     }
 
-    do {
-        uint64_t buf_stub[4096 / 8];
-        int nsid;
-        struct ofpbuf buf;
+    if (change->ifname) {
+        netdev_ = netdev_from_name(change->ifname);
+    }
 
-        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
-        error = nl_sock_recv(sock, &buf, &nsid, false);
-        if (!error) {
-            struct rtnetlink_change change;
+    if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
+        struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
-            if (rtnetlink_parse(&buf, &change) && !change.irrelevant) {
-                struct netdev *netdev_ = NULL;
-                char dev_name[IFNAMSIZ];
+        ovs_mutex_lock(&netdev->mutex);
+        netdev_linux_update(netdev, nsid, change);
+        ovs_mutex_unlock(&netdev->mutex);
+    }
 
-                if (!change.ifname) {
-                     change.ifname = if_indextoname(change.if_index, dev_name);
-                }
+    if (change->ifname &&
+        rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) {
 
-                if (change.ifname) {
-                    netdev_ = netdev_from_name(change.ifname);
-                }
-                if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
-                    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+        /* Need to try updating the LAG information. */
+        ovs_mutex_lock(&lag_mutex);
+        netdev_linux_update_lag(change);
+        ovs_mutex_unlock(&lag_mutex);
+    }
+    netdev_close(netdev_);
+}
 
-                    ovs_mutex_lock(&netdev->mutex);
-                    netdev_linux_update(netdev, nsid, &change);
-                    ovs_mutex_unlock(&netdev->mutex);
-                }
+static void
+netdev_linux_nln_cb(const void *change, int nsid, void *aux)
+{
+    netdev_linux_rtnetlink_update(change, nsid, aux);
+}
 
-                if (change.ifname &&
-                    rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) {
+static void
+netdev_linux_rtnetlink_config(void)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
 
-                    /* Need to try updating the LAG information. */
-                    ovs_mutex_lock(&lag_mutex);
-                    netdev_linux_update_lag(&change);
-                    ovs_mutex_unlock(&lag_mutex);
-                }
-                netdev_close(netdev_);
-            }
-        } else if (error == ENOBUFS) {
-            struct shash device_shash;
-            struct shash_node *node;
+    if (ovsthread_once_start(&once)) {
+        unsigned int extra_mcgroups[] = {RTNLGRP_IPV4_IFADDR,
+                                RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO};
+        struct nln_notifier *notifier;
+        int i;
 
-            nl_sock_drain(sock);
+        ovs_assert(ARRAY_SIZE(notifiers) == ARRAY_SIZE(extra_mcgroups) + 1);
 
-            shash_init(&device_shash);
-            netdev_get_devices(&netdev_linux_class, &device_shash);
-            SHASH_FOR_EACH (node, &device_shash) {
-                struct netdev *netdev_ = node->data;
-                struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-
-                ovs_mutex_lock(&netdev->mutex);
-                update_flags_local(netdev);
-                netdev_linux_changed(netdev, netdev->ifi_flags, VALID_FLAGS);
-                ovs_mutex_unlock(&netdev->mutex);
-
-                netdev_close(netdev_);
-            }
-            shash_destroy(&device_shash);
-        } else if (error != EAGAIN) {
-            static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5);
-            VLOG_WARN_RL(&rll, "error reading or parsing netlink (%s)",
-                         ovs_strerror(error));
+        nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb,
+                         &netlink_change);
+        if (!nln) {
+            VLOG_ERR("failed to create nln notifier");
         }
-        ofpbuf_uninit(&buf);
-    } while (!error);
+
+        for (i = 0; i < ARRAY_SIZE(extra_mcgroups); i++) {
+            notifier = nln_notifier_create(nln, extra_mcgroups[i],
+                                           netdev_linux_nln_cb, NULL);
+            if (!notifier) {
+                VLOG_ERR("failed to create nln notifier for mcgroup %d",
+                         extra_mcgroups[i]);
+            }
+            notifiers[i] = notifier;
+        }
+
+        notifier = rtnetlink_notifier_create(netdev_linux_rtnetlink_update,
+                                             NULL);
+        if (!notifier) {
+            VLOG_ERR("failed to create rtnetlink notifier");
+        }
+        notifiers[i] = notifier;
+
+        ovsthread_once_done(&once);
+    }
+}
+
+void
+netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
+{
+    rtnetlink_run();
+    if (nln) {
+        nln_run(nln);
+    }
+    if (netdev_linux_miimon_enabled()) {
+        netdev_linux_miimon_run();
+    }
 }
 
 static void
 netdev_linux_wait(const struct netdev_class *netdev_class OVS_UNUSED)
 {
-    struct nl_sock *sock;
-
+    rtnetlink_wait();
+    if (nln) {
+        nln_wait(nln);
+    }
     if (netdev_linux_miimon_enabled()) {
         netdev_linux_miimon_wait();
     }
-    sock = netdev_linux_notify_sock();
-    if (sock) {
-        nl_sock_wait(sock, POLLIN);
-    }
 }
 
 static void
@@ -899,9 +896,6 @@ netdev_linux_update__(struct netdev_linux *dev,
                 dev->etheraddr = change->mac;
                 dev->cache_valid |= VALID_ETHERADDR;
                 dev->ether_addr_error = 0;
-
-                /* The mac addr has been changed, report it now. */
-                rtnetlink_report_link();
             }
 
             if (change->primary && netdev_linux_kind_is_lag(change->primary)) {
@@ -963,6 +957,7 @@ netdev_linux_common_construct(struct netdev *netdev_)
                      name);
         return EINVAL;
     }
+    netdev_linux_rtnetlink_config();
 
     /* The device could be in the same network namespace or in another one. */
     netnsid_unset(&netdev->netnsid);
diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h
index ec19b0ded..a2f2215a9 100644
--- a/lib/netdev-linux.h
+++ b/lib/netdev-linux.h
@@ -25,6 +25,7 @@
  * Linux-specific code. */
 
 struct netdev;
+struct rtnetlink_change;
 
 int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag,
                                   const char *flag_name, bool enable);
@@ -34,5 +35,7 @@ int tc_add_policer_action(uint32_t index, uint64_t kbits_rate,
                           uint32_t pkts_burst, bool update);
 int tc_del_policer_action(uint32_t index, struct ofputil_meter_stats *stats);
 int tc_get_policer_action(uint32_t index, struct ofputil_meter_stats *stats);
+void netdev_linux_rtnetlink_update(const struct rtnetlink_change *change,
+                                   int nsid, void *);
 
 #endif /* netdev-linux.h */
diff --git a/lib/netlink-notifier.h b/lib/netlink-notifier.h
index 60c002615..03c40db31 100644
--- a/lib/netlink-notifier.h
+++ b/lib/netlink-notifier.h
@@ -1,5 +1,4 @@
-/*
- * Copyright (c) 2009 Nicira, Inc.
+/* Copyright (c) 2009 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -51,4 +50,5 @@ void nln_notifier_destroy(struct nln_notifier *);
 void nln_run(struct nln *);
 void nln_wait(struct nln *);
 void nln_report(const struct nln *nln, void *change, int group, int nsid);
+int rtnetlink_parse_cb(struct ofpbuf *buf, void *change);
 #endif /* netlink-notifier.h */
diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c
index 82613cfd5..30a4557c1 100644
--- a/lib/rtnetlink.c
+++ b/lib/rtnetlink.c
@@ -190,7 +190,7 @@ rtnetlink_parse(struct ofpbuf *buf, struct rtnetlink_change 
*change)
 }
 
 /* Return RTNLGRP_LINK on success, 0 on parse error. */
-static int
+int
 rtnetlink_parse_cb(struct ofpbuf *buf, void *change)
 {
     return rtnetlink_parse(buf, change) ? RTNLGRP_LINK : 0;
@@ -206,12 +206,20 @@ rtnetlink_parse_cb(struct ofpbuf *buf, void *change)
  *
  * xxx Joins more multicast groups when needed.
  *
+ * Callbacks might find that netdev-linux netdevs still hold outdated cached
+ * information. If the notification has to trigger some kind of reconfiguration
+ * that requires up-to-date netdev cache, it should do it asynchronously, for
+ * instance by setting a flag in the callback and acting on it during the
+ * normal "*_run()" operation.
+ *
+ * Notifications might come from any network namespace.
+ *
  * Returns an initialized nln_notifier if successful, NULL otherwise. */
 struct nln_notifier *
 rtnetlink_notifier_create(rtnetlink_notify_func *cb, void *aux)
 {
     if (!nln) {
-        nln = nln_create(NETLINK_ROUTE, false, rtnetlink_parse_cb,
+        nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb,
                          &rtn_change);
     }
 
-- 
2.52.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to