RTM_GETLINK commands can block for a considerable time if RTNL lock is very contended, slowing down the entire main thread. The way netdev-linux currently updates the state of each netdevs is by polling, i.e: sending a request for each netdev on every poll loop.
A more efficient approach would be to use netlink's convenient notification mechanism, registering callbacks to update the netdev's internal state. There are currently two mechanisms available: netlink-notifier (nln) and rtnetlink notifiers which rely on the former and only subscribe to the RTNLGRP_LINK mcast group. Given that netdev-linux needs to join also inet-related mcast groups, we could very well use netlink-notifier for all notifications. However, this comes with a slight inconvenience. Other subsystems already rely on rtnetlink to know when interfaces have changed (e.g: iface-notifier, route-table). When this happens, they typically trigger some kind of reconfiguration which is very likely to query the actual current state of netdevs. But if netdev_linux has not yet updated its internal state (because "netdev_linux_run()" has not run yet) and we want netdev internal state to be cached for performance, these subsystems will reconfigure themselves using known-to-be-dirty netdev cache. If we want to both cache netdev internal state (e.g: flags, ether_addr) and use a callback-based approach effectively, we need to use the same callback chain, i.e: we need to use rtnetlink to update netdev-linux netdevs as well. The order of callbacks cannot be guaranteed so it could still be possible to have another subsystem be notified before netdev-linux has been given a chance to update its state. However, subsystems would likely want to aggregate reconfigurations and will likely have their own "_run()" function to operate. Therefore, it could be reasonalbe to assume the other subsystems would use the callback to set a flag that is then used to trigger reconfigurations in their own periodic "_run()" functions (this is exactly the case of the two existing users: iface-notifier, route-table). This patch makes netdev-linux use rtnetlink notifier for interface updates and independent nln notifiers for address changes. Signed-off-by: Adrian Moreno <[email protected]> --- lib/netdev-linux.c | 211 ++++++++++++++++++++--------------------- lib/netdev-linux.h | 3 + lib/netlink-notifier.h | 4 +- lib/rtnetlink.c | 12 ++- 4 files changed, 118 insertions(+), 112 deletions(-) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index f1a9e5fcd..c49bef829 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -297,6 +297,10 @@ static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER; static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) = SHASH_INITIALIZER(&lag_shash); +static struct rtnetlink_change netlink_change; +static struct nln *nln = NULL; +static struct nln_notifier *notifiers[4] = {NULL, NULL, NULL, NULL}; + /* Traffic control. */ /* An instance of a traffic control class. Always associated with a particular @@ -647,40 +651,6 @@ static void netdev_linux_changed(struct netdev_linux *netdev, unsigned int ifi_flags, unsigned int mask) OVS_REQUIRES(netdev->mutex); -/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK, - * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR changes, or NULL - * if no such socket could be created. */ -static struct nl_sock * -netdev_linux_notify_sock(void) -{ - static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - static struct nl_sock *sock; - unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR, - RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; - - if (ovsthread_once_start(&once)) { - int error; - - error = nl_sock_create(NETLINK_ROUTE, &sock); - if (!error) { - size_t i; - - nl_sock_listen_all_nsid(sock, true); - for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { - error = nl_sock_join_mcgroup(sock, mcgroups[i]); - if (error) { - nl_sock_destroy(sock); - sock = NULL; - break; - } - } - } - ovsthread_once_done(&once); - } - - return sock; -} - static bool netdev_linux_miimon_enabled(void) { @@ -698,7 +668,7 @@ netdev_linux_kind_is_lag(const char *kind) } static void -netdev_linux_update_lag(struct rtnetlink_change *change) +netdev_linux_update_lag(const struct rtnetlink_change *change) OVS_REQUIRES(lag_mutex) { struct linux_lag_member *lag; @@ -763,100 +733,127 @@ netdev_linux_update_lag(struct rtnetlink_change *change) } void -netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) +netdev_linux_rtnetlink_update(const struct rtnetlink_change *change, int nsid, + void *aux OVS_UNUSED) { - struct nl_sock *sock; - int error; + struct netdev *netdev_ = NULL; - if (netdev_linux_miimon_enabled()) { - netdev_linux_miimon_run(); - } + if (change && change->irrelevant) { + return; + } else if (!change) { + struct shash device_shash; + struct shash_node *node; - sock = netdev_linux_notify_sock(); - if (!sock) { + shash_init(&device_shash); + netdev_get_devices(&netdev_linux_class, &device_shash); + SHASH_FOR_EACH (node, &device_shash) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + netdev_ = node->data; + int error; + + ovs_mutex_lock(&netdev->mutex); + error = update_flags_local(netdev); + netdev_linux_changed(netdev, netdev->ifi_flags, + error ? 0 : VALID_FLAGS); + ovs_mutex_unlock(&netdev->mutex); + + netdev_close(netdev_); + } + shash_destroy(&device_shash); return; } - do { - uint64_t buf_stub[4096 / 8]; - int nsid; - struct ofpbuf buf; + if (change->ifname) { + netdev_ = netdev_from_name(change->ifname); + } - ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); - error = nl_sock_recv(sock, &buf, &nsid, false); - if (!error) { - struct rtnetlink_change change; + if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (rtnetlink_parse(&buf, &change) && !change.irrelevant) { - struct netdev *netdev_ = NULL; - char dev_name[IFNAMSIZ]; + ovs_mutex_lock(&netdev->mutex); + netdev_linux_update(netdev, nsid, change); + ovs_mutex_unlock(&netdev->mutex); + } - if (!change.ifname) { - change.ifname = if_indextoname(change.if_index, dev_name); - } + if (change->ifname && + rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { - if (change.ifname) { - netdev_ = netdev_from_name(change.ifname); - } - if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) { - struct netdev_linux *netdev = netdev_linux_cast(netdev_); + /* Need to try updating the LAG information. */ + ovs_mutex_lock(&lag_mutex); + netdev_linux_update_lag(change); + ovs_mutex_unlock(&lag_mutex); + } + netdev_close(netdev_); +} - ovs_mutex_lock(&netdev->mutex); - netdev_linux_update(netdev, nsid, &change); - ovs_mutex_unlock(&netdev->mutex); - } +static void +netdev_linux_nln_cb(const void *change, int nsid, void *aux) +{ + netdev_linux_rtnetlink_update(change, nsid, aux); +} - if (change.ifname && - rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { +static void +netdev_linux_rtnetlink_config(void) +{ + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - /* Need to try updating the LAG information. */ - ovs_mutex_lock(&lag_mutex); - netdev_linux_update_lag(&change); - ovs_mutex_unlock(&lag_mutex); - } - netdev_close(netdev_); - } - } else if (error == ENOBUFS) { - struct shash device_shash; - struct shash_node *node; + if (ovsthread_once_start(&once)) { + unsigned int extra_mcgroups[] = {RTNLGRP_IPV4_IFADDR, + RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; + struct nln_notifier *notifier; + int i; - nl_sock_drain(sock); + ovs_assert(ARRAY_SIZE(notifiers) == ARRAY_SIZE(extra_mcgroups) + 1); - shash_init(&device_shash); - netdev_get_devices(&netdev_linux_class, &device_shash); - SHASH_FOR_EACH (node, &device_shash) { - struct netdev *netdev_ = node->data; - struct netdev_linux *netdev = netdev_linux_cast(netdev_); - - ovs_mutex_lock(&netdev->mutex); - update_flags_local(netdev); - netdev_linux_changed(netdev, netdev->ifi_flags, VALID_FLAGS); - ovs_mutex_unlock(&netdev->mutex); - - netdev_close(netdev_); - } - shash_destroy(&device_shash); - } else if (error != EAGAIN) { - static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rll, "error reading or parsing netlink (%s)", - ovs_strerror(error)); + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, + &netlink_change); + if (!nln) { + VLOG_ERR("failed to create nln notifier"); } - ofpbuf_uninit(&buf); - } while (!error); + + for (i = 0; i < ARRAY_SIZE(extra_mcgroups); i++) { + notifier = nln_notifier_create(nln, extra_mcgroups[i], + netdev_linux_nln_cb, NULL); + if (!notifier) { + VLOG_ERR("failed to create nln notifier for mcgroup %d", + extra_mcgroups[i]); + } + notifiers[i] = notifier; + } + + notifier = rtnetlink_notifier_create(netdev_linux_rtnetlink_update, + NULL); + if (!notifier) { + VLOG_ERR("failed to create rtnetlink notifier"); + } + notifiers[i] = notifier; + + ovsthread_once_done(&once); + } +} + +void +netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) +{ + rtnetlink_run(); + if (nln) { + nln_run(nln); + } + if (netdev_linux_miimon_enabled()) { + netdev_linux_miimon_run(); + } } static void netdev_linux_wait(const struct netdev_class *netdev_class OVS_UNUSED) { - struct nl_sock *sock; - + rtnetlink_wait(); + if (nln) { + nln_wait(nln); + } if (netdev_linux_miimon_enabled()) { netdev_linux_miimon_wait(); } - sock = netdev_linux_notify_sock(); - if (sock) { - nl_sock_wait(sock, POLLIN); - } } static void @@ -899,9 +896,6 @@ netdev_linux_update__(struct netdev_linux *dev, dev->etheraddr = change->mac; dev->cache_valid |= VALID_ETHERADDR; dev->ether_addr_error = 0; - - /* The mac addr has been changed, report it now. */ - rtnetlink_report_link(); } if (change->primary && netdev_linux_kind_is_lag(change->primary)) { @@ -963,6 +957,7 @@ netdev_linux_common_construct(struct netdev *netdev_) name); return EINVAL; } + netdev_linux_rtnetlink_config(); /* The device could be in the same network namespace or in another one. */ netnsid_unset(&netdev->netnsid); diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h index ec19b0ded..a2f2215a9 100644 --- a/lib/netdev-linux.h +++ b/lib/netdev-linux.h @@ -25,6 +25,7 @@ * Linux-specific code. */ struct netdev; +struct rtnetlink_change; int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag, const char *flag_name, bool enable); @@ -34,5 +35,7 @@ int tc_add_policer_action(uint32_t index, uint64_t kbits_rate, uint32_t pkts_burst, bool update); int tc_del_policer_action(uint32_t index, struct ofputil_meter_stats *stats); int tc_get_policer_action(uint32_t index, struct ofputil_meter_stats *stats); +void netdev_linux_rtnetlink_update(const struct rtnetlink_change *change, + int nsid, void *); #endif /* netdev-linux.h */ diff --git a/lib/netlink-notifier.h b/lib/netlink-notifier.h index 60c002615..03c40db31 100644 --- a/lib/netlink-notifier.h +++ b/lib/netlink-notifier.h @@ -1,5 +1,4 @@ -/* - * Copyright (c) 2009 Nicira, Inc. +/* Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,4 +50,5 @@ void nln_notifier_destroy(struct nln_notifier *); void nln_run(struct nln *); void nln_wait(struct nln *); void nln_report(const struct nln *nln, void *change, int group, int nsid); +int rtnetlink_parse_cb(struct ofpbuf *buf, void *change); #endif /* netlink-notifier.h */ diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c index 82613cfd5..30a4557c1 100644 --- a/lib/rtnetlink.c +++ b/lib/rtnetlink.c @@ -190,7 +190,7 @@ rtnetlink_parse(struct ofpbuf *buf, struct rtnetlink_change *change) } /* Return RTNLGRP_LINK on success, 0 on parse error. */ -static int +int rtnetlink_parse_cb(struct ofpbuf *buf, void *change) { return rtnetlink_parse(buf, change) ? RTNLGRP_LINK : 0; @@ -206,12 +206,20 @@ rtnetlink_parse_cb(struct ofpbuf *buf, void *change) * * xxx Joins more multicast groups when needed. * + * Callbacks might find that netdev-linux netdevs still hold outdated cached + * information. If the notification has to trigger some kind of reconfiguration + * that requires up-to-date netdev cache, it should do it asynchronously, for + * instance by setting a flag in the callback and acting on it during the + * normal "*_run()" operation. + * + * Notifications might come from any network namespace. + * * Returns an initialized nln_notifier if successful, NULL otherwise. */ struct nln_notifier * rtnetlink_notifier_create(rtnetlink_notify_func *cb, void *aux) { if (!nln) { - nln = nln_create(NETLINK_ROUTE, false, rtnetlink_parse_cb, + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, &rtn_change); } -- 2.52.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
