Now that netdev-linux flags are also cached, we need to ensure they are updated correctly.
Currently, there are subsystems that rely on link state changes via rtnetlink notifiers (e.g: iface-notifier, route-table). In parallel, netdev-linux creates its own netlink socket to listen for link notifications based on which netdev's states are updated. This creates a potential race: some subsystems might be notified that a link changed but the internal netdev representation is still old. If we want to both cache netdev internal state (e.g: flags, ether_addr) effectively we need to use the same callback chain, i.e: we need to use rtnetlink to update netdev-linux netdevs as well when links change. The order of callbacks cannot be guaranteed so it could still be possible to have another subsystem be notified before netdev-linux has been given a chance to update its state. However, subsystems would likely want to aggregate reconfigurations and will likely have their own "_run()" function to operate. Therefore, it could be reasonalbe to assume the other subsystems would use the callback to set a flag that is then used to trigger reconfigurations in their own periodic "_run()" functions (this is exactly the case of the two existing users: iface-notifier, route-table). This patch makes netdev-linux use rtnetlink notifier for interface updates and independent nln notifiers for address changes. Signed-off-by: Adrian Moreno <[email protected]> --- lib/netdev-linux.c | 214 ++++++++++++++++++++--------------------- lib/netdev-linux.h | 1 + lib/netlink-notifier.h | 3 +- lib/rtnetlink.c | 12 ++- lib/rtnetlink.h | 1 + 5 files changed, 118 insertions(+), 113 deletions(-) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index e6127240b..97be563f7 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -298,6 +298,10 @@ static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER; static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) = SHASH_INITIALIZER(&lag_shash); +static struct rtnetlink_change netlink_change; +static struct nln *nln = NULL; +static struct nln_notifier *notifiers[4] = {NULL, NULL, NULL, NULL}; + /* Traffic control. */ /* An instance of a traffic control class. Always associated with a particular @@ -648,40 +652,6 @@ static void netdev_linux_changed(struct netdev_linux *netdev, unsigned int ifi_flags, unsigned int mask) OVS_REQUIRES(netdev->mutex); -/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK, - * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR changes, or NULL - * if no such socket could be created. */ -static struct nl_sock * -netdev_linux_notify_sock(void) -{ - static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - static struct nl_sock *sock; - unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR, - RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; - - if (ovsthread_once_start(&once)) { - int error; - - error = nl_sock_create(NETLINK_ROUTE, &sock); - if (!error) { - size_t i; - - nl_sock_listen_all_nsid(sock, true); - for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { - error = nl_sock_join_mcgroup(sock, mcgroups[i]); - if (error) { - nl_sock_destroy(sock); - sock = NULL; - break; - } - } - } - ovsthread_once_done(&once); - } - - return sock; -} - static bool netdev_linux_miimon_enabled(void) { @@ -699,7 +669,7 @@ netdev_linux_kind_is_lag(const char *kind) } static void -netdev_linux_update_lag(struct rtnetlink_change *change) +netdev_linux_update_lag(const struct rtnetlink_change *change) OVS_REQUIRES(lag_mutex) { struct linux_lag_member *lag; @@ -763,101 +733,128 @@ netdev_linux_update_lag(struct rtnetlink_change *change) } } -void -netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) +static void +netdev_linux_rtnetlink_update(const struct rtnetlink_change *change, + void *aux OVS_UNUSED) { - struct nl_sock *sock; - int error; + struct netdev *netdev_ = NULL; - if (netdev_linux_miimon_enabled()) { - netdev_linux_miimon_run(); - } + if (change && change->irrelevant) { + return; + } else if (!change) { + struct shash device_shash; + struct shash_node *node; - sock = netdev_linux_notify_sock(); - if (!sock) { + shash_init(&device_shash); + netdev_get_devices(&netdev_linux_class, &device_shash); + SHASH_FOR_EACH (node, &device_shash) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + netdev_ = node->data; + int error; + + ovs_mutex_lock(&netdev->mutex); + error = update_flags_local(netdev); + netdev_linux_changed(netdev, netdev->ifi_flags, + error ? 0 : VALID_FLAGS); + ovs_mutex_unlock(&netdev->mutex); + + netdev_close(netdev_); + } + shash_destroy(&device_shash); return; } - do { - uint64_t buf_stub[4096 / 8]; - int nsid; - struct ofpbuf buf; + if (change->ifname) { + netdev_ = netdev_from_name(change->ifname); + } - ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); - error = nl_sock_recv(sock, &buf, &nsid, false); - if (!error) { - struct rtnetlink_change change; + if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (rtnetlink_parse(&buf, &change) && !change.irrelevant) { - struct netdev *netdev_ = NULL; - char dev_name[IFNAMSIZ]; + ovs_mutex_lock(&netdev->mutex); + netdev_linux_update(netdev, change->nsid, change); + ovs_mutex_unlock(&netdev->mutex); + } - if (!change.ifname) { - change.ifname = if_indextoname(change.if_index, dev_name); - } + if (change->ifname && + rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { - if (change.ifname) { - netdev_ = netdev_from_name(change.ifname); - } - if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) { - struct netdev_linux *netdev = netdev_linux_cast(netdev_); + /* Need to try updating the LAG information. */ + ovs_mutex_lock(&lag_mutex); + netdev_linux_update_lag(change); + ovs_mutex_unlock(&lag_mutex); + } + netdev_close(netdev_); +} - ovs_mutex_lock(&netdev->mutex); - netdev_linux_update(netdev, nsid, &change); - ovs_mutex_unlock(&netdev->mutex); - } +static void +netdev_linux_nln_cb(const void *change, void *aux) +{ + netdev_linux_rtnetlink_update(change, aux); +} - if (change.ifname && - rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { +static void +netdev_linux_rtnetlink_config(void) +{ + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - /* Need to try updating the LAG information. */ - ovs_mutex_lock(&lag_mutex); - netdev_linux_update_lag(&change); - ovs_mutex_unlock(&lag_mutex); - } - netdev_close(netdev_); - } - } else if (error == ENOBUFS) { - struct shash device_shash; - struct shash_node *node; + if (ovsthread_once_start(&once)) { + unsigned int extra_mcgroups[] = {RTNLGRP_IPV4_IFADDR, + RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; + struct nln_notifier *notifier; + int i; - nl_sock_drain(sock); + ovs_assert(ARRAY_SIZE(notifiers) == ARRAY_SIZE(extra_mcgroups) + 1); - shash_init(&device_shash); - netdev_get_devices(&netdev_linux_class, &device_shash); - SHASH_FOR_EACH (node, &device_shash) { - struct netdev *netdev_ = node->data; - struct netdev_linux *netdev = netdev_linux_cast(netdev_); - - ovs_mutex_lock(&netdev->mutex); - update_flags_local(netdev); - netdev_linux_changed(netdev, netdev->ifi_flags, VALID_FLAGS); - ovs_mutex_unlock(&netdev->mutex); - - netdev_close(netdev_); - } - shash_destroy(&device_shash); - } else if (error != EAGAIN) { - static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rll, "error reading or parsing netlink (%s)", - ovs_strerror(error)); + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, + &netlink_change); + if (!nln) { + VLOG_ERR("failed to create nln notifier"); } - ofpbuf_uninit(&buf); - } while (!error); + + for (i = 0; i < ARRAY_SIZE(extra_mcgroups); i++) { + notifier = nln_notifier_create(nln, extra_mcgroups[i], + netdev_linux_nln_cb, NULL); + if (!notifier) { + VLOG_ERR("failed to create nln notifier for mcgroup %d", + extra_mcgroups[i]); + } + notifiers[i] = notifier; + } + + notifier = rtnetlink_notifier_create(netdev_linux_rtnetlink_update, + NULL); + if (!notifier) { + VLOG_ERR("failed to create rtnetlink notifier"); + } + notifiers[i] = notifier; + + ovsthread_once_done(&once); + } +} + +void +netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) +{ + rtnetlink_run(); + if (nln) { + nln_run(nln); + } + if (netdev_linux_miimon_enabled()) { + netdev_linux_miimon_run(); + } } static void netdev_linux_wait(const struct netdev_class *netdev_class OVS_UNUSED) { - struct nl_sock *sock; - + rtnetlink_wait(); + if (nln) { + nln_wait(nln); + } if (netdev_linux_miimon_enabled()) { netdev_linux_miimon_wait(); } - sock = netdev_linux_notify_sock(); - if (sock) { - nl_sock_wait(sock, POLLIN); - } } static void @@ -900,9 +897,6 @@ netdev_linux_update__(struct netdev_linux *dev, dev->etheraddr = change->mac; dev->cache_valid |= VALID_ETHERADDR; dev->ether_addr_error = 0; - - /* The mac addr has been changed, report it now. */ - rtnetlink_report_link(); } if (change->primary && netdev_linux_kind_is_lag(change->primary)) { @@ -968,6 +962,8 @@ netdev_linux_common_construct(struct netdev *netdev_) return ENAMETOOLONG; } + netdev_linux_rtnetlink_config(); + /* The device could be in the same network namespace or in another one. */ netnsid_unset(&netdev->netnsid); ovs_mutex_init(&netdev->mutex); diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h index ec19b0ded..81a5634f6 100644 --- a/lib/netdev-linux.h +++ b/lib/netdev-linux.h @@ -25,6 +25,7 @@ * Linux-specific code. */ struct netdev; +struct rtnetlink_change; int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag, const char *flag_name, bool enable); diff --git a/lib/netlink-notifier.h b/lib/netlink-notifier.h index 8b6c32048..d52e5f56c 100644 --- a/lib/netlink-notifier.h +++ b/lib/netlink-notifier.h @@ -1,5 +1,4 @@ -/* - * Copyright (c) 2009 Nicira, Inc. +/* Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c index 6b63afa06..4ae050ff0 100644 --- a/lib/rtnetlink.c +++ b/lib/rtnetlink.c @@ -191,7 +191,7 @@ rtnetlink_parse(struct ofpbuf *buf, struct rtnetlink_change *change) } /* Return RTNLGRP_LINK on success, 0 on parse error. */ -static int +int rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change) { bool ret = rtnetlink_parse(buf, change); @@ -212,12 +212,20 @@ rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change) * * xxx Joins more multicast groups when needed. * + * Callbacks might find that netdev-linux netdevs still hold outdated cached + * information. If the notification has to trigger some kind of reconfiguration + * that requires up-to-date netdev cache, it should do it asynchronously, for + * instance by setting a flag in the callback and acting on it during the + * normal "*_run()" operation. + * + * Notifications might come from any network namespace. + * * Returns an initialized nln_notifier if successful, NULL otherwise. */ struct nln_notifier * rtnetlink_notifier_create(rtnetlink_notify_func *cb, void *aux) { if (!nln) { - nln = nln_create(NETLINK_ROUTE, false, rtnetlink_parse_cb, + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, &rtn_change); } diff --git a/lib/rtnetlink.h b/lib/rtnetlink.h index 9b2397b4e..f0b49bf1a 100644 --- a/lib/rtnetlink.h +++ b/lib/rtnetlink.h @@ -76,4 +76,5 @@ void rtnetlink_notifier_destroy(struct nln_notifier *); void rtnetlink_run(void); void rtnetlink_wait(void); void rtnetlink_report_link(void); +int rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change); #endif /* rtnetlink.h */ -- 2.53.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
