Previously, we tracked status changes for ofports on a per-device basis.
Each time in the main thread's loop, we would loop through all ofports
and manually check whether the status has changed for corresponding
devices.

This patch shifts change_seq above the netdevice layer, with one atomic
variable tracking status change for all ports. In the average case where
ports are not constantly going up or down, this allows us to check
change_seq once per loop and not poll any ports. In the worst case,
execution is expected to be similar to how it is currently.

As change_seq is no longer tracked per-device, it doesn't make sense to
cache this status in each ofport struct. As such, we shift this into the
ofproto struct.

In a test environment of 5000 internal ports and 50 tunnel ports with
bfd, this reduces CPU usage from about 45% to about 35%.

Signed-off-by: Joe Stringer <[email protected]>
---
 lib/bond.c                 |    4 ++--
 lib/netdev-bsd.c           |   22 ++++---------------
 lib/netdev-dummy.c         |   25 +++------------------
 lib/netdev-linux.c         |   23 ++------------------
 lib/netdev-provider.h      |   11 ----------
 lib/netdev-vport.c         |   18 +++------------
 lib/netdev.c               |   52 ++++++++++++++++++++++++++++++++++++--------
 lib/netdev.h               |    4 +++-
 ofproto/ofproto-provider.h |    2 +-
 ofproto/ofproto.c          |   49 +++++++++++++++++++----------------------
 ofproto/tunnel.c           |    4 ++--
 11 files changed, 85 insertions(+), 129 deletions(-)

diff --git a/lib/bond.c b/lib/bond.c
index 5be1bae..1879805 100644
--- a/lib/bond.c
+++ b/lib/bond.c
@@ -435,7 +435,7 @@ bond_run(struct bond *bond, enum lacp_status lacp_status)
     /* Enable slaves based on link status and LACP feedback. */
     HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
         bond_link_status_update(slave);
-        slave->change_seq = netdev_change_seq(slave->netdev);
+        netdev_update_seq(&slave->change_seq);
     }
     if (!bond->active_slave || !bond->active_slave->enabled) {
         bond_choose_active_slave(bond);
@@ -466,7 +466,7 @@ bond_wait(struct bond *bond)
             poll_timer_wait_until(slave->delay_expires);
         }
 
-        if (slave->change_seq != netdev_change_seq(slave->netdev)) {
+        if (netdev_changed(slave->change_seq)) {
             poll_immediate_wake();
         }
     }
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 3eb29ea..ae611fe 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -86,7 +86,6 @@ struct netdev_bsd {
     struct ovs_mutex mutex;
 
     unsigned int cache_valid;
-    unsigned int change_seq;
 
     int ifindex;
     uint8_t etheraddr[ETH_ADDR_LEN];
@@ -198,12 +197,9 @@ netdev_bsd_wait(void)
 }
 
 static void
-netdev_bsd_changed(struct netdev_bsd *dev)
+netdev_bsd_changed(struct netdev_bsd *dev OVS_UNUSED)
 {
-    dev->change_seq++;
-    if (!dev->change_seq) {
-        dev->change_seq++;
-    }
+    netdev_notify_change();
 }
 
 /* Invalidate cache in case of interface status change. */
@@ -294,7 +290,7 @@ netdev_bsd_construct_system(struct netdev *netdev_)
     }
 
     ovs_mutex_init(&netdev->mutex);
-    netdev->change_seq = 1;
+    netdev_notify_change();
     netdev->tap_fd = -1;
     netdev->kernel_name = xstrdup(netdev_->name);
 
@@ -329,7 +325,7 @@ netdev_bsd_construct_tap(struct netdev *netdev_)
      * to retrieve the name of the tap device. */
     ovs_mutex_init(&netdev->mutex);
     netdev->tap_fd = open("/dev/tap", O_RDWR);
-    netdev->change_seq = 1;
+    netdev_notify_change();
     if (netdev->tap_fd < 0) {
         error = errno;
         VLOG_WARN("opening \"/dev/tap\" failed: %s", ovs_strerror(error));
@@ -1470,12 +1466,6 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
     return error;
 }
 
-static unsigned int
-netdev_bsd_change_seq(const struct netdev *netdev)
-{
-    return netdev_bsd_cast(netdev)->change_seq;
-}
-
 
 const struct netdev_class netdev_bsd_class = {
     "system",
@@ -1531,8 +1521,6 @@ const struct netdev_class netdev_bsd_class = {
 
     netdev_bsd_update_flags,
 
-    netdev_bsd_change_seq,
-
     netdev_bsd_rx_alloc,
     netdev_bsd_rx_construct,
     netdev_bsd_rx_destruct,
@@ -1596,8 +1584,6 @@ const struct netdev_class netdev_tap_class = {
 
     netdev_bsd_update_flags,
 
-    netdev_bsd_change_seq,
-
     netdev_bsd_rx_alloc,
     netdev_bsd_rx_construct,
     netdev_bsd_rx_destruct,
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index fd30454..83c70f6 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -66,7 +66,6 @@ struct netdev_dummy {
     int mtu OVS_GUARDED;
     struct netdev_stats stats OVS_GUARDED;
     enum netdev_flags flags OVS_GUARDED;
-    unsigned int change_seq OVS_GUARDED;
     int ifindex OVS_GUARDED;
 
     struct pstream *pstream OVS_GUARDED;
@@ -285,7 +284,7 @@ netdev_dummy_construct(struct netdev *netdev_)
     netdev->hwaddr[5] = n;
     netdev->mtu = 1500;
     netdev->flags = 0;
-    netdev->change_seq = 1;
+    netdev_dummy_changed(netdev);
     netdev->ifindex = -EOPNOTSUPP;
 
     netdev->pstream = NULL;
@@ -686,29 +685,13 @@ netdev_dummy_update_flags(struct netdev *netdev_,
 
     return error;
 }
-
-static unsigned int
-netdev_dummy_change_seq(const struct netdev *netdev_)
-{
-    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
-    unsigned int change_seq;
-
-    ovs_mutex_lock(&netdev->mutex);
-    change_seq = netdev->change_seq;
-    ovs_mutex_unlock(&netdev->mutex);
-
-    return change_seq;
-}
 
 /* Helper functions. */
 
 static void
-netdev_dummy_changed(struct netdev_dummy *dev)
+netdev_dummy_changed(struct netdev_dummy *dev OVS_UNUSED)
 {
-    dev->change_seq++;
-    if (!dev->change_seq) {
-        dev->change_seq++;
-    }
+    netdev_notify_change();
 }
 
 static const struct netdev_class dummy_class = {
@@ -766,8 +749,6 @@ static const struct netdev_class dummy_class = {
 
     netdev_dummy_update_flags,
 
-    netdev_dummy_change_seq,
-
     netdev_dummy_rx_alloc,
     netdev_dummy_rx_construct,
     netdev_dummy_rx_destruct,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 7e75144..34ea173 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -357,7 +357,6 @@ struct netdev_linux {
     struct ovs_mutex mutex;
 
     unsigned int cache_valid;
-    unsigned int change_seq;
 
     bool miimon;                    /* Link status of last poll. */
     long long int miimon_interval;  /* Miimon Poll rate. Disabled if <= 0. */
@@ -585,10 +584,7 @@ netdev_linux_changed(struct netdev_linux *dev,
                      unsigned int ifi_flags, unsigned int mask)
     OVS_REQUIRES(dev->mutex)
 {
-    dev->change_seq++;
-    if (!dev->change_seq) {
-        dev->change_seq++;
-    }
+    netdev_notify_change();
 
     if ((dev->ifi_flags ^ ifi_flags) & IFF_RUNNING) {
         dev->carrier_resets++;
@@ -640,7 +636,7 @@ static void
 netdev_linux_common_construct(struct netdev_linux *netdev)
 {
     ovs_mutex_init(&netdev->mutex);
-    netdev->change_seq = 1;
+    netdev_notify_change();
 }
 
 /* Creates system and internal devices. */
@@ -2598,19 +2594,6 @@ netdev_linux_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
     return error;
 }
 
-static unsigned int
-netdev_linux_change_seq(const struct netdev *netdev_)
-{
-    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-    unsigned int change_seq;
-
-    ovs_mutex_lock(&netdev->mutex);
-    change_seq = netdev->change_seq;
-    ovs_mutex_unlock(&netdev->mutex);
-
-    return change_seq;
-}
-
 #define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, SET_STATS,  \
                            GET_FEATURES, GET_STATUS)            \
 {                                                               \
@@ -2669,8 +2652,6 @@ netdev_linux_change_seq(const struct netdev *netdev_)
                                                                 \
     netdev_linux_update_flags,                                  \
                                                                 \
-    netdev_linux_change_seq,                                    \
-                                                                \
     netdev_linux_rx_alloc,                                      \
     netdev_linux_rx_construct,                                  \
     netdev_linux_rx_destruct,                                   \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 9ab58fb..9ba667f 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -609,17 +609,6 @@ struct netdev_class {
     int (*update_flags)(struct netdev *netdev, enum netdev_flags off,
                         enum netdev_flags on, enum netdev_flags *old_flags);
 
-    /* Returns a sequence number which indicates changes in one of 'netdev''s
-     * properties.  The returned sequence number must be nonzero so that
-     * callers have a value which they may use as a reset when tracking
-     * 'netdev'.
-     *
-     * Minimally, the returned sequence number is required to change whenever
-     * 'netdev''s flags, features, ethernet address, or carrier changes.  The
-     * returned sequence number is allowed to change even when 'netdev' doesn't
-     * change, although implementations should try to avoid this. */
-    unsigned int (*change_seq)(const struct netdev *netdev);
-
 /* ## ------------------- ## */
 /* ## netdev_rx Functions ## */
 /* ## ------------------- ## */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index ae4f626..a81abff 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -52,7 +52,6 @@ struct netdev_vport {
     /* Protects all members below. */
     struct ovs_mutex mutex;
 
-    unsigned int change_seq;
     uint8_t etheraddr[ETH_ADDR_LEN];
     struct netdev_stats stats;
 
@@ -172,7 +171,7 @@ netdev_vport_construct(struct netdev *netdev_)
     struct netdev_vport *netdev = netdev_vport_cast(netdev_);
 
     ovs_mutex_init(&netdev->mutex);
-    netdev->change_seq = 1;
+    netdev_vport_changed(netdev);
     eth_addr_random(netdev->etheraddr);
 
     route_table_register();
@@ -264,12 +263,6 @@ netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
     return 0;
 }
 
-static unsigned int
-netdev_vport_change_seq(const struct netdev *netdev)
-{
-    return netdev_vport_cast(netdev)->change_seq;
-}
-
 static void
 netdev_vport_run(void)
 {
@@ -285,12 +278,9 @@ netdev_vport_wait(void)
 /* Helper functions. */
 
 static void
-netdev_vport_changed(struct netdev_vport *ndv)
+netdev_vport_changed(struct netdev_vport *ndv OVS_UNUSED)
 {
-    ndv->change_seq++;
-    if (!ndv->change_seq) {
-        ndv->change_seq++;
-    }
+    netdev_notify_change();
 }
 
 /* Code specific to tunnel types. */
@@ -742,8 +732,6 @@ get_stats(const struct netdev *netdev, struct netdev_stats 
*stats)
                                                             \
     netdev_vport_update_flags,                              \
                                                             \
-    netdev_vport_change_seq,                                \
-                                                            \
     NULL,                   /* rx_alloc */                  \
     NULL,                   /* rx_construct */              \
     NULL,                   /* rx_destruct */               \
diff --git a/lib/netdev.c b/lib/netdev.c
index 5ed6062..b5f91c4 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -82,6 +82,11 @@ struct netdev_registered_class {
     atomic_int ref_cnt;         /* Number of 'struct netdev's of this class. */
 };
 
+/* Device status is not expected to change particularly often. Keeping track of
+ * changes for all netdevs allows us to skip device polling in the average
+ * case, reducing CPU usage when operating large numbers of ports. */
+static atomic_uint change_seq = ATOMIC_VAR_INIT(0);
+
 /* This is set pretty low because we probably won't learn anything from the
  * additional log messages. */
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -1494,17 +1499,46 @@ netdev_dump_queue_stats(const struct netdev *netdev,
             : EOPNOTSUPP);
 }
 
-/* Returns a sequence number which indicates changes in one of 'netdev''s
- * properties.  The returned sequence will be nonzero so that callers have a
- * value which they may use as a reset when tracking 'netdev'.
+/* Updates the given sequence number cache. The caller can use this to track
+ * whether it has dealt with netdev changes. */
+void
+netdev_update_seq(unsigned int *seq)
+{
+    atomic_read(&change_seq, seq);
+}
+
+/* Compares the given sequence number with the current sequence of changes to
+ * devices. This sequence number will change whenever a netdev's flags,
+ * features, ethernet address, carrier, bfd status, or cfm status changes. It
+ * may change for other reasons as well, or no reason at all.
  *
- * The returned sequence number will change whenever 'netdev''s flags,
- * features, ethernet address, or carrier changes.  It may change for other
- * reasons as well, or no reason at all. */
-unsigned int
-netdev_change_seq(const struct netdev *netdev)
+ * Returns true if a netdev has changed, false otherwise. */
+bool
+netdev_changed(unsigned int seq)
 {
-    return netdev->netdev_class->change_seq(netdev);
+    unsigned int current;
+
+    atomic_read(&change_seq, &current);
+
+    return seq != current;
+}
+
+/* Increments the stored sequence number, providing the caller a way to notify
+ * interested parties that changes have occurred in a netdev's status. This
+ * will never be incremented to zero; that value is reserved as a reset
+ * value.*/
+void
+netdev_notify_change(void)
+{
+    unsigned int seq;
+
+    atomic_read(&change_seq, &seq);
+
+    seq++;
+    if (!seq) {
+        seq++;
+    }
+    atomic_store(&change_seq, seq);
 }
 
 /* Returns the class type of 'netdev'.
diff --git a/lib/netdev.h b/lib/netdev.h
index bafa50e..9db3c9a 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -313,7 +313,9 @@ typedef void netdev_dump_queue_stats_cb(unsigned int 
queue_id,
 int netdev_dump_queue_stats(const struct netdev *,
                             netdev_dump_queue_stats_cb *, void *aux);
 
-unsigned int netdev_change_seq(const struct netdev *netdev);
+void netdev_update_seq(unsigned int *seq);
+bool netdev_changed(unsigned int seq);
+void netdev_notify_change(void);
 
 #ifdef  __cplusplus
 }
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 2844e4c..ff561ef 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -85,6 +85,7 @@ struct ofproto {
     uint16_t alloc_port_no;     /* Last allocated OpenFlow port number. */
     uint16_t max_ports;         /* Max possible OpenFlow port num, plus one. */
     struct hmap ofport_usage;   /* Map ofport to last used time. */
+    unsigned int change_seq;    /* Change sequence for netdev status. */
 
     /* Flow tables. */
     long long int eviction_group_timer; /* For rate limited reheapification. */
@@ -170,7 +171,6 @@ struct ofport {
     struct netdev *netdev;
     struct ofputil_phy_port pp;
     ofp_port_t ofp_port;        /* OpenFlow port number. */
-    unsigned int change_seq;
     long long int created;      /* Time created, in msec. */
     int mtu;
 };
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index d565b19..7e327ea 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -1406,9 +1406,6 @@ any_pending_ops(const struct ofproto *p)
 int
 ofproto_run(struct ofproto *p)
 {
-    struct sset changed_netdevs;
-    const char *changed_netdev;
-    struct ofport *ofport;
     int error;
 
     error = p->ofproto_class->run(p);
@@ -1460,24 +1457,28 @@ ofproto_run(struct ofproto *p)
         }
     }
 
-    /* Update OpenFlow port status for any port whose netdev has changed.
-     *
-     * Refreshing a given 'ofport' can cause an arbitrary ofport to be
-     * destroyed, so it's not safe to update ports directly from the
-     * HMAP_FOR_EACH loop, or even to use HMAP_FOR_EACH_SAFE.  Instead, we
-     * need this two-phase approach. */
-    sset_init(&changed_netdevs);
-    HMAP_FOR_EACH (ofport, hmap_node, &p->ports) {
-        unsigned int change_seq = netdev_change_seq(ofport->netdev);
-        if (ofport->change_seq != change_seq) {
-            ofport->change_seq = change_seq;
-            sset_add(&changed_netdevs, netdev_get_name(ofport->netdev));
+    if (netdev_changed(p->change_seq)) {
+        struct sset devnames;
+        const char *devname;
+        struct ofport *ofport;
+
+        /* Update OpenFlow port status for any port whose netdev has changed.
+         *
+         * Refreshing a given 'ofport' can cause an arbitrary ofport to be
+         * destroyed, so it's not safe to update ports directly from the
+         * HMAP_FOR_EACH loop, or even to use HMAP_FOR_EACH_SAFE.  Instead, we
+         * need this two-phase approach. */
+        sset_init(&devnames);
+        HMAP_FOR_EACH (ofport, hmap_node, &p->ports) {
+            sset_add(&devnames, netdev_get_name(ofport->netdev));
         }
+        SSET_FOR_EACH (devname, &devnames) {
+            update_port(p, devname);
+        }
+        sset_destroy(&devnames);
+
+        netdev_update_seq(&p->change_seq);
     }
-    SSET_FOR_EACH (changed_netdev, &changed_netdevs) {
-        update_port(p, changed_netdev);
-    }
-    sset_destroy(&changed_netdevs);
 
     switch (p->state) {
     case S_OPENFLOW:
@@ -1567,17 +1568,13 @@ ofproto_run_fast(struct ofproto *p)
 void
 ofproto_wait(struct ofproto *p)
 {
-    struct ofport *ofport;
-
     p->ofproto_class->wait(p);
     if (p->ofproto_class->port_poll_wait) {
         p->ofproto_class->port_poll_wait(p);
     }
 
-    HMAP_FOR_EACH (ofport, hmap_node, &p->ports) {
-        if (ofport->change_seq != netdev_change_seq(ofport->netdev)) {
-            poll_immediate_wake();
-        }
+    if (netdev_changed(p->change_seq)) {
+        poll_immediate_wake();
     }
 
     switch (p->state) {
@@ -2132,7 +2129,6 @@ ofport_install(struct ofproto *p,
     }
     ofport->ofproto = p;
     ofport->netdev = netdev;
-    ofport->change_seq = netdev_change_seq(netdev);
     ofport->pp = *pp;
     ofport->ofp_port = pp->port_no;
     ofport->created = time_msec();
@@ -2367,7 +2363,6 @@ update_port(struct ofproto *ofproto, const char *name)
              * Don't close the old netdev yet in case port_modified has to
              * remove a retained reference to it.*/
             port->netdev = netdev;
-            port->change_seq = netdev_change_seq(netdev);
 
             if (port->ofproto->ofproto_class->port_modified) {
                 port->ofproto->ofproto_class->port_modified(port);
diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
index b238cd0..ed817ec 100644
--- a/ofproto/tunnel.c
+++ b/ofproto/tunnel.c
@@ -97,7 +97,7 @@ tnl_port_add__(const struct ofport_dpif *ofport, const struct 
netdev *netdev,
     tnl_port = xzalloc(sizeof *tnl_port);
     tnl_port->ofport = ofport;
     tnl_port->netdev = netdev_ref(netdev);
-    tnl_port->netdev_seq = netdev_change_seq(tnl_port->netdev);
+    netdev_update_seq(&tnl_port->netdev_seq);
 
     tnl_port->match.in_key = cfg->in_key;
     tnl_port->match.ip_src = cfg->ip_src;
@@ -159,7 +159,7 @@ tnl_port_reconfigure(const struct ofport_dpif *ofport,
         changed = tnl_port_add__(ofport, netdev, odp_port, false);
     } else if (tnl_port->netdev != netdev
                || tnl_port->match.odp_port != odp_port
-               || tnl_port->netdev_seq != netdev_change_seq(netdev)) {
+               || netdev_changed(tnl_port->netdev_seq)) {
         VLOG_DBG("reconfiguring %s", tnl_port_get_name(tnl_port));
         tnl_port_del__(ofport);
         tnl_port_add__(ofport, netdev, odp_port, true);
-- 
1.7.9.5

_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to