A reboot of one switch in an MC-LAG bond makes all bond links
to go down, causing a total connectivity loss for 3 seconds.

Packet capture shows that spurious LACP PDUs are sent to OVS with
a different MAC address (partner system id) during the final
stages of the MC-LAG switch reboot. The current implementation
doesn't care about the partner sys_id (MAC address).

The code change based on the following:
- If an interface (lead interface) on a bond has an "attached"
  LACP connection, then any other slaves on that bond is allowed
  to become active only when its partner's sys_id is the same as
  the partner's sys_id of the lead interface.
- So, when a slave interface of a bond becomes "current" (it gets
  valid LACP information), first checks if there is already an
  active interface on the bond.
- If there is a lead, the slave checks for the partner sys_ids,
  and becomes active only when they are the same, otherwise it
  remains in "current" state, but "detached".
- If there is no lead, it follows the old way, and accepts any
  partner sys_id.

Signed-off-by: Robert Mulik <robert.mu...@ericsson.com>
---
 lib/lacp.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/lib/lacp.c b/lib/lacp.c
index e6ad7b9..eaac2a5 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -595,7 +595,7 @@ lacp_wait(struct lacp *lacp) OVS_EXCLUDED(mutex)
 static void
 lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex)
 {
-    struct slave *lead, *slave;
+    struct slave *lead, *lead_current, *slave;
     struct lacp_info lead_pri;
     bool lead_enable;
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
@@ -603,7 +603,25 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex)
     lacp->update = false;

     lead = NULL;
+    lead_current = NULL;
     lead_enable = false;
+
+    /* Check if there is a working interface.
+     * Store as lead_current, if there is one. */
+    HMAP_FOR_EACH (slave, node, &lacp->slaves) {
+        if (slave->status == LACP_CURRENT && slave->attached) {
+            struct lacp_info pri;
+            slave_get_priority(slave, &pri);
+            if (!lead_current || memcmp(&pri, &lead_pri, sizeof pri) < 0) {
+                lead_current = slave;
+                lead = lead_current;
+                lead_pri = pri;
+                lead_enable = true;
+            }
+        }
+    }
+
+    /* Find interface with highest priority. */
     HMAP_FOR_EACH (slave, node, &lacp->slaves) {
         struct lacp_info pri;

@@ -624,14 +642,22 @@ lacp_update_attached(struct lacp *lacp) 
OVS_REQUIRES(mutex)
             continue;
         }

-        slave->attached = true;
         slave_get_priority(slave, &pri);
         bool enable = slave_may_enable__(slave);

-        if (!lead
-            || enable > lead_enable
-            || (enable == lead_enable
-                && memcmp(&pri, &lead_pri, sizeof pri) < 0)) {
+        /* Check if partner MAC address is the same as on the working
+         * interface. Activate slave only if the MAC is the same, or
+         * there is no working interface. */
+        if (!lead_current || (lead_current
+            && eth_addr_equals(slave->partner.sys_id,
+                               lead_current->partner.sys_id))) {
+            slave->attached = true;
+        }
+        if (slave->attached &&
+                (!lead
+                 || enable > lead_enable
+                 || (enable == lead_enable
+                     && memcmp(&pri, &lead_pri, sizeof pri) < 0))) {
             lead = slave;
             lead_enable = enable;
             lead_pri = pri;
--
1.9.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to