Roland, this is the patch with changes according to Yossi's comments,
in case you are convinced that there is a place for a fix in IPoIB along
with a fix in bonding or core network code.

-------------------------------

Whenever an illegal multicast address is passed to IPoIB to join, it stops
all subsequent requests from being joined. This happens because IPoIB joins
multicast addresses in the order they arrive and doesn't handle the next
group's join until the current join finishes successfully. This phenomenon
happens a lot when a bonding interface enslaves IPoIB devices. Before
enslaving IPoIB interfaces, the bonding device acts like an Ethernet device,
including the way it translates multicast IP addresses to HW addresses. When
it comes up without slaves, it translates the group 224.0.0.1 (all hosts) as
if it were an Ethernet device, and when it enslaves IPoIB devices this is
the address they get for joining (which is garbage for them).
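
For illustration only (a user-space sketch of mine, not part of the patch),
the program below mirrors the RFC 1112 IPv4-to-Ethernet multicast mapping
that the kernel's ip_eth_mc_map() performs and that a slaveless bond applies;
it takes the group address in host byte order for simplicity. The 6-byte
result is meaningless as a 20-byte IPoIB hardware address (QPN + IB GID), so
a join built from it can never complete:

#include <stdio.h>
#include <stdint.h>

/* RFC 1112: 01:00:5e followed by the low 23 bits of the group address. */
static void eth_mc_map(uint32_t group, unsigned char buf[6])
{
	buf[0] = 0x01;
	buf[1] = 0x00;
	buf[2] = 0x5e;
	buf[3] = (group >> 16) & 0x7f;
	buf[4] = (group >> 8) & 0xff;
	buf[5] = group & 0xff;
}

int main(void)
{
	unsigned char hw[6];

	eth_mc_map(0xe0000001, hw);	/* 224.0.0.1, "all hosts" */

	/* Prints 01:00:5e:00:00:01 -- a 6-byte Ethernet address, which
	 * IPoIB cannot turn into a valid MGID to join. */
	for (int i = 0; i < 6; i++)
		printf("%02x%s", hw[i], i < 5 ? ":" : "\n");
	return 0;
}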

This patch moves the multicast address to the end of the list after a join
attempt. Even if the join fails, the next attempt will be for a different
address.
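
As a rough standalone illustration of the new ordering (made-up group names
and a plain array standing in for the kernel list API), each pass below
rotates the head group to the tail before attempting its join, so a group
that can never join is still retried but no longer starves the groups
queued behind it:

#include <stdio.h>
#include <string.h>

struct mcast {
	char name[32];
	int join_ok;	/* 0: this join always fails (illegal address) */
};

int main(void)
{
	struct mcast list[] = {
		{ "01:00:5e:00:00:01 (bogus)", 0 },
		{ "ff12:401b::1", 1 },
		{ "ff12:401b::2", 1 },
	};
	int n = sizeof(list) / sizeof(list[0]);

	for (int pass = 0; pass < 2 * n; pass++) {
		struct mcast head = list[0];

		/* The list_move_tail() idea: rotate the head entry to
		 * the tail before attempting the join. */
		memmove(&list[0], &list[1], (n - 1) * sizeof(list[0]));
		list[n - 1] = head;

		printf("join %-28s -> %s\n", head.name,
		       head.join_ok ? "ok" : "failed, trying next group");
	}
	return 0;
}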

Signed-off-by: Moni Shoua <[email protected]>
---

 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index a0e9753..3c3c63d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -379,6 +379,7 @@ static int ipoib_mcast_join_complete(int status,
        struct ipoib_mcast *mcast = multicast->context;
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_mcast *next_mcast;
 
        ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
                        mcast->mcmember.mgid.raw, status);
@@ -427,9 +428,17 @@ static int ipoib_mcast_join_complete(int status,
 
        mutex_lock(&mcast_mutex);
        spin_lock_irq(&priv->lock);
-       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
-                                  mcast->backoff * HZ);
+       if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
+               list_for_each_entry(next_mcast, &priv->multicast_list, list) {
+                       if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &next_mcast->flags)
+                           && !test_bit(IPOIB_MCAST_FLAG_BUSY, &next_mcast->flags)
+                           && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &next_mcast->flags))
+                               break;
+               }
+               if (&next_mcast->list != &priv->multicast_list)
+                       queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+                               next_mcast->backoff * HZ);
+       }
        spin_unlock_irq(&priv->lock);
        mutex_unlock(&mcast_mutex);
 
@@ -570,13 +579,16 @@ void ipoib_mcast_join_task(struct work_struct *work)
                                break;
                        }
                }
-               spin_unlock_irq(&priv->lock);
 
                if (&mcast->list == &priv->multicast_list) {
                        /* All done */
+                       spin_unlock_irq(&priv->lock);
                        break;
                }
 
+               list_move_tail(&mcast->list, &priv->multicast_list);
+               spin_unlock_irq(&priv->lock);
+
                ipoib_mcast_join(dev, mcast, 1);
                return;
        }