There appear to be several races in IPoIB multicast code:
for example when a MAD event may start the multicast thread, while ipoib_stop
tries to stop it, leaving a thread running after the device is removed.
This patch forces all external callers of multicast.c into ipoib_workqueue,
which avoids the need for explicit synchronisation.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

Index: linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- linux-2.6.14-dbg.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c       
2005-11-20 11:21:39.000000000 +0200
+++ linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib_ib.c    2005-11-20 
12:04:32.000000000 +0200
@@ -431,7 +431,8 @@ int ipoib_ib_dev_up(struct net_device *d
 
        set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 
-       return ipoib_mcast_start_thread(dev);
+       queue_work(ipoib_workqueue, &priv->start_task);
+       return 0;
 }
 
 int ipoib_ib_dev_down(struct net_device *dev)
@@ -452,19 +453,8 @@ int ipoib_ib_dev_down(struct net_device 
                flush_workqueue(ipoib_workqueue);
        }
 
-       ipoib_mcast_stop_thread(dev, 1);
-
-       /*
-        * Flush the multicast groups first so we stop any multicast joins. The
-        * completion thread may have already died and we may deadlock waiting
-        * for the completion thread to finish some multicast joins.
-        */
-       ipoib_mcast_dev_flush(dev);
-
-       /* Delete broadcast and local addresses since they will be recreated */
-       ipoib_mcast_dev_down(dev);
-
-       ipoib_flush_paths(dev);
+       queue_work(ipoib_workqueue, &priv->down_task);
+       flush_workqueue(ipoib_workqueue);
 
        return 0;
 }
@@ -619,11 +609,8 @@ void ipoib_ib_dev_cleanup(struct net_dev
 
        ipoib_dbg(priv, "cleaning up ib_dev\n");
 
-       ipoib_mcast_stop_thread(dev, 1);
-
-       /* Delete the broadcast address and the local address */
-       ipoib_mcast_dev_down(dev);
-
+       queue_work(ipoib_workqueue, &priv->cleanup_task);
+       flush_workqueue(ipoib_workqueue);
        ipoib_transport_dev_cleanup(dev);
 }
 
Index: linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- linux-2.6.14-dbg.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c        
2005-11-20 11:21:39.000000000 +0200
+++ linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib_multicast.c     
2005-11-20 12:19:03.000000000 +0200
@@ -573,7 +573,7 @@ void ipoib_mcast_join_task(void *dev_ptr
        netif_carrier_on(dev);
 }
 
-int ipoib_mcast_start_thread(struct net_device *dev)
+static void ipoib_mcast_start_thread(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -583,11 +583,9 @@ int ipoib_mcast_start_thread(struct net_
        if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
                queue_work(ipoib_workqueue, &priv->mcast_task);
        up(&mcast_mutex);
-
-       return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+static void ipoib_mcast_stop_thread(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_mcast *mcast;
@@ -599,9 +597,6 @@ int ipoib_mcast_stop_thread(struct net_d
        cancel_delayed_work(&priv->mcast_task);
        up(&mcast_mutex);
 
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
-
        if (priv->broadcast && priv->broadcast->query) {
                ib_sa_cancel_query(priv->broadcast->query_id, 
priv->broadcast->query);
                priv->broadcast->query = NULL;
@@ -618,8 +613,6 @@ int ipoib_mcast_stop_thread(struct net_d
                        wait_for_completion(&mcast->done);
                }
        }
-
-       return 0;
 }
 
 static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
@@ -737,7 +730,7 @@ out:
        spin_unlock(&priv->lock);
 }
 
-void ipoib_mcast_dev_flush(struct net_device *dev)
+static void ipoib_mcast_dev_flush(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        LIST_HEAD(remove_list);
@@ -793,7 +786,7 @@ void ipoib_mcast_dev_flush(struct net_de
        }
 }
 
-void ipoib_mcast_dev_down(struct net_device *dev)
+static void ipoib_mcast_dev_down(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        unsigned long flags;
@@ -822,7 +815,7 @@ void ipoib_mcast_restart_task(void *dev_
 
        ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
-       ipoib_mcast_stop_thread(dev, 0);
+       ipoib_mcast_stop_thread(dev);
 
        spin_lock_irqsave(&priv->lock, flags);
 
@@ -908,6 +901,41 @@ void ipoib_mcast_restart_task(void *dev_
                ipoib_mcast_start_thread(dev);
 }
 
+void ipoib_mcast_down_task(void *dev_ptr)
+{
+       struct net_device *dev = dev_ptr;
+
+       ipoib_mcast_stop_thread(dev);
+
+       /*
+        * Flush the multicast groups first so we stop any multicast joins. The
+        * completion thread may have already died and we may deadlock waiting
+        * for the completion thread to finish some multicast joins.
+        */
+       ipoib_mcast_dev_flush(dev);
+
+       /* Delete broadcast and local addresses since they will be recreated */
+       ipoib_mcast_dev_down(dev);
+
+       ipoib_flush_paths(dev);
+}
+
+void ipoib_mcast_cleanup_task(void *dev_ptr)
+{
+       struct net_device *dev = dev_ptr;
+       ipoib_mcast_stop_thread(dev);
+
+       /* Delete the broadcast address and the local address */
+       ipoib_mcast_dev_down(dev);
+}
+
+void ipoib_mcast_start_task(void *dev_ptr)
+{
+       struct net_device *dev = dev_ptr;
+       ipoib_mcast_start_thread(dev);
+}
+
+
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 
 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
Index: linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.14-dbg.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c     
2005-11-20 11:21:39.000000000 +0200
+++ linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib_main.c  2005-11-20 
11:57:00.000000000 +0200
@@ -899,6 +899,9 @@ static void ipoib_setup(struct net_devic
        INIT_WORK(&priv->mcast_task,   ipoib_mcast_join_task,    priv->dev);
        INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush,       priv->dev);
        INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task, priv->dev);
+       INIT_WORK(&priv->down_task,    ipoib_mcast_down_task,    priv->dev);
+       INIT_WORK(&priv->cleanup_task, ipoib_mcast_cleanup_task, priv->dev);
+       INIT_WORK(&priv->start_task,   ipoib_mcast_start_task,   priv->dev);
        INIT_WORK(&priv->ah_reap_task, ipoib_reap_ah,            priv->dev);
 }
 
Index: linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- linux-2.6.14-dbg.orig/drivers/infiniband/ulp/ipoib/ipoib.h  2005-11-20 
11:21:39.000000000 +0200
+++ linux-2.6.14-dbg/drivers/infiniband/ulp/ipoib/ipoib.h       2005-11-20 
12:18:43.000000000 +0200
@@ -137,6 +137,9 @@ struct ipoib_dev_priv {
        struct work_struct mcast_task;
        struct work_struct flush_task;
        struct work_struct restart_task;
+       struct work_struct down_task;
+       struct work_struct cleanup_task;
+       struct work_struct start_task;
        struct work_struct ah_reap_task;
 
        struct ib_device *ca;
@@ -263,11 +266,9 @@ void ipoib_mcast_send(struct net_device 
                      struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(void *dev_ptr);
-int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
-
-void ipoib_mcast_dev_down(struct net_device *dev);
-void ipoib_mcast_dev_flush(struct net_device *dev);
+void ipoib_mcast_start_task(void *dev_ptr);
+void ipoib_mcast_down_task(void *dev_ptr);
+void ipoib_mcast_cleanup_task(void *dev_ptr);
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);

-- 
MST
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to