Hello, Roland!
The following was found by code review. Please review.
I have updated the description, so I expect the issue and the fix are clear now.

---

ipoib_mcast_stop_thread currently tests mcast->query and, if it is NULL,
does not perform wait_for_completion on the mcast and frees
the mcast object directly.

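However, since both operations are done without locking, it is possible that
ipoib_mcast_join_complete is still in progress on this mcast object and has
already set mcast->query to NULL. In that case the wait is skipped and the
mcast object is freed while the completion handler is still using it.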

Solve this by:
- taking priv->lock before we change mcast->query in ipoib_mcast_join_complete,
  and keeping it until we no longer need the mcast object
- taking priv->lock around mcast->query test in ipoib_mcast_stop_thread

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

Index: linux-2.6.14/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- linux-2.6.14.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c    2006-01-15 17:02:52.000000000 +0200
+++ linux-2.6.14/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2006-01-15 17:05:16.000000000 +0200
@@ -432,9 +432,11 @@ static void ipoib_mcast_join_complete(in
        if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
+       mutex_lock(&mcast_mutex);
+
+       spin_lock_irq(&priv->lock);
        mcast->query = NULL;
 
-       mutex_lock(&mcast_mutex);
        if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
                if (status == -ETIMEDOUT)
                        queue_work(ipoib_workqueue, &priv->mcast_task);
@@ -443,6 +445,7 @@ static void ipoib_mcast_join_complete(in
                                           mcast->backoff * HZ);
        } else
                complete(&mcast->done);
+       spin_unlock_irq(&priv->lock);
        mutex_unlock(&mcast_mutex);
 
        return;
@@ -627,21 +630,27 @@ int ipoib_mcast_stop_thread(struct net_d
        if (flush)
                flush_workqueue(ipoib_workqueue);
 
+       spin_lock_irq(&priv->lock);
        if (priv->broadcast && priv->broadcast->query) {
                ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
                priv->broadcast->query = NULL;
+               spin_unlock_irq(&priv->lock);
                ipoib_dbg_mcast(priv, "waiting for bcast\n");
                wait_for_completion(&priv->broadcast->done);
-       }
+       } else
+               spin_unlock_irq(&priv->lock);
 
        list_for_each_entry(mcast, &priv->multicast_list, list) {
+               spin_lock_irq(&priv->lock);
                if (mcast->query) {
                        ib_sa_cancel_query(mcast->query_id, mcast->query);
                        mcast->query = NULL;
+                       spin_unlock_irq(&priv->lock);
+                       ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
                                        IPOIB_GID_ARG(mcast->mcmember.mgid));
                        wait_for_completion(&mcast->done);
-               }
+               } else
+                       spin_unlock_irq(&priv->lock);
        }
 
        return 0;

-- 
Michael S. Tsirkin
Staff Engineer, Mellanox Technologies
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to