hard_start_xmit dereferences to_ipoib_neigh when only tx_lock is taken.  This
would only be safe if all calls that modify *to_ipoib_neigh take tx_lock too.
Currently this is not always true for ipoib_neigh_free and path_rec_completion,
which results in memory corruption.  Fix this race, making sure
path_rec_completion and ipoib_neigh_free are always called under
tx_lock.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

---

The following works fine for me here. Pls consider for 2.6.22.

ipoib_main.c      |   42 ++++++++++++++++--------------------------
ipoib_multicast.c |    6 ++++--
2 files changed, 20 insertions(+), 28 deletions(-)

Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c    2007-05-22 
01:46:54.000000000 +0300
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-05-23 
22:45:18.000000000 +0300
@@ -262,7 +262,8 @@ static void path_free(struct net_device 
        while ((skb = __skb_dequeue(&path->queue)))
                dev_kfree_skb_irq(skb);
 
-       spin_lock_irqsave(&priv->lock, flags);
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       spin_lock(&priv->lock);
 
        list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
                /*
@@ -277,7 +278,8 @@ static void path_free(struct net_device 
                ipoib_neigh_free(dev, neigh);
        }
 
-       spin_unlock_irqrestore(&priv->lock, flags);
+       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
 
        if (path->ah)
                ipoib_put_ah(path->ah);
@@ -401,7 +403,8 @@ static void path_rec_completion(int stat
                        ah = ipoib_create_ah(dev, priv->pd, &av);
        }
 
-       spin_lock_irqsave(&priv->lock, flags);
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       spin_lock(&priv->lock);
 
        path->ah = ah;
 
@@ -442,7 +445,8 @@ static void path_rec_completion(int stat
        path->query = NULL;
        complete(&path->done);
 
-       spin_unlock_irqrestore(&priv->lock, flags);
+       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
 
        while ((skb = __skb_dequeue(&skqueue))) {
                skb->dev = dev;
@@ -614,32 +618,16 @@ static void unicast_arp_send(struct sk_b
        path = __path_find(dev, phdr->hwaddr + 4);
        if (!path) {
                path = path_rec_create(dev, phdr->hwaddr + 4);
-               if (path) {
-                       /* put pseudoheader back on for next time */
-                       skb_push(skb, sizeof *phdr);
-                       __skb_queue_tail(&path->queue, skb);
-
-                       if (path_rec_start(dev, path)) {
-                               spin_unlock(&priv->lock);
-                               path_free(dev, path);
-                               return;
-                       } else
-                               __path_add(dev, path);
-               } else {
-                       ++priv->stats.tx_dropped;
-                       dev_kfree_skb_any(skb);
-               }
-
-               spin_unlock(&priv->lock);
-               return;
+               if (path)
+                       __path_add(dev, path);
        }
 
-       if (path->ah) {
+       if (path && path->ah) {
                ipoib_dbg(priv, "Send unicast ARP to %04x\n",
                          be16_to_cpu(path->pathrec.dlid));
 
                ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
-       } else if ((path->query || !path_rec_start(dev, path)) &&
+       } else if (path && (path->query || !path_rec_start(dev, path)) &&
                   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
                /* put pseudoheader back on for next time */
                skb_push(skb, sizeof *phdr);
@@ -822,7 +810,8 @@ static void ipoib_neigh_cleanup(struct n
                  IPOIB_QPN(n->ha),
                  IPOIB_GID_RAW_ARG(n->ha + 4));
 
-       spin_lock_irqsave(&priv->lock, flags);
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       spin_lock(&priv->lock);
 
        neigh = *to_ipoib_neigh(n);
        if (neigh) {
@@ -832,7 +821,8 @@ static void ipoib_neigh_cleanup(struct n
                ipoib_neigh_free(n->dev, neigh);
        }
 
-       spin_unlock_irqrestore(&priv->lock, flags);
+       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
 
        if (ah)
                ipoib_put_ah(ah);
Index: linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- linux-2.6.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c       
2007-05-22 01:46:54.000000000 +0300
+++ linux-2.6/drivers/infiniband/ulp/ipoib/ipoib_multicast.c    2007-05-23 
21:38:28.000000000 +0300
@@ -100,7 +100,8 @@ static void ipoib_mcast_free(struct ipoi
                        "deleting multicast group " IPOIB_GID_FMT "\n",
                        IPOIB_GID_ARG(mcast->mcmember.mgid));
 
-       spin_lock_irqsave(&priv->lock, flags);
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       spin_lock(&priv->lock);
 
        list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
                /*
@@ -114,7 +115,8 @@ static void ipoib_mcast_free(struct ipoi
                ipoib_neigh_free(dev, neigh);
        }
 
-       spin_unlock_irqrestore(&priv->lock, flags);
+       spin_unlock(&priv->lock);
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
 
        if (mcast->ah)
                ipoib_put_ah(mcast->ah);

-- 
MST
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to