How does the following strike you?
I didn't notice any performance impact - could more people test this, please?

---

If a remote node is replaced and its address changes (e.g. its gid
changes), it seems that the neighbour's ha field will get out of sync
with the address handle stored in ipoib_neigh->ah. As a result, the
ah field would point to an incorrect path, and all packets would be
lost.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

Index: linux-2.6.14/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.14.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2005-11-14 
18:29:40.000000000 +0200
+++ linux-2.6.14/drivers/infiniband/ulp/ipoib/ipoib_main.c      2005-11-14 
20:26:43.000000000 +0200
@@ -394,6 +394,7 @@ static void path_rec_completion(int stat
                list_for_each_entry(neigh, &path->neigh_list, list) {
                        kref_get(&path->ah->ref);
                        neigh->ah = path->ah;
+                       memcpy(neigh->dgid.raw, path->pathrec.dgid.raw, sizeof 
(union ib_gid));
 
                        while ((skb = __skb_dequeue(&neigh->queue)))
                                __skb_queue_tail(&skqueue, skb);
@@ -503,6 +504,7 @@ static void neigh_add_path(struct sk_buf
        if (path->pathrec.dlid) {
                kref_get(&path->ah->ref);
                neigh->ah = path->ah;
+               memcpy(neigh->dgid.raw, path->pathrec.dgid.raw, sizeof (union 
ib_gid));
 
                ipoib_send(dev, skb, path->ah,
                           be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
@@ -633,6 +635,17 @@ static int ipoib_start_xmit(struct sk_bu
                neigh = *to_ipoib_neigh(skb->dst->neighbour);
 
                if (likely(neigh->ah)) {
+                       if (unlikely(memcmp(neigh->dgid.raw,
+                                           skb->dst->neighbour->ha + 4,
+                                           sizeof (union ib_gid)))) {
+                               ipoib_put_ah(neigh->ah);
+                               *to_ipoib_neigh(skb->dst->neighbour) = NULL;
+                               skb->dst->neighbour->ops->destructor = NULL;
+                               list_del(&neigh->list);
+                               kfree(neigh);
+                               ipoib_path_lookup(skb, dev);
+                               goto out;
+                       }
                        ipoib_send(dev, skb, neigh->ah,
                                   be32_to_cpup((__be32 *) 
skb->dst->neighbour->ha));
                        goto out;
Index: linux-2.6.14/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- linux-2.6.14.orig/drivers/infiniband/ulp/ipoib/ipoib.h      2005-11-14 
18:29:40.000000000 +0200
+++ linux-2.6.14/drivers/infiniband/ulp/ipoib/ipoib.h   2005-11-14 
18:29:42.000000000 +0200
@@ -209,6 +209,7 @@ struct ipoib_path {
 
 struct ipoib_neigh {
        struct ipoib_ah    *ah;
+       union ib_gid        dgid;
        struct sk_buff_head queue;
 
        struct neighbour   *neighbour;
-- 
MST
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to