Quoting r. Roland Dreier <[EMAIL PROTECTED]>:
> Subject: Re: OFED 1.1 release - schedule and features
> 
>  > So if the link which ib0 maps to is DOWN you move the ib0 IPv4 address
>  > to another device whose link is UP (eg ib1) and you somehow have ib1
>  > send a gratuitous ARP?
> 
> I think there may be a problem in the way IPoIB deals with gratuitous
> ARPs.  Because if a neighbour structure is updated by the networking
> core, there's no way for IPoIB to know about that and update the
> associated IB path.
> 
> Has anyone actually tried this failover approach?
> 
>  - R.

OK, we've seen the problem here - and here's a patch to fix it.
Seems to work fine here - I'll let it run for a day just to make sure.

How does it look?

---

The neighbour ha field may get updated without destroying the neighbour.

In this case, the ha field gets out of sync with the address handle stored
in ipoib_neigh->ah, with the result that the ah field would point to an
incorrect path, resulting in all packets being lost.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h 
b/drivers/infiniband/ulp/ipoib/ipoib.h
index 3f89f5e..474aa21 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -212,6 +212,7 @@ struct ipoib_path {
 
 struct ipoib_neigh {
        struct ipoib_ah    *ah;
+       union ib_gid        dgid;
        struct sk_buff_head queue;
 
        struct neighbour   *neighbour;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c 
b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 1c6ea1c..cf71d2a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -404,6 +404,8 @@ static void path_rec_completion(int stat
                list_for_each_entry(neigh, &path->neigh_list, list) {
                        kref_get(&path->ah->ref);
                        neigh->ah = path->ah;
+                       memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
+                              sizeof(union ib_gid));
 
                        while ((skb = __skb_dequeue(&neigh->queue)))
                                __skb_queue_tail(&skqueue, skb);
@@ -510,6 +512,8 @@ static void neigh_add_path(struct sk_buf
        if (path->ah) {
                kref_get(&path->ah->ref);
                neigh->ah = path->ah;
+               memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
+                      sizeof(union ib_gid));
 
                ipoib_send(dev, skb, path->ah,
                           be32_to_cpup((__be32 *) skb->dst->neighbour->ha));
@@ -633,6 +637,25 @@ static int ipoib_start_xmit(struct sk_bu
                neigh = *to_ipoib_neigh(skb->dst->neighbour);
 
                if (likely(neigh->ah)) {
+                       if (unlikely(memcmp(&neigh->dgid.raw,
+                                           skb->dst->neighbour->ha + 4,
+                                           sizeof(union ib_gid)))) {
+                               spin_lock(&priv->lock);
+                               /*
+                                * It's safe to call ipoib_put_ah() inside
+                                * priv->lock here, because we know that
+                                * path->ah will always hold one more reference,
+                                * so ipoib_put_ah() will never do more than
+                                * decrement the ref count.
+                                */
+                               ipoib_put_ah(neigh->ah);
+                               list_del(&neigh->list);
+                               ipoib_neigh_free(neigh);
+                               spin_unlock(&priv->lock);
+                               ipoib_path_lookup(skb, dev);
+                               goto out;
+                       }
+
                        ipoib_send(dev, skb, neigh->ah,
                                   be32_to_cpup((__be32 *) 
skb->dst->neighbour->ha));
                        goto out;

-- 
MST

_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to