I tried this out on RHEL 5 (2.6.18-8.el5) and it works OK.

Acked-by: Ralph Campbell <[EMAIL PROTECTED]>


On Sun, 2008-02-10 at 17:08 +0200, Eli Cohen wrote:
> [PATCH] IB/ipoib: bug fix creation of own_ah
> 
> Create own ah after the port is queried. This also handles
> cases when a port change event occurs (which includes lid
> change event).
> 
> Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
> ---
> 
> Ralph, Arlin,
> 
> I think this patch should fix the problem you were seeing. Could you
> check if it does? I pushed this to ofed 1.3 tree.
> 
> 
>   kernel_patches/fixes/ipoib_0190_unsig_udqp.patch |  169 
> +++++++++++++---------
>   1 files changed, 99 insertions(+), 70 deletions(-)
> 
> diff --git a/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch 
> b/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> index 1ffb78a..2a1062d 100644
> --- a/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> +++ b/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> @@ -13,7 +13,7 @@ Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
>   Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
>   ===================================================================
>   --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h   2008-02-10 
> 10:04:06.097933000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-10 
> 10:08:46.941535000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-10 
> 14:11:45.097178000 +0200
>   @@ -373,6 +373,7 @@ struct ipoib_dev_priv {
> 
>       struct ib_wc         ibwc[IPOIB_NUM_WC];
> @@ -39,10 +39,18 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
> 
>    struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
>                                struct ib_pd *pd, struct ib_ah_attr *attr);
> +@@ -534,6 +536,7 @@ int ipoib_pkey_dev_delay_open(struct net
> + void ipoib_drain_cq(struct net_device *dev);
> +
> + void ipoib_set_ethtool_ops(struct net_device *dev);
> ++void destroy_own_ah(struct ipoib_dev_priv *priv);
> +
> + #ifdef CONFIG_INFINIBAND_IPOIB_CM
> +
>   Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   ===================================================================
>   --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c        
> 2008-02-10 10:04:06.092935000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c      2008-02-10 
> 10:09:59.703873000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c      2008-02-10 
> 14:11:45.086179000 +0200
>   @@ -254,12 +254,10 @@ repost:
>                          "for buf %d\n", wr_id);
>    }
> @@ -83,7 +91,9 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>           test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
>               netif_wake_queue(dev);
>   +}
> -+
> +
> +-    if (need_lock)
> +-            spin_unlock_irqrestore(&priv->tx_lock, flags);
>   +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
>   +{
>   +   struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -96,9 +106,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +           _ipoib_ib_handle_tx_wc(dev, i);
>   +   } while (i++ != wr_id);
>   +   priv->tx_poll = i & (ipoib_sendq_size - 1);
> -
> --    if (need_lock)
> --            spin_unlock_irqrestore(&priv->tx_lock, flags);
> ++
>   +   if (unlikely(wc->status != IB_WC_SUCCESS &&
>   +                wc->status != IB_WC_WR_FLUSH_ERR))
> 
> @@ -128,7 +136,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    }
> 
>    int ipoib_poll(struct napi_struct *napi, int budget)
> -@@ -361,11 +372,63 @@ void ipoib_ib_rx_completion(struct ib_cq
> +@@ -361,11 +372,66 @@ void ipoib_ib_rx_completion(struct ib_cq
>       netif_rx_schedule(dev, &priv->napi);
>    }
> 
> @@ -161,7 +169,8 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +   spin_lock_irqsave(&priv->tx_lock, flags);
>   +   if (((int)priv->tx_tail - (int)priv->tx_head < 0) &&
>   +           time_after(jiffies, dev->trans_start + 10) &&
> -+            priv->tx_outstanding < ipoib_sendq_size) {
> ++            priv->tx_outstanding < ipoib_sendq_size &&
> ++            priv->own_ah) {
>   +           wrid = priv->tx_head & (ipoib_sendq_size - 1);
>   +           priv->tx_ring[wrid].skb = NULL;
>   +           if (post_zlen_send_wr(priv, wrid))
> @@ -173,8 +182,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +   }
>   +   poll_tx(priv);
>   +   spin_unlock_irqrestore(&priv->tx_lock, flags);
> -
> --    poll_tx(priv, 1);
> ++
>   +   mod_timer(&priv->poll_timer, jiffies + HZ / 2);
>   +}
>   +
> @@ -189,13 +197,16 @@ Index: 
> ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +   if (!post_zlen_send_wr(priv, wrid)) {
>   +           ++priv->tx_head;
>   +           ++priv->tx_outstanding;
> -+    }
> ++    } else
> ++            ipoib_warn(priv, "post_zlen failed\n");
> +
> +-    poll_tx(priv, 1);
>   +   poll_tx(priv);
>   +   spin_unlock_irqrestore(&priv->tx_lock, flags);
>    }
> 
>    static inline int post_send(struct ipoib_dev_priv *priv,
> -@@ -405,6 +468,11 @@ static inline int post_send(struct ipoib
> +@@ -405,6 +471,11 @@ static inline int post_send(struct ipoib
>       } else
>               priv->tx_wr.opcode      = IB_WR_SEND;
> 
> @@ -207,7 +218,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>       return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
>    }
> 
> -@@ -481,15 +549,17 @@ void ipoib_send(struct net_device *dev,
> +@@ -481,15 +552,17 @@ void ipoib_send(struct net_device *dev,
> 
>               address->last_send = priv->tx_head;
>               ++priv->tx_head;
> @@ -227,58 +238,7 @@ Index: 
> ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> 
>       return;
> 
> -@@ -530,6 +600,32 @@ void ipoib_reap_ah(struct work_struct *w
> -                                round_jiffies_relative(HZ));
> - }
> -
> -+static int create_own_ah(struct ipoib_dev_priv *priv)
> -+{
> -+    struct ib_ah_attr attr = {
> -+            .dlid = priv->local_lid,
> -+            .port_num = priv->port,
> -+    };
> -+
> -+    if (priv->own_ah) {
> -+            ipoib_dbg(priv, "own ah already exists\n");
> -+            return -EINVAL;
> -+    }
> -+    priv->own_ah = ib_create_ah(priv->pd, &attr);
> -+    return IS_ERR(priv->own_ah);
> -+}
> -+
> -+static void destroy_own_ah(struct ipoib_dev_priv *priv)
> -+{
> -+    if (!priv->own_ah) {
> -+            ipoib_dbg(priv, "destroying an already destroyed own ah\n");
> -+            return;
> -+    }
> -+
> -+    ib_destroy_ah(priv->own_ah);
> -+    priv->own_ah = NULL;
> -+}
> -+
> - int ipoib_ib_dev_open(struct net_device *dev)
> - {
> -     struct ipoib_dev_priv *priv = netdev_priv(dev);
> -@@ -542,9 +638,17 @@ int ipoib_ib_dev_open(struct net_device
> -     }
> -     set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> -
> -+    ret = create_own_ah(priv);
> -+    if (ret) {
> -+            priv->own_ah = NULL;
> -+            ipoib_warn(priv, "failed to create own ah\n");
> -+            return -1;
> -+    }
> -+
> -     ret = ipoib_init_qp(dev);
> -     if (ret) {
> -             ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
> -+            destroy_own_ah(priv);
> -             return -1;
> -     }
> -
> -@@ -566,6 +670,11 @@ int ipoib_ib_dev_open(struct net_device
> +@@ -566,6 +639,11 @@ int ipoib_ib_dev_open(struct net_device
>       queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
>                          round_jiffies_relative(HZ));
> 
> @@ -290,7 +250,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>       set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
> 
>       return 0;
> -@@ -662,7 +771,7 @@ void ipoib_drain_cq(struct net_device *d
> +@@ -662,7 +740,7 @@ void ipoib_drain_cq(struct net_device *d
>                               if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
>                                       ipoib_cm_handle_tx_wc(dev, priv->ibwc + 
> i);
>                               else
> @@ -299,7 +259,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>                       }
>               }
>       } while (n == IPOIB_NUM_WC);
> -@@ -673,12 +782,14 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -673,12 +751,14 @@ int ipoib_ib_dev_stop(struct net_device
>       struct ipoib_dev_priv *priv = netdev_priv(dev);
>       struct ib_qp_attr qp_attr;
>       unsigned long begin;
> @@ -315,7 +275,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> 
>       /*
>        * Move our QP to the error state and then reinitialize in
> -@@ -700,32 +811,30 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -700,32 +780,30 @@ int ipoib_ib_dev_stop(struct net_device
>                        * assume the HW is wedged and just free up
>                        * all our pending work requests.
>                        */
> @@ -363,7 +323,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>               ipoib_drain_cq(dev);
> 
>               msleep(1);
> -@@ -734,6 +843,7 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -734,6 +812,7 @@ int ipoib_ib_dev_stop(struct net_device
>       ipoib_dbg(priv, "All sends and receives done.\n");
> 
>    timeout:
> @@ -371,10 +331,19 @@ Index: 
> ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>       qp_attr.qp_state = IB_QPS_RESET;
>       if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
>               ipoib_warn(priv, "Failed to modify QP to RESET state\n");
> +@@ -838,6 +917,8 @@ static void __ipoib_ib_dev_flush(struct
> +             ipoib_ib_dev_open(dev);
> +     }
> +
> ++    destroy_own_ah(priv);
> ++
> +     /*
> +      * The device could have been brought down between the start and when
> +      * we get here, don't bring it back up if it's not configured up
>   Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
>   ===================================================================
>   --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c     
> 2008-02-10 10:04:06.101933000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c   2008-02-10 
> 10:08:46.948541000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c   2008-02-10 
> 14:10:10.611696000 +0200
>   @@ -153,7 +153,7 @@ int ipoib_transport_dev_init(struct net_
>                       .max_send_sge = dev->features & NETIF_F_SG ? 
> MAX_SKB_FRAGS + 1 : 1,
>                       .max_recv_sge = 1
> @@ -393,3 +362,63 @@ Index: 
> ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
>       if (IS_ERR(priv->scq)) {
>               printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
>               goto out_free_rcq;
> +Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +===================================================================
> +--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c  
> 2008-02-10 14:10:09.000000000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c       
> 2008-02-10 14:11:45.091181000 +0200
> +@@ -492,6 +492,42 @@ static void ipoib_mcast_join(struct net_
> +     }
> + }
> +
> ++static int create_own_ah(struct ipoib_dev_priv *priv)
> ++{
> ++    struct ib_ah_attr attr = {
> ++            .dlid = priv->local_lid,
> ++            .port_num = priv->port,
> ++    };
> ++        struct ib_ah *ah;
> ++
> ++    if (priv->own_ah)
> ++            return 0;
> ++
> ++    ah = ib_create_ah(priv->pd, &attr);
> ++    if (!IS_ERR(ah)) {
> ++            ipoib_dbg(priv, "created own ah\n");
> ++            priv->own_ah = ah;
> ++    }
> ++
> ++    return IS_ERR(priv->own_ah);
> ++}
> ++
> ++void destroy_own_ah(struct ipoib_dev_priv *priv)
> ++{
> ++    unsigned long flags;
> ++
> ++    if (!priv->own_ah) {
> ++            ipoib_dbg(priv, "own ah already destroyed\n");
> ++            return;
> ++    } else
> ++            ipoib_dbg(priv, "destroying own ah\n");
> ++
> ++    spin_lock_irqsave(&priv->tx_lock, flags);
> ++    ib_destroy_ah(priv->own_ah);
> ++    priv->own_ah = NULL;
> ++    spin_unlock_irqrestore(&priv->tx_lock, flags);
> ++}
> ++
> + void ipoib_mcast_join_task(struct work_struct *work)
> + {
> +     struct ipoib_dev_priv *priv =
> +@@ -509,8 +545,11 @@ void ipoib_mcast_join_task(struct work_s
> +     {
> +             struct ib_port_attr attr;
> +
> +-            if (!ib_query_port(priv->ca, priv->port, &attr))
> ++            if (!ib_query_port(priv->ca, priv->port, &attr)) {
> +                     priv->local_lid = attr.lid;
> ++                    if (create_own_ah(priv))
> ++                            ipoib_warn(priv, "create own_ah failed\n");
> ++            }
> +             else
> +                     ipoib_warn(priv, "ib_query_port failed\n");
> +     }

_______________________________________________
ewg mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg

Reply via email to