Jack Morgenstein wrote:
> On Monday 29 June 2009 18:06, Moni Shoua wrote:
>> Jack Morgenstein wrote:
>>> On Sunday 28 June 2009 19:09, Moni Shoua wrote:
>>>> maybe synchronizing the race with a completion var (like IPoIB does in 
>>>> struct ipoib_path) will help. I think this will work. I can send a patch 
>>>> if you want unless you see this idea doesn't work for this case.
>>>>
>>>> MoniS
>> What do you think of this (see below)?
>> I used a completion struct to synchronize between leave and join in a 
>> similar way that ipoib_path is using the field 'done'.
>> There is one difference though from the original code (I don't know if 
>> that's acceptable), here we wait for the join request to finish before we 
>> destroy the mcast struct and not canceling it.
>> Please note that I didn't compile this code. I just want to hear opinions 
>> first before I make a real patch out of this.
>>
> I have to think about this some.  I'm a bit nervous about 2 things:
> 1. Somehow, the complete never arrives, and the "leave" gets stuck forever 
> waiting.
Is that a concern or an experience after trying this patch?
If complete never arrives then this a bug that can be fixed (so I believe).
> 2. What if someone does another ifconfig ib0 up while we are waiting for the 
> complete?
Even so, I don't see a path that leads from ifconfig up to changing the status 
of join_req for a group that alreay exists.

>    (I think it is disregarded, but I'm not sure -- with this change and all).
> 
> Nice idea, though.
> -Jack
>> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h 
>> b/drivers/infiniband/ulp/ipoib/ipoib.h
>> index 753a983..a551258 100644
>> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
>> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
>> @@ -145,6 +145,7 @@ struct ipoib_mcast {
>>      struct sk_buff_head pkt_queue;
>>  
>>      struct net_device *dev;
>> +    struct completion     join_req;
>>  };
>>  
>>  struct ipoib_rx_buf {
>> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 
>> b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>> index a0e9753..afe72dd 100644
>> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>> @@ -117,6 +117,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct 
>> net_device *dev,
>>      mcast->dev = dev;
>>      mcast->created = jiffies;
>>      mcast->backoff = 1;
>> +    complete(&mcast->join_req);     
>>  
>>      INIT_LIST_HEAD(&mcast->list);
>>      INIT_LIST_HEAD(&mcast->neigh_list);
>> @@ -286,6 +287,7 @@ ipoib_mcast_sendonly_join_complete(int status,
>>      if (status == -ENETRESET)
>>              return 0;
>>  
>> +    complete(&mcast->join_req);
>>      if (!status)
>>              status = ipoib_mcast_join_finish(mcast, &multicast->rec);
>>  
>> @@ -336,6 +338,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast 
>> *mcast)
>>      rec.port_gid = priv->local_gid;
>>      rec.pkey     = cpu_to_be16(priv->pkey);
>>  
>> +    init_completion(&mcast->join_req);
>>      mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
>>                                       priv->port, &rec,
>>                                       IB_SA_MCMEMBER_REC_MGID        |
>> @@ -384,6 +387,7 @@ static int ipoib_mcast_join_complete(int status,
>>                      mcast->mcmember.mgid.raw, status);
>>  
>>      /* We trap for port events ourselves. */
>> +    complete(&mcast->join_req);
>>      if (status == -ENETRESET)
>>              return 0;
>>  
>> @@ -482,10 +486,12 @@ static void ipoib_mcast_join(struct net_device *dev, 
>> struct ipoib_mcast *mcast,
>>      }
>>  
>>      set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>> +    init_completion(&mcast->join_req);
>>      mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
>>                                       &rec, comp_mask, GFP_KERNEL,
>>                                       ipoib_mcast_join_complete, mcast);
>>      if (IS_ERR(mcast->mc)) {
>> +            complete(&mcast->join_req);
>>              clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>>              ret = PTR_ERR(mcast->mc);
>>              ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", 
>> ret);
>> @@ -630,8 +636,7 @@ static int ipoib_mcast_leave(struct net_device *dev, 
>> struct ipoib_mcast *mcast)
>>      struct ipoib_dev_priv *priv = netdev_priv(dev);
>>      int ret = 0;
>>  
>> -    if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
>> -            ib_sa_free_multicast(mcast->mc);
>> +    wait_for_completion(mcast->join_req);
>>  
>>      if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
>>              ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
>>
> _______________________________________________
> general mailing list
> [email protected]
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> 
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
> 

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to