Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking

2007-08-21 Thread Tziporet Koren

Sasha Khapyorsky wrote:

Hi Ira,

On 09:34 Mon 20 Aug , Ira Weiny wrote:
  

Sasha,

Should this be applied to 1.2?



Yes, I think it should, although I don't know when the next 1.2.x release
will be.

Sasha

  


Ira,
Until (and unless) we have another 1.2.x release, you can apply the patch to
your local OFED copy using the ofed_patch.sh script, which is located
under the OFED docs.


Tziporet
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking

2007-08-21 Thread Ira Weiny
On Tue, 21 Aug 2007 17:46:33 +0300
Tziporet Koren [EMAIL PROTECTED] wrote:

 Sasha Khapyorsky wrote:
  Hi Ira,
 

snip

 
  Yes, I think it should, although don't know when next 1.2.x release will
  be.
 
 Ira,
 Till (and if) we have another 1.2.x release you can apply the patch on 
 your local OFED copy using the ofed_patch.sh script that is located 
 under OFED docs.
 

Thanks, we have already applied it using patch to our local repo here.

I think we have seen this issue with our 1.2 here and I wondered if this patch
would be applicable to 1.2.

Sasha, I will let you know if there are issues.

Thanks,
Ira
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] [PATCH] opensm: fix outstanding mad counters tracking

2007-08-20 Thread Sasha Khapyorsky

When MAD sending fails in osm_vendor_send(), the send_err_callback() is
invoked - this callback maintains (decreases by 1) the outstanding MAD
counters. In the current osm_vl15_poller() code those MAD counters are
also explicitly decreased when osm_vendor_send() returns an error - so
the counters are actually decremented twice (a "double free" case), and
as a result OpenSM deadlocks there.

This patch removes this additional decrement of the outstanding MAD
counters from osm_vl15_poller().

Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED]
---
 opensm/include/opensm/osm_vl15intf.h |   28 +-
 opensm/opensm/osm_opensm.c   |7 +-
 opensm/opensm/osm_vl15intf.c |  103 +-
 3 files changed, 18 insertions(+), 120 deletions(-)

diff --git a/opensm/include/opensm/osm_vl15intf.h 
b/opensm/include/opensm/osm_vl15intf.h
index 6de9898..4b290d3 100644
--- a/opensm/include/opensm/osm_vl15intf.h
+++ b/opensm/include/opensm/osm_vl15intf.h
@@ -53,13 +53,11 @@
 #include <complib/cl_event.h>
 #include <complib/cl_thread.h>
 #include <complib/cl_qlist.h>
-#include <complib/cl_passivelock.h>
 #include <opensm/osm_stats.h>
 #include <opensm/osm_log.h>
 #include <opensm/osm_madw.h>
 #include <opensm/osm_mad_pool.h>
 #include <vendor/osm_vendor.h>
-#include <opensm/osm_subnet.h>
 
 #ifdef __cplusplus
 #  define BEGIN_C_DECLS extern "C" {
@@ -132,10 +130,6 @@ typedef struct _osm_vl15 {
osm_vendor_t *p_vend;
osm_log_t *p_log;
osm_stats_t *p_stats;
-   osm_subn_t *p_subn;
-   cl_disp_reg_handle_t h_disp;
-   cl_plock_t *p_lock;
-
 } osm_vl15_t;
 /*
 * FIELDS
@@ -174,15 +168,6 @@ typedef struct _osm_vl15 {
 *  p_stats
 *  Pointer to the OpenSM statistics block.
 *
-*  p_subn
-* Pointer to the Subnet object for this subnet.
-*
-*  h_disp
-*Handle returned from dispatcher registration.
-*
-*  p_lock
-*  Pointer to the serializing lock.
-*
 * SEE ALSO
 *  VL15 object
 */
@@ -267,9 +252,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
  IN osm_vendor_t * const p_vend,
  IN osm_log_t * const p_log,
  IN osm_stats_t * const p_stats,
- IN const int32_t max_wire_smps,
- IN osm_subn_t * const p_subn,
- IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock);
+ IN const int32_t max_wire_smps);
 /*
 * PARAMETERS
 *  p_vl15
@@ -287,15 +270,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
 *  max_wire_smps
 *  [in] Maximum number of MADs allowed on the wire at one time.
 *
-*  p_subn
-* [in] Pointer to the subnet object.
-*
-*  p_disp
-* [in] Pointer to the dispatcher object.
-*
-*  p_lock
-*  [in] Pointer to the OpenSM serializing lock.
-*
 * RETURN VALUES
 *  IB_SUCCESS if the VL15 object was initialized successfully.
 *
diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
index 9a596dd..329305e 100644
--- a/opensm/opensm/osm_opensm.c
+++ b/opensm/opensm/osm_opensm.c
@@ -249,10 +249,9 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
if (status != IB_SUCCESS)
goto Exit;
 
-   status = osm_vl15_init(&p_osm->vl15,
-  p_osm->p_vendor,
-  &p_osm->log, &p_osm->stats, p_opt->max_wire_smps,
-  &p_osm->subn, &p_osm->disp, &p_osm->lock);
+   status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor,
+  &p_osm->log, &p_osm->stats,
+  p_opt->max_wire_smps);
if (status != IB_SUCCESS)
goto Exit;
 
diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c
index bc667b6..af44423 100644
--- a/opensm/opensm/osm_vl15intf.c
+++ b/opensm/opensm/osm_vl15intf.c
@@ -51,13 +51,12 @@
 
 #include <string.h>
 #include <iba/ib_types.h>
+#include <complib/cl_thread.h>
+#include <vendor/osm_vendor_api.h>
 #include <opensm/osm_vl15intf.h>
 #include <opensm/osm_madw.h>
-#include <vendor/osm_vendor_api.h>
 #include <opensm/osm_log.h>
 #include <opensm/osm_helper.h>
-#include <complib/cl_thread.h>
-#include <signal.h>
 
 /**
  **/
@@ -65,18 +64,13 @@
 static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw)
 {
ib_api_status_t status;
-   cl_status_t cl_status;
-   uint32_t mads_sent;
-   uint32_t unicasts_sent;
-   uint32_t mads_on_wire;
-   uint32_t outstanding;
 
/*
   Non-response-expected mads are not throttled on the wire
   since we can have no confirmation that they arrived
   at their destination.
 */
-   if (p_madw->resp_expected == TRUE) {
+   if (p_madw->resp_expected == TRUE)
/*
   Note that other threads may not see the response MAD
   arrive before send() even returns.
@@ -84,14 +78,11 @@ static 

Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking

2007-08-20 Thread Ira Weiny
Sasha,

Should this be applied to 1.2?

Ira


On Mon, 20 Aug 2007 16:35:30 +0300
Sasha Khapyorsky [EMAIL PROTECTED] wrote:

 
 When MAD sending fails in osm_vendor_send() the send_err_callback() is
 invoked - this callback maintains (decreases by 1) the outstanding MAD
 counters. In the current osm_vl15_poller() code those MAD counters are
 also explicitly decreased in the case when osm_vendor_send() returns
 error - so actually we have double free case and as result OpenSM
 deadlocks there.
 
 This patch removes this additional outstanding mad counters decreasing
 code from osm_vl15_poller().
 
 Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED]
 ---
  opensm/include/opensm/osm_vl15intf.h |   28 +-
  opensm/opensm/osm_opensm.c   |7 +-
  opensm/opensm/osm_vl15intf.c |  103 
 +-
  3 files changed, 18 insertions(+), 120 deletions(-)
 
 diff --git a/opensm/include/opensm/osm_vl15intf.h 
 b/opensm/include/opensm/osm_vl15intf.h
 index 6de9898..4b290d3 100644
 --- a/opensm/include/opensm/osm_vl15intf.h
 +++ b/opensm/include/opensm/osm_vl15intf.h
 @@ -53,13 +53,11 @@
  #include <complib/cl_event.h>
  #include <complib/cl_thread.h>
  #include <complib/cl_qlist.h>
 -#include <complib/cl_passivelock.h>
  #include <opensm/osm_stats.h>
  #include <opensm/osm_log.h>
  #include <opensm/osm_madw.h>
  #include <opensm/osm_mad_pool.h>
  #include <vendor/osm_vendor.h>
 -#include <opensm/osm_subnet.h>
  
  #ifdef __cplusplus
  #  define BEGIN_C_DECLS extern "C" {
 @@ -132,10 +130,6 @@ typedef struct _osm_vl15 {
   osm_vendor_t *p_vend;
   osm_log_t *p_log;
   osm_stats_t *p_stats;
 - osm_subn_t *p_subn;
 - cl_disp_reg_handle_t h_disp;
 - cl_plock_t *p_lock;
 -
  } osm_vl15_t;
  /*
  * FIELDS
 @@ -174,15 +168,6 @@ typedef struct _osm_vl15 {
  *p_stats
  *Pointer to the OpenSM statistics block.
  *
 -*  p_subn
 -* Pointer to the Subnet object for this subnet.
 -*
 -*  h_disp
 -*Handle returned from dispatcher registration.
 -*
 -*p_lock
 -*Pointer to the serializing lock.
 -*
  * SEE ALSO
  *VL15 object
  */
 @@ -267,9 +252,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
 IN osm_vendor_t * const p_vend,
 IN osm_log_t * const p_log,
 IN osm_stats_t * const p_stats,
 -   IN const int32_t max_wire_smps,
 -   IN osm_subn_t * const p_subn,
 -   IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock);
 +   IN const int32_t max_wire_smps);
  /*
  * PARAMETERS
  *p_vl15
 @@ -287,15 +270,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
  *max_wire_smps
  *[in] Maximum number of MADs allowed on the wire at one time.
  *
 -*  p_subn
 -* [in] Pointer to the subnet object.
 -*
 -*  p_disp
 -* [in] Pointer to the dispatcher object.
 -*
 -*p_lock
 -*[in] Pointer to the OpenSM serializing lock.
 -*
  * RETURN VALUES
  *IB_SUCCESS if the VL15 object was initialized successfully.
  *
 diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
 index 9a596dd..329305e 100644
 --- a/opensm/opensm/osm_opensm.c
 +++ b/opensm/opensm/osm_opensm.c
 @@ -249,10 +249,9 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
   if (status != IB_SUCCESS)
   goto Exit;
  
 - status = osm_vl15_init(&p_osm->vl15,
 -p_osm->p_vendor,
 -&p_osm->log, &p_osm->stats, p_opt->max_wire_smps,
 -&p_osm->subn, &p_osm->disp, &p_osm->lock);
 + status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor,
 +&p_osm->log, &p_osm->stats,
 +p_opt->max_wire_smps);
   if (status != IB_SUCCESS)
   goto Exit;
  
 diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c
 index bc667b6..af44423 100644
 --- a/opensm/opensm/osm_vl15intf.c
 +++ b/opensm/opensm/osm_vl15intf.c
 @@ -51,13 +51,12 @@
  
  #include <string.h>
  #include <iba/ib_types.h>
 +#include <complib/cl_thread.h>
 +#include <vendor/osm_vendor_api.h>
  #include <opensm/osm_vl15intf.h>
  #include <opensm/osm_madw.h>
 -#include <vendor/osm_vendor_api.h>
  #include <opensm/osm_log.h>
  #include <opensm/osm_helper.h>
 -#include <complib/cl_thread.h>
 -#include <signal.h>
  
  /**
   **/
 @@ -65,18 +64,13 @@
  static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw)
  {
   ib_api_status_t status;
 - cl_status_t cl_status;
 - uint32_t mads_sent;
 - uint32_t unicasts_sent;
 - uint32_t mads_on_wire;
 - uint32_t outstanding;
  
   /*
  Non-response-expected mads are not throttled on the wire
  since we can have no confirmation that they arrived
  at their destination.
*/
 - if (p_madw->resp_expected == TRUE) {
 + if (p_madw-resp_expected == 

Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking

2007-08-20 Thread Sasha Khapyorsky
Hi Ira,

On 09:34 Mon 20 Aug , Ira Weiny wrote:
 Sasha,
 
 Should this be applied to 1.2?

Yes, I think it should, although I don't know when the next 1.2.x release
will be.

Sasha

 
 Ira
 
 
 On Mon, 20 Aug 2007 16:35:30 +0300
 Sasha Khapyorsky [EMAIL PROTECTED] wrote:
 
  
  When MAD sending fails in osm_vendor_send() the send_err_callback() is
  invoked - this callback maintains (decreases by 1) the outstanding MAD
  counters. In the current osm_vl15_poller() code those MAD counters are
  also explicitly decreased in the case when osm_vendor_send() returns
  error - so actually we have double free case and as result OpenSM
  deadlocks there.
  
  This patch removes this additional outstanding mad counters decreasing
  code from osm_vl15_poller().
  
  Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED]
  ---
   opensm/include/opensm/osm_vl15intf.h |   28 +-
   opensm/opensm/osm_opensm.c   |7 +-
   opensm/opensm/osm_vl15intf.c |  103 
  +-
   3 files changed, 18 insertions(+), 120 deletions(-)
  
  diff --git a/opensm/include/opensm/osm_vl15intf.h 
  b/opensm/include/opensm/osm_vl15intf.h
  index 6de9898..4b290d3 100644
  --- a/opensm/include/opensm/osm_vl15intf.h
  +++ b/opensm/include/opensm/osm_vl15intf.h
  @@ -53,13 +53,11 @@
   #include <complib/cl_event.h>
   #include <complib/cl_thread.h>
   #include <complib/cl_qlist.h>
  -#include <complib/cl_passivelock.h>
   #include <opensm/osm_stats.h>
   #include <opensm/osm_log.h>
   #include <opensm/osm_madw.h>
   #include <opensm/osm_mad_pool.h>
   #include <vendor/osm_vendor.h>
  -#include <opensm/osm_subnet.h>
   
   #ifdef __cplusplus
   #  define BEGIN_C_DECLS extern "C" {
  @@ -132,10 +130,6 @@ typedef struct _osm_vl15 {
  osm_vendor_t *p_vend;
  osm_log_t *p_log;
  osm_stats_t *p_stats;
  -   osm_subn_t *p_subn;
  -   cl_disp_reg_handle_t h_disp;
  -   cl_plock_t *p_lock;
  -
   } osm_vl15_t;
   /*
   * FIELDS
  @@ -174,15 +168,6 @@ typedef struct _osm_vl15 {
   *  p_stats
   *  Pointer to the OpenSM statistics block.
   *
  -*  p_subn
  -* Pointer to the Subnet object for this subnet.
  -*
  -*  h_disp
  -*Handle returned from dispatcher registration.
  -*
  -*  p_lock
  -*  Pointer to the serializing lock.
  -*
   * SEE ALSO
   *  VL15 object
   */
  @@ -267,9 +252,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
IN osm_vendor_t * const p_vend,
IN osm_log_t * const p_log,
IN osm_stats_t * const p_stats,
  - IN const int32_t max_wire_smps,
  - IN osm_subn_t * const p_subn,
  - IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock);
  + IN const int32_t max_wire_smps);
   /*
   * PARAMETERS
   *  p_vl15
  @@ -287,15 +270,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
   *  max_wire_smps
   *  [in] Maximum number of MADs allowed on the wire at one time.
   *
  -*  p_subn
  -* [in] Pointer to the subnet object.
  -*
  -*  p_disp
  -* [in] Pointer to the dispatcher object.
  -*
  -*  p_lock
  -*  [in] Pointer to the OpenSM serializing lock.
  -*
   * RETURN VALUES
   *  IB_SUCCESS if the VL15 object was initialized successfully.
   *
  diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
  index 9a596dd..329305e 100644
  --- a/opensm/opensm/osm_opensm.c
  +++ b/opensm/opensm/osm_opensm.c
  @@ -249,10 +249,9 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
  if (status != IB_SUCCESS)
  goto Exit;
   
  -   status = osm_vl15_init(&p_osm->vl15,
  -  p_osm->p_vendor,
  -  &p_osm->log, &p_osm->stats, p_opt->max_wire_smps,
  -  &p_osm->subn, &p_osm->disp, &p_osm->lock);
  +   status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor,
  +  &p_osm->log, &p_osm->stats,
  +  p_opt->max_wire_smps);
  if (status != IB_SUCCESS)
  goto Exit;
   
  diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c
  index bc667b6..af44423 100644
  --- a/opensm/opensm/osm_vl15intf.c
  +++ b/opensm/opensm/osm_vl15intf.c
  @@ -51,13 +51,12 @@
   
   #include <string.h>
   #include <iba/ib_types.h>
  +#include <complib/cl_thread.h>
  +#include <vendor/osm_vendor_api.h>
   #include <opensm/osm_vl15intf.h>
   #include <opensm/osm_madw.h>
  -#include <vendor/osm_vendor_api.h>
   #include <opensm/osm_log.h>
   #include <opensm/osm_helper.h>
  -#include <complib/cl_thread.h>
  -#include <signal.h>
   
   /**
**/
  @@ -65,18 +64,13 @@
   static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw)
   {
  ib_api_status_t status;
  -   cl_status_t cl_status;
  -   uint32_t mads_sent;
  -   uint32_t unicasts_sent;
  -   uint32_t mads_on_wire;
  -   uint32_t outstanding;
   
  /*
 Non-response-expected mads are not