Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking
Sasha Khapyorsky wrote: Hi Ira, On 09:34 Mon 20 Aug, Ira Weiny wrote: Sasha, Should this be applied to 1.2? Yes, I think it should, although I don't know when the next 1.2.x release will be. Sasha Ira, Until (and if) we have another 1.2.x release, you can apply the patch to your local OFED copy using the ofed_patch.sh script that is located under the OFED docs. Tziporet ___ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking
On Tue, 21 Aug 2007 17:46:33 +0300 Tziporet Koren [EMAIL PROTECTED] wrote: Sasha Khapyorsky wrote: Hi Ira, <snip> Yes, I think it should, although I don't know when the next 1.2.x release will be. Ira, Until (and if) we have another 1.2.x release, you can apply the patch to your local OFED copy using the ofed_patch.sh script that is located under the OFED docs. Thanks, we have already applied it using patch to our local repo here. I think we have seen this issue with our 1.2 here and I wondered if this patch would be applicable to 1.2. Sasha, I will let you know if there are issues. Thanks, Ira ___ general mailing list general@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
[ofa-general] [PATCH] opensm: fix outstanding mad counters tracking
When MAD sending fails in osm_vendor_send(), the send_err_callback() is invoked - this callback maintains (decreases by 1) the outstanding MAD counters. In the current osm_vl15_poller() code those MAD counters are also explicitly decreased in the case when osm_vendor_send() returns an error - so we actually have a double-free (double-decrement) case and as a result OpenSM deadlocks there. This patch removes this additional outstanding MAD counter decrementing code from osm_vl15_poller(). Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED] --- opensm/include/opensm/osm_vl15intf.h | 28 +- opensm/opensm/osm_opensm.c | 7 +- opensm/opensm/osm_vl15intf.c | 103 +- 3 files changed, 18 insertions(+), 120 deletions(-) diff --git a/opensm/include/opensm/osm_vl15intf.h b/opensm/include/opensm/osm_vl15intf.h index 6de9898..4b290d3 100644 --- a/opensm/include/opensm/osm_vl15intf.h +++ b/opensm/include/opensm/osm_vl15intf.h @@ -53,13 +53,11 @@ #include <complib/cl_event.h> #include <complib/cl_thread.h> #include <complib/cl_qlist.h> -#include <complib/cl_passivelock.h> #include <opensm/osm_stats.h> #include <opensm/osm_log.h> #include <opensm/osm_madw.h> #include <opensm/osm_mad_pool.h> #include <vendor/osm_vendor.h> -#include <opensm/osm_subnet.h> #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -132,10 +130,6 @@ typedef struct _osm_vl15 { osm_vendor_t *p_vend; osm_log_t *p_log; osm_stats_t *p_stats; - osm_subn_t *p_subn; - cl_disp_reg_handle_t h_disp; - cl_plock_t *p_lock; - } osm_vl15_t; /* * FIELDS @@ -174,15 +168,6 @@ typedef struct _osm_vl15 { * p_stats * Pointer to the OpenSM statistics block. * -* p_subn -* Pointer to the Subnet object for this subnet. -* -* h_disp -*Handle returned from dispatcher registration. -* -* p_lock -* Pointer to the serializing lock. 
-* * SEE ALSO * VL15 object */ @@ -267,9 +252,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15, IN osm_vendor_t * const p_vend, IN osm_log_t * const p_log, IN osm_stats_t * const p_stats, - IN const int32_t max_wire_smps, - IN osm_subn_t * const p_subn, - IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock); + IN const int32_t max_wire_smps); /* * PARAMETERS * p_vl15 @@ -287,15 +270,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15, * max_wire_smps * [in] Maximum number of MADs allowed on the wire at one time. * -* p_subn -* [in] Pointer to the subnet object. -* -* p_disp -* [in] Pointer to the dispatcher object. -* -* p_lock -* [in] Pointer to the OpenSM serializing lock. -* * RETURN VALUES * IB_SUCCESS if the VL15 object was initialized successfully. * diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c index 9a596dd..329305e 100644 --- a/opensm/opensm/osm_opensm.c +++ b/opensm/opensm/osm_opensm.c @@ -249,10 +249,9 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, if (status != IB_SUCCESS) goto Exit; - status = osm_vl15_init(&p_osm->vl15, - p_osm->p_vendor, - &p_osm->log, &p_osm->stats, p_opt->max_wire_smps, - &p_osm->subn, &p_osm->disp, &p_osm->lock); + status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor, + &p_osm->log, &p_osm->stats, + p_opt->max_wire_smps); if (status != IB_SUCCESS) goto Exit; diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c index bc667b6..af44423 100644 --- a/opensm/opensm/osm_vl15intf.c +++ b/opensm/opensm/osm_vl15intf.c @@ -51,13 +51,12 @@ #include <string.h> #include <iba/ib_types.h> +#include <complib/cl_thread.h> +#include <vendor/osm_vendor_api.h> #include <opensm/osm_vl15intf.h> #include <opensm/osm_madw.h> -#include <vendor/osm_vendor_api.h> #include <opensm/osm_log.h> #include <opensm/osm_helper.h> -#include <complib/cl_thread.h> -#include <signal.h> /** **/ @@ -65,18 +64,13 @@ static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw) { ib_api_status_t status; - cl_status_t cl_status; - uint32_t mads_sent; - uint32_t 
unicasts_sent; - uint32_t mads_on_wire; - uint32_t outstanding; /* Non-response-expected mads are not throttled on the wire since we can have no confirmation that they arrived at their destination. */ - if (p_madw->resp_expected == TRUE) { + if (p_madw->resp_expected == TRUE) /* Note that other threads may not see the response MAD arrive before send() even returns. @@ -84,14 +78,11 @@ static
Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking
Sasha, Should this be applied to 1.2? Ira On Mon, 20 Aug 2007 16:35:30 +0300 Sasha Khapyorsky [EMAIL PROTECTED] wrote: When MAD sending fails in osm_vendor_send(), the send_err_callback() is invoked - this callback maintains (decreases by 1) the outstanding MAD counters. In the current osm_vl15_poller() code those MAD counters are also explicitly decreased in the case when osm_vendor_send() returns an error - so we actually have a double-free (double-decrement) case and as a result OpenSM deadlocks there. This patch removes this additional outstanding MAD counter decrementing code from osm_vl15_poller(). Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED] --- opensm/include/opensm/osm_vl15intf.h | 28 +- opensm/opensm/osm_opensm.c | 7 +- opensm/opensm/osm_vl15intf.c | 103 +- 3 files changed, 18 insertions(+), 120 deletions(-) diff --git a/opensm/include/opensm/osm_vl15intf.h b/opensm/include/opensm/osm_vl15intf.h index 6de9898..4b290d3 100644 --- a/opensm/include/opensm/osm_vl15intf.h +++ b/opensm/include/opensm/osm_vl15intf.h @@ -53,13 +53,11 @@ #include <complib/cl_event.h> #include <complib/cl_thread.h> #include <complib/cl_qlist.h> -#include <complib/cl_passivelock.h> #include <opensm/osm_stats.h> #include <opensm/osm_log.h> #include <opensm/osm_madw.h> #include <opensm/osm_mad_pool.h> #include <vendor/osm_vendor.h> -#include <opensm/osm_subnet.h> #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -132,10 +130,6 @@ typedef struct _osm_vl15 { osm_vendor_t *p_vend; osm_log_t *p_log; osm_stats_t *p_stats; - osm_subn_t *p_subn; - cl_disp_reg_handle_t h_disp; - cl_plock_t *p_lock; - } osm_vl15_t; /* * FIELDS @@ -174,15 +168,6 @@ typedef struct _osm_vl15 { *p_stats *Pointer to the OpenSM statistics block. * -* p_subn -* Pointer to the Subnet object for this subnet. -* -* h_disp -*Handle returned from dispatcher registration. -* -*p_lock -*Pointer to the serializing lock. 
-* * SEE ALSO *VL15 object */ @@ -267,9 +252,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15, IN osm_vendor_t * const p_vend, IN osm_log_t * const p_log, IN osm_stats_t * const p_stats, - IN const int32_t max_wire_smps, - IN osm_subn_t * const p_subn, - IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock); + IN const int32_t max_wire_smps); /* * PARAMETERS *p_vl15 @@ -287,15 +270,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15, *max_wire_smps *[in] Maximum number of MADs allowed on the wire at one time. * -* p_subn -* [in] Pointer to the subnet object. -* -* p_disp -* [in] Pointer to the dispatcher object. -* -*p_lock -*[in] Pointer to the OpenSM serializing lock. -* * RETURN VALUES *IB_SUCCESS if the VL15 object was initialized successfully. * diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c index 9a596dd..329305e 100644 --- a/opensm/opensm/osm_opensm.c +++ b/opensm/opensm/osm_opensm.c @@ -249,10 +249,9 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, if (status != IB_SUCCESS) goto Exit; - status = osm_vl15_init(&p_osm->vl15, -p_osm->p_vendor, -&p_osm->log, &p_osm->stats, p_opt->max_wire_smps, -&p_osm->subn, &p_osm->disp, &p_osm->lock); + status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor, +&p_osm->log, &p_osm->stats, +p_opt->max_wire_smps); if (status != IB_SUCCESS) goto Exit; diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c index bc667b6..af44423 100644 --- a/opensm/opensm/osm_vl15intf.c +++ b/opensm/opensm/osm_vl15intf.c @@ -51,13 +51,12 @@ #include <string.h> #include <iba/ib_types.h> +#include <complib/cl_thread.h> +#include <vendor/osm_vendor_api.h> #include <opensm/osm_vl15intf.h> #include <opensm/osm_madw.h> -#include <vendor/osm_vendor_api.h> #include <opensm/osm_log.h> #include <opensm/osm_helper.h> -#include <complib/cl_thread.h> -#include <signal.h> /** **/ @@ -65,18 +64,13 @@ static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw) { ib_api_status_t status; - cl_status_t cl_status; - uint32_t mads_sent; - uint32_t 
unicasts_sent; - uint32_t mads_on_wire; - uint32_t outstanding; /* Non-response-expected mads are not throttled on the wire since we can have no confirmation that they arrived at their destination. */ - if (p_madw->resp_expected == TRUE) { + if (p_madw->resp_expected ==
Re: [ofa-general] [PATCH] opensm: fix outstanding mad counters tracking
Hi Ira, On 09:34 Mon 20 Aug, Ira Weiny wrote: Sasha, Should this be applied to 1.2? Yes, I think it should, although I don't know when the next 1.2.x release will be. Sasha Ira On Mon, 20 Aug 2007 16:35:30 +0300 Sasha Khapyorsky [EMAIL PROTECTED] wrote: When MAD sending fails in osm_vendor_send(), the send_err_callback() is invoked - this callback maintains (decreases by 1) the outstanding MAD counters. In the current osm_vl15_poller() code those MAD counters are also explicitly decreased in the case when osm_vendor_send() returns an error - so we actually have a double-free (double-decrement) case and as a result OpenSM deadlocks there. This patch removes this additional outstanding MAD counter decrementing code from osm_vl15_poller(). Signed-off-by: Sasha Khapyorsky [EMAIL PROTECTED] --- opensm/include/opensm/osm_vl15intf.h | 28 +- opensm/opensm/osm_opensm.c | 7 +- opensm/opensm/osm_vl15intf.c | 103 +- 3 files changed, 18 insertions(+), 120 deletions(-) diff --git a/opensm/include/opensm/osm_vl15intf.h b/opensm/include/opensm/osm_vl15intf.h index 6de9898..4b290d3 100644 --- a/opensm/include/opensm/osm_vl15intf.h +++ b/opensm/include/opensm/osm_vl15intf.h @@ -53,13 +53,11 @@ #include <complib/cl_event.h> #include <complib/cl_thread.h> #include <complib/cl_qlist.h> -#include <complib/cl_passivelock.h> #include <opensm/osm_stats.h> #include <opensm/osm_log.h> #include <opensm/osm_madw.h> #include <opensm/osm_mad_pool.h> #include <vendor/osm_vendor.h> -#include <opensm/osm_subnet.h> #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -132,10 +130,6 @@ typedef struct _osm_vl15 { osm_vendor_t *p_vend; osm_log_t *p_log; osm_stats_t *p_stats; - osm_subn_t *p_subn; - cl_disp_reg_handle_t h_disp; - cl_plock_t *p_lock; - } osm_vl15_t; /* * FIELDS @@ -174,15 +168,6 @@ typedef struct _osm_vl15 { * p_stats * Pointer to the OpenSM statistics block. * -* p_subn -* Pointer to the Subnet object for this subnet. -* -* h_disp -*Handle returned from dispatcher registration. -* -* p_lock -* Pointer to the serializing lock. 
-* * SEE ALSO * VL15 object */ @@ -267,9 +252,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15, IN osm_vendor_t * const p_vend, IN osm_log_t * const p_log, IN osm_stats_t * const p_stats, - IN const int32_t max_wire_smps, - IN osm_subn_t * const p_subn, - IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock); + IN const int32_t max_wire_smps); /* * PARAMETERS * p_vl15 @@ -287,15 +270,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15, * max_wire_smps * [in] Maximum number of MADs allowed on the wire at one time. * -* p_subn -* [in] Pointer to the subnet object. -* -* p_disp -* [in] Pointer to the dispatcher object. -* -* p_lock -* [in] Pointer to the OpenSM serializing lock. -* * RETURN VALUES * IB_SUCCESS if the VL15 object was initialized successfully. * diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c index 9a596dd..329305e 100644 --- a/opensm/opensm/osm_opensm.c +++ b/opensm/opensm/osm_opensm.c @@ -249,10 +249,9 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, if (status != IB_SUCCESS) goto Exit; - status = osm_vl15_init(&p_osm->vl15, - p_osm->p_vendor, - &p_osm->log, &p_osm->stats, p_opt->max_wire_smps, - &p_osm->subn, &p_osm->disp, &p_osm->lock); + status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor, + &p_osm->log, &p_osm->stats, + p_opt->max_wire_smps); if (status != IB_SUCCESS) goto Exit; diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c index bc667b6..af44423 100644 --- a/opensm/opensm/osm_vl15intf.c +++ b/opensm/opensm/osm_vl15intf.c @@ -51,13 +51,12 @@ #include <string.h> #include <iba/ib_types.h> +#include <complib/cl_thread.h> +#include <vendor/osm_vendor_api.h> #include <opensm/osm_vl15intf.h> #include <opensm/osm_madw.h> -#include <vendor/osm_vendor_api.h> #include <opensm/osm_log.h> #include <opensm/osm_helper.h> -#include <complib/cl_thread.h> -#include <signal.h> /** **/ @@ -65,18 +64,13 @@ static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw) { ib_api_status_t status; - cl_status_t cl_status; - uint32_t mads_sent; - uint32_t 
unicasts_sent; - uint32_t mads_on_wire; - uint32_t outstanding; /* Non-response-expected mads are not