Hi Ira,

On 09:34 Mon 20 Aug     , Ira Weiny wrote:
> Sasha,
> 
> Should this be applied to 1.2?

Yes, I think it should, although don't know when next 1.2.x release will
be.

Sasha

> 
> Ira
> 
> 
> On Mon, 20 Aug 2007 16:35:30 +0300
> Sasha Khapyorsky <[EMAIL PROTECTED]> wrote:
> 
> > 
> > When MAD sending fails in osm_vendor_send() the send_err_callback() is
> > invoked - this callback maintains (decreases by 1) the outstanding MAD
> > counters. In the current osm_vl15_poller() code those MAD counters are
> > also explicitly decreased in the case when osm_vendor_send() returns
> > error - so actually we have "double free" case and as result OpenSM
> > deadlocks there.
> > 
> > This patch removes this additional outstanding mad counters decreasing
> > code from osm_vl15_poller().
> > 
> > Signed-off-by: Sasha Khapyorsky <[EMAIL PROTECTED]>
> > ---
> >  opensm/include/opensm/osm_vl15intf.h |   28 +---------
> >  opensm/opensm/osm_opensm.c           |    7 +-
> >  opensm/opensm/osm_vl15intf.c         |  103 
> > +++++-----------------------------
> >  3 files changed, 18 insertions(+), 120 deletions(-)
> > 
> > diff --git a/opensm/include/opensm/osm_vl15intf.h 
> > b/opensm/include/opensm/osm_vl15intf.h
> > index 6de9898..4b290d3 100644
> > --- a/opensm/include/opensm/osm_vl15intf.h
> > +++ b/opensm/include/opensm/osm_vl15intf.h
> > @@ -53,13 +53,11 @@
> >  #include <complib/cl_event.h>
> >  #include <complib/cl_thread.h>
> >  #include <complib/cl_qlist.h>
> > -#include <complib/cl_passivelock.h>
> >  #include <opensm/osm_stats.h>
> >  #include <opensm/osm_log.h>
> >  #include <opensm/osm_madw.h>
> >  #include <opensm/osm_mad_pool.h>
> >  #include <vendor/osm_vendor.h>
> > -#include <opensm/osm_subnet.h>
> >  
> >  #ifdef __cplusplus
> >  #  define BEGIN_C_DECLS extern "C" {
> > @@ -132,10 +130,6 @@ typedef struct _osm_vl15 {
> >     osm_vendor_t *p_vend;
> >     osm_log_t *p_log;
> >     osm_stats_t *p_stats;
> > -   osm_subn_t *p_subn;
> > -   cl_disp_reg_handle_t h_disp;
> > -   cl_plock_t *p_lock;
> > -
> >  } osm_vl15_t;
> >  /*
> >  * FIELDS
> > @@ -174,15 +168,6 @@ typedef struct _osm_vl15 {
> >  *  p_stats
> >  *          Pointer to the OpenSM statistics block.
> >  *
> > -*  p_subn
> > -*     Pointer to the Subnet object for this subnet.
> > -*
> > -*  h_disp
> > -*    Handle returned from dispatcher registration.
> > -*
> > -*  p_lock
> > -*          Pointer to the serializing lock.
> > -*
> >  * SEE ALSO
> >  *  VL15 object
> >  *********/
> > @@ -267,9 +252,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
> >           IN osm_vendor_t * const p_vend,
> >           IN osm_log_t * const p_log,
> >           IN osm_stats_t * const p_stats,
> > -         IN const int32_t max_wire_smps,
> > -         IN osm_subn_t * const p_subn,
> > -         IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock);
> > +         IN const int32_t max_wire_smps);
> >  /*
> >  * PARAMETERS
> >  *  p_vl15
> > @@ -287,15 +270,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl15,
> >  *  max_wire_smps
> >  *          [in] Maximum number of MADs allowed on the wire at one time.
> >  *
> > -*  p_subn
> > -*     [in] Pointer to the subnet object.
> > -*
> > -*  p_disp
> > -*     [in] Pointer to the dispatcher object.
> > -*
> > -*  p_lock
> > -*          [in] Pointer to the OpenSM serializing lock.
> > -*
> >  * RETURN VALUES
> >  *  IB_SUCCESS if the VL15 object was initialized successfully.
> >  *
> > diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
> > index 9a596dd..329305e 100644
> > --- a/opensm/opensm/osm_opensm.c
> > +++ b/opensm/opensm/osm_opensm.c
> > @@ -249,10 +249,9 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
> >     if (status != IB_SUCCESS)
> >             goto Exit;
> >  
> > -   status = osm_vl15_init(&p_osm->vl15,
> > -                          p_osm->p_vendor,
> > -                          &p_osm->log, &p_osm->stats, p_opt->max_wire_smps,
> > -                          &p_osm->subn, &p_osm->disp, &p_osm->lock);
> > +   status = osm_vl15_init(&p_osm->vl15, p_osm->p_vendor,
> > +                          &p_osm->log, &p_osm->stats,
> > +                          p_opt->max_wire_smps);
> >     if (status != IB_SUCCESS)
> >             goto Exit;
> >  
> > diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c
> > index bc667b6..af44423 100644
> > --- a/opensm/opensm/osm_vl15intf.c
> > +++ b/opensm/opensm/osm_vl15intf.c
> > @@ -51,13 +51,12 @@
> >  
> >  #include <string.h>
> >  #include <iba/ib_types.h>
> > +#include <complib/cl_thread.h>
> > +#include <vendor/osm_vendor_api.h>
> >  #include <opensm/osm_vl15intf.h>
> >  #include <opensm/osm_madw.h>
> > -#include <vendor/osm_vendor_api.h>
> >  #include <opensm/osm_log.h>
> >  #include <opensm/osm_helper.h>
> > -#include <complib/cl_thread.h>
> > -#include <signal.h>
> >  
> >  /**********************************************************************
> >   **********************************************************************/
> > @@ -65,18 +64,13 @@
> >  static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw)
> >  {
> >     ib_api_status_t status;
> > -   cl_status_t cl_status;
> > -   uint32_t mads_sent;
> > -   uint32_t unicasts_sent;
> > -   uint32_t mads_on_wire;
> > -   uint32_t outstanding;
> >  
> >     /*
> >        Non-response-expected mads are not throttled on the wire
> >        since we can have no confirmation that they arrived
> >        at their destination.
> >      */
> > -   if (p_madw->resp_expected == TRUE) {
> > +   if (p_madw->resp_expected == TRUE)
> >             /*
> >                Note that other threads may not see the response MAD
> >                arrive before send() even returns.
> > @@ -84,14 +78,11 @@ static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t 
> > * p_madw)
> >                To avoid this confusion, preincrement the counts on the
> >                assumption that send() will succeed.
> >              */
> > -           mads_on_wire =
> > -               cl_atomic_inc(&p_vl->p_stats->qp0_mads_outstanding_on_wire);
> > -           CL_ASSERT(mads_on_wire <= p_vl->max_wire_smps);
> > -   } else
> > -           unicasts_sent =
> > -               cl_atomic_inc(&p_vl->p_stats->qp0_unicasts_sent);
> > +           cl_atomic_inc(&p_vl->p_stats->qp0_mads_outstanding_on_wire);
> > +   else
> > +           cl_atomic_inc(&p_vl->p_stats->qp0_unicasts_sent);
> >  
> > -   mads_sent = cl_atomic_inc(&p_vl->p_stats->qp0_mads_sent);
> > +   cl_atomic_inc(&p_vl->p_stats->qp0_mads_sent);
> >  
> >     status = osm_vendor_send(osm_madw_get_bind_handle(p_madw),
> >                              p_madw, p_madw->resp_expected);
> > @@ -118,61 +109,12 @@ static void vl15_send_mad(osm_vl15_t * p_vl, 
> > osm_madw_t * p_madw)
> >        fix up the pre-incremented count values.
> >      */
> >  
> > -   /* Decrement qp0_mads_sent and qp0_mads_outstanding_on_wire
> > -      that were incremented in the code above. */
> > -   mads_sent = cl_atomic_dec(&p_vl->p_stats->qp0_mads_sent);
> > -
> > -   if (p_madw->resp_expected == FALSE)
> > -           return;
> > -
> > -   cl_atomic_dec(&p_vl->p_stats->qp0_mads_outstanding_on_wire);
> > -
> > -   /*
> > -      The following code is similar to the code in
> > -      __osm_sm_mad_ctrl_retire_trans_mad. We need to decrement the
> > -      qp0_mads_outstanding counter, and if we reached 0 - need to call
> > -      the cl_disp_post with OSM_SIGNAL_NO_PENDING_TRANSACTION (in order
> > -      to wake up the state mgr).
> > -      There is one difference from the code in 
> > __osm_sm_mad_ctrl_retire_trans_mad.
> > -      This code is called for all (vl15) mads, if osm_vendor_send() 
> > failed, unlike
> > -      __osm_sm_mad_ctrl_retire_trans_mad which is called only on mads where
> > -      resp_expected == TRUE. As a result, the qp0_mads_outstanding counter
> > -      should be decremented and handled accordingly only if this is a mad
> > -      with resp_expected == TRUE.
> > -    */
> > -
> > -   outstanding = cl_atomic_dec(&p_vl->p_stats->qp0_mads_outstanding);
> > -
> > -   osm_log(p_vl->p_log, OSM_LOG_DEBUG,
> > -           "__osm_vl15_poller: "
> > -           "%u QP0 MADs outstanding\n",
> > -           p_vl->p_stats->qp0_mads_outstanding);
> > -
> > -   if (outstanding)
> > -           return;
> > -
> > -   /*
> > -      The wire is clean.
> > -      Signal the state manager.
> > -    */
> > -   if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG))
> > -           osm_log(p_vl->p_log,
> > -                   OSM_LOG_DEBUG,
> > -                   "__osm_vl15_poller: "
> > -                   "Posting Dispatcher message %s\n",
> > -                   osm_get_disp_msg_str(OSM_MSG_NO_SMPS_OUTSTANDING));
> > -
> > -   cl_status = cl_disp_post(p_vl->h_disp,
> > -                            OSM_MSG_NO_SMPS_OUTSTANDING, (void *)
> > -                            OSM_SIGNAL_NO_PENDING_TRANSACTIONS,
> > -                            NULL, NULL);
> > -
> > -   if (cl_status != CL_SUCCESS)
> > -           osm_log(p_vl->p_log,
> > -                   OSM_LOG_ERROR,
> > -                   "__osm_vl15_poller: ERR 3E06: "
> > -                   "Dispatcher post message failed (%s)\n",
> > -                   CL_STATUS_MSG(cl_status));
> > +   /* Decrement qp0_mads_sent that were incremented in the code above.
> > +      qp0_mads_outstanding will be decremented by send error callback
> > +      (called by osm_vendor_send() */
> > +   cl_atomic_dec(&p_vl->p_stats->qp0_mads_sent);
> > +   if (!p_madw->resp_expected)
> > +           cl_atomic_dec(&p_vl->p_stats->qp0_unicasts_sent);
> >  }
> >  
> >  static void __osm_vl15_poller(IN void *p_ptr)
> > @@ -260,7 +202,6 @@ void osm_vl15_construct(IN osm_vl15_t * const p_vl)
> >     cl_qlist_init(&p_vl->rfifo);
> >     cl_qlist_init(&p_vl->ufifo);
> >     cl_thread_construct(&p_vl->poller);
> > -   p_vl->h_disp = CL_DISP_INVALID_HANDLE;
> >  }
> >  
> >  /**********************************************************************
> > @@ -317,9 +258,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl,
> >           IN osm_vendor_t * const p_vend,
> >           IN osm_log_t * const p_log,
> >           IN osm_stats_t * const p_stats,
> > -         IN const int32_t max_wire_smps,
> > -         IN osm_subn_t * const p_subn,
> > -         IN cl_dispatcher_t * const p_disp, IN cl_plock_t * const p_lock)
> > +         IN const int32_t max_wire_smps)
> >  {
> >     ib_api_status_t status = IB_SUCCESS;
> >  
> > @@ -329,8 +268,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl,
> >     p_vl->p_log = p_log;
> >     p_vl->p_stats = p_stats;
> >     p_vl->max_wire_smps = max_wire_smps;
> > -   p_vl->p_subn = p_subn;
> > -   p_vl->p_lock = p_lock;
> >  
> >     status = cl_event_init(&p_vl->signal, FALSE);
> >     if (status != IB_SUCCESS)
> > @@ -351,16 +288,6 @@ osm_vl15_init(IN osm_vl15_t * const p_vl,
> >     if (status != IB_SUCCESS)
> >             goto Exit;
> >  
> > -   p_vl->h_disp = cl_disp_register(p_disp, CL_DISP_MSGID_NONE, NULL, NULL);
> > -
> > -   if (p_vl->h_disp == CL_DISP_INVALID_HANDLE) {
> > -           osm_log(p_log, OSM_LOG_ERROR,
> > -                   "osm_vl15_init: ERR 3E01: "
> > -                   "Dispatcher registration failed\n");
> > -           status = IB_INSUFFICIENT_RESOURCES;
> > -           goto Exit;
> > -   }
> > -
> >        Exit:
> >     OSM_LOG_EXIT(p_log);
> >     return (status);
> > @@ -444,8 +371,6 @@ osm_vl15_shutdown(IN osm_vl15_t * const p_vl,
> >     /* grap a lock on the object */
> >     cl_spinlock_acquire(&p_vl->lock);
> >  
> > -   cl_disp_unregister(p_vl->h_disp);
> > -
> >     /* go over all outstanding MADs and retire their transactions */
> >  
> >     /* first we handle the list of response MADs */
> > -- 
> > 1.5.3.rc2.29.gc4640f
> > 
> > _______________________________________________
> > general mailing list
> > [email protected]
> > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> > 
> > To unsubscribe, please visit 
> > http://openib.org/mailman/listinfo/openib-general
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to