Hopefully by the end of the day - Nathan is testing now.

Sam

On Mar 1, 2012, at 11:36 AM, Jeffrey Squyres wrote:

> ...or in 1.5.5.  
> 
> How soon will you be able to tell if it fixes some hangs?
> 
> 
> On Mar 1, 2012, at 10:56 AM, Nathan Hjelm wrote:
> 
>> Found a pretty nasty frag leak (and a minor one) in ob1 (see commit below). 
> >> If this fix addresses some hangs we are seeing on InfiniBand, LANL might want 
> >> a 1.4.6 rolled (or a faster rollout for 1.6.0).
>> 
>> -Nathan
>> 
>> ---------- Forwarded message ----------
>> Date: Thu, 1 Mar 2012 08:53:39 -0700
>> From: hje...@osl.iu.edu
>> Reply-To: de...@open-mpi.org
>> To: s...@open-mpi.org
>> Subject: [OMPI svn] svn:open-mpi r26077
>> 
>> Author: hjelmn
>> Date: 2012-03-01 10:53:39 EST (Thu, 01 Mar 2012)
>> New Revision: 26077
>> URL: https://svn.open-mpi.org/trac/ompi/changeset/26077
>> 
>> Log:
>> ob1: fix two fragment leaks
>> - MAJOR! get src descriptor leaks if mca_bml_base_send fails
>> - minor. descriptor leaked in mca_pml_send_request_start_copy if the btl 
>> returns OMPI_ERR_RESOURCE_BUSY.
>> Text files modified:
>>  trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c |    27 ++++++++++++++++-----------
>>  1 files changed, 16 insertions(+), 11 deletions(-)
>> 
>> Modified: trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c
>> ==============================================================================
>> --- trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c (original)
> >> +++ trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c 2012-03-01 10:53:39 EST (Thu, 01 Mar 2012)
>> @@ -1,3 +1,4 @@
>> +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
>> /*
>> * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
>> *                         University Research and Technology
>> @@ -12,6 +13,8 @@
>> * Copyright (c) 2008      UT-Battelle, LLC. All rights reserved.
>> * Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved.
>> * Copyright (c) 2012      NVIDIA Corporation.  All rights reserved.
>> + * Copyright (c) 2012      Los Alamos National Security, LLC. All rights
>> + *                         reserved.
>> * $COPYRIGHT$
>> *
>> * Additional copyrights may follow
>> @@ -546,15 +549,14 @@
>>        }
>>        return OMPI_SUCCESS;
>>    }
>> -    switch(OPAL_SOS_GET_ERROR_CODE(rc)) {
>> -        case OMPI_ERR_RESOURCE_BUSY:
>> -            /* No more resources. Allow the upper level to queue the send */
>> -            rc = OMPI_ERR_OUT_OF_RESOURCE;
>> -            break;
>> -        default:
>> -            mca_bml_base_free(bml_btl, des);
>> -            break;
>> +
>> +    if (OMPI_ERR_RESOURCE_BUSY == OPAL_SOS_GET_ERROR_CODE(rc)) {
>> +        /* No more resources. Allow the upper level to queue the send */
>> +        rc = OMPI_ERR_OUT_OF_RESOURCE;
>>    }
>> +
>> +    mca_bml_base_free (bml_btl, des);
>> +
>>    return rc;
>> }
>> 
>> @@ -631,7 +633,7 @@
>>     * operation is achieved.
>>     */
>> 
>> -    mca_btl_base_descriptor_t* des;
>> +    mca_btl_base_descriptor_t *des, *src = NULL;
>>    mca_btl_base_segment_t* segment;
>>    mca_pml_ob1_hdr_t* hdr;
>>    bool need_local_cb = false;
>> @@ -640,7 +642,6 @@
>>    bml_btl = sendreq->req_rdma[0].bml_btl;
>>    if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & 
>> (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
>>        mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
>> -        mca_btl_base_descriptor_t* src;
>>        size_t i;
>>        size_t old_position = 
>> sendreq->req_send.req_base.req_convertor.bConverted;
>> 
>> @@ -781,6 +782,10 @@
>>        return OMPI_SUCCESS;
>>    }
>>    mca_bml_base_free(bml_btl, des);
>> +    if (NULL != src) {
>> +        mca_bml_base_free (bml_btl, src);
>> +    }
>> +
>>    return rc;
>> }
>> 
>> @@ -1144,7 +1149,7 @@
>>                              0,
>>                              &frag->rdma_length,
>>                              MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
>> -                          MCA_BTL_DES_FLAGS_PUT,
>> +                              MCA_BTL_DES_FLAGS_PUT,
>>                              &des );
>> 
>>    if( OPAL_UNLIKELY(NULL == des) ) {
>> _______________________________________________
>> svn mailing list
>> s...@open-mpi.org
>> http://www.open-mpi.org/mailman/listinfo.cgi/svn
>> _______________________________________________
>> devel mailing list
>> de...@open-mpi.org
>> http://www.open-mpi.org/mailman/listinfo.cgi/devel
> 
> 
> -- 
> Jeff Squyres
> jsquy...@cisco.com
> For corporate legal information go to: 
> http://www.cisco.com/web/about/doing_business/legal/cri/
> 
> 
> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel


Reply via email to