There are some pretty extensive ob1 changes in here. Can we get these reviewed? Brian / George?
On Apr 24, 2012, at 4:18 PM, hje...@osl.iu.edu wrote: > Author: hjelmn > Date: 2012-04-24 16:18:56 EDT (Tue, 24 Apr 2012) > New Revision: 26329 > URL: https://svn.open-mpi.org/trac/ompi/changeset/26329 > > Log: > ob1: add support for get fallback on put/send > Text files modified: > trunk/ompi/mca/btl/ugni/btl_ugni_get.c | 17 ---- > > trunk/ompi/mca/btl/ugni/btl_ugni_put.c | 48 -------------- > > trunk/ompi/mca/btl/ugni/btl_ugni_rdma.h | 7 -- > > trunk/ompi/mca/btl/ugni/btl_ugni_smsg.c | 5 - > > trunk/ompi/mca/btl/ugni/btl_ugni_smsg.h | 1 > > trunk/ompi/mca/pml/ob1/pml_ob1.c | 5 + > > trunk/ompi/mca/pml/ob1/pml_ob1.h | 2 > > trunk/ompi/mca/pml/ob1/pml_ob1_component.c | 4 > > trunk/ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 15 +++- > > trunk/ompi/mca/pml/ob1/pml_ob1_recvreq.c | 94 > ++++++++++++++++++++++++++-- > trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c | 131 > ++++++++++++++++++++++----------------- > trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.h | 2 > > 12 files changed, 182 insertions(+), 149 deletions(-) > > Modified: trunk/ompi/mca/btl/ugni/btl_ugni_get.c > ============================================================================== > --- trunk/ompi/mca/btl/ugni/btl_ugni_get.c (original) > +++ trunk/ompi/mca/btl/ugni/btl_ugni_get.c 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -13,19 +13,6 @@ > #include "btl_ugni_rdma.h" > #include "btl_ugni_smsg.h" > > -static int mca_btl_ugni_init_put (struct mca_btl_base_module_t *btl, > - mca_btl_ugni_base_frag_t *frag) { > - /* off alignment/off size. switch to put */ > - frag->hdr.rdma.src_seg = frag->base.des_src[0]; > - frag->hdr.rdma.dst_seg = frag->base.des_dst[0]; > - frag->hdr.rdma.ctx = (void *) frag; > - > - /* send the fragment header using smsg. ignore local completion */ > - return ompi_mca_btl_ugni_smsg_send (frag, true, &frag->hdr.rdma, > - sizeof (frag->hdr.rdma), NULL, 0, > - MCA_BTL_UGNI_TAG_PUT_INIT); > -} > - > /** > * Initiate a get operation. 
> * > @@ -54,7 +41,7 @@ > > if (OPAL_UNLIKELY(check || size > mca_btl_ugni_component.ugni_get_limit)) > { > /* switch to put */ > - return mca_btl_ugni_init_put (btl, frag); > + return OMPI_ERR_NOT_AVAILABLE; > } > > if (NULL != frag->base.des_cbfunc) { > @@ -68,7 +55,7 @@ > return mca_btl_ugni_post_bte (frag, GNI_POST_RDMA_GET, des->des_dst, > des->des_src); > } > > -void mca_btl_ugni_callback_rdma_complete (mca_btl_ugni_base_frag_t *frag, > int rc) > +static void mca_btl_ugni_callback_rdma_complete (mca_btl_ugni_base_frag_t > *frag, int rc) > { > BTL_VERBOSE(("rdma operation for rem_ctx %p complete", > frag->hdr.rdma.ctx)); > > > Modified: trunk/ompi/mca/btl/ugni/btl_ugni_put.c > ============================================================================== > --- trunk/ompi/mca/btl/ugni/btl_ugni_put.c (original) > +++ trunk/ompi/mca/btl/ugni/btl_ugni_put.c 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -46,51 +46,3 @@ > > return mca_btl_ugni_post_bte (frag, GNI_POST_RDMA_PUT, des->des_src, > des->des_dst); > } > - > -/* reversed get */ > -static void mca_btl_ugni_callback_put_retry (mca_btl_ugni_base_frag_t *frag, > int rc) > -{ > - (void) mca_btl_ugni_start_put(frag->endpoint, frag->hdr.rdma, frag); > -} > - > -int mca_btl_ugni_start_put (mca_btl_base_endpoint_t *ep, > - mca_btl_ugni_rdma_frag_hdr_t hdr, > - mca_btl_ugni_base_frag_t *frag) > -{ > - int rc; > - > - BTL_VERBOSE(("starting reverse get (put) for remote ctx: %p", hdr.ctx)); > - > - if (NULL == frag) { > - rc = MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(ep, frag); > - if (OPAL_UNLIKELY(NULL == frag)) { > - BTL_ERROR(("error allocating rdma frag for reverse get. rc = %d. 
> fl_num_allocated = %d", rc, > - ep->btl->rdma_int_frags.fl_num_allocated)); > - return rc; > - } > - } > - > - frag->hdr.rdma = hdr; > - > - frag->base.des_cbfunc = NULL; > - frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP; > - > - frag->segments[0] = hdr.src_seg; > - frag->base.des_src = frag->segments; > - frag->base.des_src_cnt = 1; > - > - frag->segments[1] = hdr.dst_seg; > - frag->base.des_dst = frag->segments + 1; > - frag->base.des_dst_cnt = 1; > - > - rc = mca_btl_ugni_put (&ep->btl->super, ep, &frag->base); > - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { > - frag->cbfunc = mca_btl_ugni_callback_put_retry; > - opal_list_append (&ep->btl->failed_frags, (opal_list_item_t *) frag); > - return rc; > - } > - > - frag->cbfunc = mca_btl_ugni_callback_rdma_complete; > - > - return OMPI_SUCCESS; > -} > > Modified: trunk/ompi/mca/btl/ugni/btl_ugni_rdma.h > ============================================================================== > --- trunk/ompi/mca/btl/ugni/btl_ugni_rdma.h (original) > +++ trunk/ompi/mca/btl/ugni/btl_ugni_rdma.h 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -16,17 +16,10 @@ > #include "btl_ugni.h" > #include "btl_ugni_frag.h" > > -/* mca_btl_ugni_start_put: get operation could not be completed. 
start put > instead */ > -int mca_btl_ugni_start_put (mca_btl_base_endpoint_t *ep, > - mca_btl_ugni_rdma_frag_hdr_t hdr, > - mca_btl_ugni_base_frag_t *frag); > - > int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep, > mca_btl_ugni_eager_ex_frag_hdr_t hdr, > mca_btl_ugni_base_frag_t *frag); > > -void mca_btl_ugni_callback_rdma_complete (mca_btl_ugni_base_frag_t *frag, > int rc); > - > static inline int init_gni_post_desc(mca_btl_ugni_base_frag_t *frag, > gni_post_type_t op_type, > uint64_t lcl_addr, > > Modified: trunk/ompi/mca/btl/ugni/btl_ugni_smsg.c > ============================================================================== > --- trunk/ompi/mca/btl/ugni/btl_ugni_smsg.c (original) > +++ trunk/ompi/mca/btl/ugni/btl_ugni_smsg.c 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -78,11 +78,6 @@ > reg->cbfunc(&ep->btl->super, tag, &(frag.base), reg->cbdata); > > break; > - case MCA_BTL_UGNI_TAG_PUT_INIT: > - frag.hdr.rdma = ((mca_btl_ugni_rdma_frag_hdr_t *) data_ptr)[0]; > - > - mca_btl_ugni_start_put (ep, frag.hdr.rdma, NULL); > - break; > case MCA_BTL_UGNI_TAG_GET_INIT: > frag.hdr.eager_ex = ((mca_btl_ugni_eager_ex_frag_hdr_t *) > data_ptr)[0]; > > > Modified: trunk/ompi/mca/btl/ugni/btl_ugni_smsg.h > ============================================================================== > --- trunk/ompi/mca/btl/ugni/btl_ugni_smsg.h (original) > +++ trunk/ompi/mca/btl/ugni/btl_ugni_smsg.h 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -21,7 +21,6 @@ > typedef enum { > MCA_BTL_UGNI_TAG_SEND, > MCA_BTL_UGNI_TAG_DISCONNECT, > - MCA_BTL_UGNI_TAG_PUT_INIT, > MCA_BTL_UGNI_TAG_GET_INIT, > MCA_BTL_UGNI_TAG_RDMA_COMPLETE > } mca_btl_ugni_smsg_tag_t; > > Modified: trunk/ompi/mca/pml/ob1/pml_ob1.c > ============================================================================== > --- trunk/ompi/mca/pml/ob1/pml_ob1.c (original) > +++ trunk/ompi/mca/pml/ob1/pml_ob1.c 2012-04-24 16:18:56 EDT (Tue, 24 Apr > 2012) > @@ -147,6 +147,7 @@ > 
OBJ_CONSTRUCT(&mca_pml_ob1.recv_pending, opal_list_t); > OBJ_CONSTRUCT(&mca_pml_ob1.pckt_pending, opal_list_t); > OBJ_CONSTRUCT(&mca_pml_ob1.rdma_pending, opal_list_t); > + > /* missing communicator pending list */ > OBJ_CONSTRUCT(&mca_pml_ob1.non_existing_communicator_pending, > opal_list_t); > > @@ -599,8 +600,10 @@ > OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); > if(NULL == frag) > break; > + > + frag->retries++; > + > if(frag->rdma_state == MCA_PML_OB1_RDMA_PUT) { > - frag->retries++; > rc = mca_pml_ob1_send_request_put_frag(frag); > } else { > rc = mca_pml_ob1_recv_request_get_frag(frag); > > Modified: trunk/ompi/mca/pml/ob1/pml_ob1.h > ============================================================================== > --- trunk/ompi/mca/pml/ob1/pml_ob1.h (original) > +++ trunk/ompi/mca/pml/ob1/pml_ob1.h 2012-04-24 16:18:56 EDT (Tue, 24 Apr > 2012) > @@ -52,7 +52,7 @@ > int free_list_inc; /* number of elements to grow free list */ > size_t send_pipeline_depth; > size_t recv_pipeline_depth; > - size_t rdma_put_retries_limit; > + size_t rdma_retries_limit; > int max_rdma_per_request; > int max_send_per_range; > bool leave_pinned; > > Modified: trunk/ompi/mca/pml/ob1/pml_ob1_component.c > ============================================================================== > --- trunk/ompi/mca/pml/ob1/pml_ob1_component.c (original) > +++ trunk/ompi/mca/pml/ob1/pml_ob1_component.c 2012-04-24 16:18:56 EDT > (Tue, 24 Apr 2012) > @@ -112,8 +112,8 @@ > mca_pml_ob1_param_register_int("send_pipeline_depth", 3); > mca_pml_ob1.recv_pipeline_depth = > mca_pml_ob1_param_register_int("recv_pipeline_depth", 4); > - mca_pml_ob1.rdma_put_retries_limit = > - mca_pml_ob1_param_register_int("rdma_put_retries_limit", 5); > + mca_pml_ob1.rdma_retries_limit = > + mca_pml_ob1_param_register_int("rdma_retries_limit", 5); > mca_pml_ob1.max_rdma_per_request = > mca_pml_ob1_param_register_int("max_rdma_per_request", 4); > mca_pml_ob1.max_send_per_range = > > Modified: 
trunk/ompi/mca/pml/ob1/pml_ob1_recvfrag.c > ============================================================================== > --- trunk/ompi/mca/pml/ob1/pml_ob1_recvfrag.c (original) > +++ trunk/ompi/mca/pml/ob1/pml_ob1_recvfrag.c 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -294,15 +294,22 @@ > if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) > { > return; > } > - > + > ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_ACK); > sendreq = (mca_pml_ob1_send_request_t*)hdr->hdr_ack.hdr_src_req.pval; > sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; > - > + > /* if the request should be delivered entirely by copy in/out > * then throttle sends */ > - if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) > + if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) { > + if (NULL != sendreq->src_des) { > + /* release registered memory */ > + mca_bml_base_free (sendreq->req_rdma[0].bml_btl, > sendreq->src_des); > + sendreq->src_des = NULL; > + } > + > sendreq->req_throttle_sends = true; > + } > > mca_pml_ob1_send_request_copy_in_out(sendreq, > hdr->hdr_ack.hdr_send_offset, > @@ -324,7 +331,7 @@ > > if(send_request_pml_complete_check(sendreq) == false) > mca_pml_ob1_send_request_schedule(sendreq); > - > + > return; > } > > > Modified: trunk/ompi/mca/pml/ob1/pml_ob1_recvreq.c > ============================================================================== > --- trunk/ompi/mca/pml/ob1/pml_ob1_recvreq.c (original) > +++ trunk/ompi/mca/pml/ob1/pml_ob1_recvreq.c 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -352,6 +352,66 @@ > } > > > +static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag, > + mca_btl_base_descriptor_t *dst) { > + mca_pml_ob1_recv_request_t *recvreq = (mca_pml_ob1_recv_request_t *) > frag->rdma_req; > + mca_bml_base_btl_t *bml_btl = frag->rdma_bml; > + mca_btl_base_descriptor_t *ctl; > + mca_pml_ob1_rdma_hdr_t *hdr; > + size_t hdr_size; > + unsigned int i; > + int rc; > + > + /* prepare a descriptor for rdma 
control message */ > + hdr_size = sizeof (mca_pml_ob1_rdma_hdr_t); > + if (dst->des_dst_cnt > 1) { > + hdr_size += (sizeof (mca_btl_base_segment_t) * > + (dst->des_dst_cnt-1)); > + } > + > + mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, hdr_size, > + MCA_BTL_DES_FLAGS_PRIORITY | > MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | > + MCA_BTL_DES_SEND_ALWAYS_CALLBACK); > + if (OPAL_UNLIKELY(NULL == ctl)) { > + return OMPI_ERR_OUT_OF_RESOURCE; > + } > + ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion; > + > + /* fill in rdma header */ > + hdr = (mca_pml_ob1_rdma_hdr_t *) ctl->des_src->seg_addr.pval; > + hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT; > + hdr->hdr_common.hdr_flags = > + (!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0; > + > + hdr->hdr_req = frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req; > + hdr->hdr_rdma_offset = recvreq->req_rdma_offset; > + hdr->hdr_des.pval = dst; > + > + hdr->hdr_seg_cnt = dst->des_dst_cnt; > + > + for (i = 0 ; i < dst->des_dst_cnt ; ++i) { > + hdr->hdr_segs[i].seg_addr.lval = > ompi_ptr_ptol(dst->des_dst[i].seg_addr.pval); > + hdr->hdr_segs[i].seg_len = dst->des_dst[i].seg_len; > + hdr->hdr_segs[i].seg_key.key64[0] = dst->des_dst[i].seg_key.key64[0]; > + hdr->hdr_segs[i].seg_key.key64[1] = dst->des_dst[i].seg_key.key64[1]; > + } > + > + dst->des_cbfunc = mca_pml_ob1_put_completion; > + dst->des_cbdata = recvreq; > + > + if (!recvreq->req_ack_sent) > + recvreq->req_ack_sent = true; > + > + /* send rdma request to peer */ > + rc = mca_bml_base_send (bml_btl, ctl, MCA_PML_OB1_HDR_TYPE_PUT); > + if (OPAL_UNLIKELY(rc < 0)) { > + mca_bml_base_free (bml_btl, ctl); > + return rc; > + } > + > + return OMPI_SUCCESS; > +} > + > /* > * > */ > @@ -371,14 +431,25 @@ > 0, > &frag->rdma_length, > MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | > MCA_BTL_DES_SEND_ALWAYS_CALLBACK | > - MCA_BTL_DES_FLAGS_GET, > + MCA_BTL_DES_FLAGS_GET, > &descriptor ); > if( OPAL_UNLIKELY(NULL == descriptor) ) { > - frag->rdma_length = save_size; > - 
OPAL_THREAD_LOCK(&mca_pml_ob1.lock); > - opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag); > - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); > - return OMPI_ERR_OUT_OF_RESOURCE; > + if (frag->retries < mca_pml_ob1.rdma_retries_limit) { > + frag->rdma_length = save_size; > + OPAL_THREAD_LOCK(&mca_pml_ob1.lock); > + opal_list_append(&mca_pml_ob1.rdma_pending, > (opal_list_item_t*)frag); > + OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); > + return OMPI_ERR_OUT_OF_RESOURCE; > + } else { > + ompi_proc_t *proc = (ompi_proc_t *) > recvreq->req_recv.req_base.req_proc; > + > + /* tell peer to fall back on send */ > + recvreq->req_send_offset = 0; > + rc = mca_pml_ob1_recv_request_ack_send(proc, > frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req.lval, > + recvreq, > recvreq->req_send_offset, true); > + MCA_PML_OB1_RDMA_FRAG_RETURN(frag); > + return rc; > + } > } > > descriptor->des_src = frag->rdma_segs; > @@ -393,6 +464,11 @@ > /* queue up get request */ > rc = mca_bml_base_get(bml_btl,descriptor); > if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { > + if (OPAL_UNLIKELY(OMPI_ERR_NOT_AVAILABLE == rc)) { > + /* get isn't supported for this transfer. 
tell peer to fallback > on put */ > + rc = mca_pml_ob1_init_get_fallback (frag, descriptor); > + } > + > if(OMPI_ERR_OUT_OF_RESOURCE == rc) { > mca_bml_base_free(bml_btl, descriptor); > OPAL_THREAD_LOCK(&mca_pml_ob1.lock); > @@ -400,7 +476,7 @@ > (opal_list_item_t*)frag); > OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); > return OMPI_ERR_OUT_OF_RESOURCE; > - } else { > + } else if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { > ORTE_ERROR_LOG(rc); > orte_errmgr.abort(-1, NULL); > } > @@ -551,7 +627,9 @@ > orte_errmgr.abort(-1, NULL); > } > #endif /* OMPI_CUDA_SUPPORT */ > + > frag->rdma_hdr.hdr_rget = *hdr; > + frag->retries = 0; > frag->rdma_req = recvreq; > frag->rdma_ep = bml_endpoint; > frag->rdma_length = size; > @@ -792,7 +870,7 @@ > mca_bml_base_prepare_dst(bml_btl, reg, > &recvreq->req_recv.req_base.req_convertor, > MCA_BTL_NO_ORDER, 0, &size, > MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | > - MCA_BTL_DES_FLAGS_PUT, &dst); > + MCA_BTL_DES_FLAGS_PUT, &dst); > OPAL_THREAD_UNLOCK(&recvreq->lock); > > if(OPAL_UNLIKELY(dst == NULL)) { > > Modified: trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c > ============================================================================== > --- trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c (original) > +++ trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.c 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -264,6 +264,7 @@ > MCA_PML_OB1_COMPUTE_SEGMENT_LENGTH( des->des_src, des->des_src_cnt, > 0, req_bytes_delivered ); > OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, > req_bytes_delivered); > + sendreq->src_des = NULL; > > send_request_pml_complete_check(sendreq); > /* free the descriptor */ > @@ -639,6 +640,8 @@ > bool need_local_cb = false; > int rc; > > + sendreq->src_des = NULL; > + > bml_btl = sendreq->req_rdma[0].bml_btl; > if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & > (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) { > mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg; > @@ -657,10 +660,8 @@ > mca_bml_base_prepare_src( bml_btl, > 
reg, > &sendreq->req_send.req_base.req_convertor, > - MCA_BTL_NO_ORDER, > - 0, > - &size, > - MCA_BTL_DES_FLAGS_GET, > + MCA_BTL_NO_ORDER, 0, &size, > + MCA_BTL_DES_FLAGS_GET | > MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, > &src ); > MEMCHECKER( > memchecker_call(&opal_memchecker_base_mem_noaccess, > @@ -676,6 +677,8 @@ > src->des_cbfunc = mca_pml_ob1_rget_completion; > src->des_cbdata = sendreq; > > + sendreq->src_des = src; > + > /* allocate space for get hdr + segment list */ > mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, > sizeof(mca_pml_ob1_rget_hdr_t) + > @@ -782,8 +785,9 @@ > return OMPI_SUCCESS; > } > mca_bml_base_free(bml_btl, des); > - if (NULL != src) { > - mca_bml_base_free (bml_btl, src); > + if (sendreq->src_des) { > + mca_bml_base_free (bml_btl, sendreq->src_des); > + sendreq->src_des = NULL; > } > > return rc; > @@ -1133,63 +1137,71 @@ > MCA_PML_OB1_PROGRESS_PENDING(bml_btl); > } > > -int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t* frag ) > -{ > - mca_mpool_base_registration_t* reg = NULL; > - mca_bml_base_btl_t* bml_btl = frag->rdma_bml; > - mca_btl_base_descriptor_t* des; > +int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag ) > +{ > + mca_pml_ob1_send_request_t* sendreq = > (mca_pml_ob1_send_request_t*)frag->rdma_req; > + mca_mpool_base_registration_t *reg = NULL; > + mca_bml_base_btl_t *bml_btl = frag->rdma_bml; > + mca_btl_base_descriptor_t *des; > size_t save_size = frag->rdma_length; > int rc; > > - /* setup descriptor */ > - mca_bml_base_prepare_src( bml_btl, > - reg, > - &frag->convertor, > - MCA_BTL_NO_ORDER, > - 0, > - &frag->rdma_length, > - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | > - MCA_BTL_DES_FLAGS_PUT, > - &des ); > + if (OPAL_LIKELY(NULL == sendreq->src_des)) { > + /* setup descriptor */ > + mca_bml_base_prepare_src( bml_btl, > + reg, > + &frag->convertor, > + MCA_BTL_NO_ORDER, > + 0, > + &frag->rdma_length, > + MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | > + MCA_BTL_DES_FLAGS_PUT, > + &des ); > > - if( 
OPAL_UNLIKELY(NULL == des) ) { > - if(frag->retries < mca_pml_ob1.rdma_put_retries_limit) { > - size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset; > - frag->rdma_length = save_size; > - opal_convertor_set_position(&frag->convertor, &offset); > - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); > - opal_list_append(&mca_pml_ob1.rdma_pending, > (opal_list_item_t*)frag); > - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); > - } else { > - mca_pml_ob1_send_request_t *sendreq = > - (mca_pml_ob1_send_request_t*)frag->rdma_req; > + if( OPAL_UNLIKELY(NULL == des) ) { > + if(frag->retries < mca_pml_ob1.rdma_retries_limit) { > + size_t offset = > (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset; > + frag->rdma_length = save_size; > + opal_convertor_set_position(&frag->convertor, &offset); > + OPAL_THREAD_LOCK(&mca_pml_ob1.lock); > + opal_list_append(&mca_pml_ob1.rdma_pending, > (opal_list_item_t*)frag); > + OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); > + } else { > + mca_pml_ob1_send_request_t *sendreq = > + (mca_pml_ob1_send_request_t*)frag->rdma_req; > + > + /* tell receiver to unregister memory */ > + mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc, > + bml_btl, > frag->rdma_hdr.hdr_rdma.hdr_des, > + MCA_BTL_NO_ORDER, 1); > + > + /* send fragment by copy in/out */ > + mca_pml_ob1_send_request_copy_in_out(sendreq, > + > frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, frag->rdma_length); > + /* if a pointer to a receive request is not set it means that > + * ACK was not yet received. 
Don't schedule sends before ACK > */ > + if(NULL != sendreq->req_recv.pval) > + mca_pml_ob1_send_request_schedule(sendreq); > + } > > - /* tell receiver to unregister memory */ > - mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc, > - bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des, > - MCA_BTL_NO_ORDER, 1); > - > - /* send fragment by copy in/out */ > - mca_pml_ob1_send_request_copy_in_out(sendreq, > - frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, > frag->rdma_length); > - /* if a pointer to a receive request is not set it means that > - * ACK was not yet received. Don't schedule sends before ACK */ > - if(NULL != sendreq->req_recv.pval) > - mca_pml_ob1_send_request_schedule(sendreq); > + return OMPI_ERR_OUT_OF_RESOURCE; > } > - return OMPI_ERR_OUT_OF_RESOURCE; > + } else { > + /* already have a source descriptor */ > + des = sendreq->src_des; > + sendreq->src_des = NULL; > } > - > - des->des_dst = frag->rdma_segs; > + > + des->des_dst = frag->rdma_segs; > des->des_dst_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt; > - des->des_cbfunc = mca_pml_ob1_put_completion; > - des->des_cbdata = frag; > + des->des_cbfunc = mca_pml_ob1_put_completion; > + des->des_cbdata = frag; > > PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE, > > &(((mca_pml_ob1_send_request_t*)frag->rdma_req)->req_send.req_base), > save_size, PERUSE_SEND ); > > rc = mca_bml_base_put(bml_btl, des); > - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { > + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { > mca_bml_base_free(bml_btl, des); > frag->rdma_length = save_size; > if(OMPI_ERR_OUT_OF_RESOURCE == rc) { > @@ -1203,6 +1215,7 @@ > orte_errmgr.abort(-1, NULL); > } > } > + > return OMPI_SUCCESS; > } > > @@ -1261,21 +1274,25 @@ > frag->reg = NULL; > frag->retries = 0; > > + if (OPAL_UNLIKELY(NULL != sendreq->src_des)) { > + /* get fallback path */ > + sendreq->req_state = 0; > + } > + > /* lookup the corresponding registration */ > for(i=0; i<sendreq->req_rdma_cnt; i++) { > - if(sendreq->req_rdma[i].bml_btl == 
frag->rdma_bml) { > - frag->reg = sendreq->req_rdma[i].btl_reg; > - break; > - } > - } > + if(sendreq->req_rdma[i].bml_btl == frag->rdma_bml) { > + frag->reg = sendreq->req_rdma[i].btl_reg; > + break; > + } > + } > > /* RDMA writes may proceed in parallel to send and to each other, so > * create clone of the convertor for each RDMA fragment > */ > size = hdr->hdr_rdma_offset; > > opal_convertor_clone_with_position(&sendreq->req_send.req_base.req_convertor, > - &frag->convertor, 0, &size); > + &frag->convertor, 0, &size); > > mca_pml_ob1_send_request_put_frag(frag); > } > - > > Modified: trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.h > ============================================================================== > --- trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.h (original) > +++ trunk/ompi/mca/pml/ob1/pml_ob1_sendreq.h 2012-04-24 16:18:56 EDT (Tue, > 24 Apr 2012) > @@ -54,6 +54,7 @@ > mca_pml_ob1_send_pending_t req_pending; > opal_mutex_t req_send_range_lock; > opal_list_t req_send_ranges; > + mca_btl_base_descriptor_t *src_des; > mca_pml_ob1_com_btl_t req_rdma[1]; > }; > typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t; > @@ -129,6 +130,7 @@ > OMPI_FREE_LIST_WAIT(&mca_pml_base_send_requests, item, rc); \ > sendreq = (mca_pml_ob1_send_request_t*)item; \ > sendreq->req_send.req_base.req_proc = proc; \ > + sendreq->src_des = NULL; \ > } \ > } > > _______________________________________________ > svn-full mailing list > svn-f...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/svn-full -- Jeff Squyres jsquy...@cisco.com For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/