Mike -- Does mxm_init() do Reasonable Things to detect whether the local OpenFabrics-capable devices are unsuitable for MXM? E.g., does it check whether the local OpenFabrics devices are MXM-capable and, if not, fail gracefully?
Also, I would suggest NOT showing a show_help message if there are OpenFabrics (OF) devices available such that CM/MXM can (probably) fail over to OB1/openib. I.e., only show a show_help message if devices are available for MXM and an actual error occurs during the MXM initialization. Otherwise, if I mpirun (with the MXM MTL installed) on a system with only RoCE or iWARP devices present, MXM will complain but then fail over to OB1/openib. That would probably be confusing. On Aug 7, 2011, at 8:06 AM, mi...@osl.iu.edu wrote: > Author: miked > Date: 2011-08-07 08:06:49 EDT (Sun, 07 Aug 2011) > New Revision: 25005 > URL: https://svn.open-mpi.org/trac/ompi/changeset/25005 > > Log: > better mxm selection mechanism, some refactoring > Text files modified: > trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c | 4 ++-- > > trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c | 32 > ++++++++++++++------------------ > trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c | 6 +++--- > > trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h | 6 +++++- > > trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c | 4 ++-- > > 5 files changed, 26 insertions(+), 26 deletions(-) > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c > ============================================================================== > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c (original) > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c 2011-08-07 08:06:49 EDT (Sun, > 07 Aug 2011) > @@ -18,9 +18,9 @@ > mxm_error_t err; > mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) > mtl_request; > > - err = mxm_req_cancel(mtl_mxm_request->mxm_base_request); > + err = mxm_req_cancel(&mtl_mxm_request->mxm.base); > if (MXM_OK == err) { > - err = mxm_req_test(mtl_mxm_request->mxm_base_request); > + err = mxm_req_test(&mtl_mxm_request->mxm.base); > if (MXM_OK == err) { > mtl_request->ompi_req->req_status._cancelled = true; > > mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super); > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c > 
============================================================================== > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c (original) > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c 2011-08-07 08:06:49 EDT > (Sun, 07 Aug 2011) > @@ -72,18 +72,27 @@ > > static int ompi_mtl_mxm_component_open(void) > { > - struct stat st; > > - /* Component available only if IB hardware is present */ > - if (0 == stat("/dev/infiniband/uverbs0", &st)) { > - return OMPI_SUCCESS; > - } else { > + mxm_context_opts_t mxm_opts; > + mxm_error_t err; > + > + mca_mtl_mxm_output = opal_output_open(NULL); > + opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose); > + > + mxm_fill_context_opts(&mxm_opts); > + err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context); > + if (MXM_OK != err) { > + orte_show_help("help-mtl-mxm.txt", "mxm init", true, > + mxm_error_string(err)); > return OPAL_ERR_NOT_AVAILABLE; > } > + return OMPI_SUCCESS; > } > > static int ompi_mtl_mxm_component_close(void) > { > + mxm_cleanup(ompi_mtl_mxm.mxm_context); > + ompi_mtl_mxm.mxm_context = NULL; > return OMPI_SUCCESS; > } > > @@ -91,21 +100,8 @@ > ompi_mtl_mxm_component_init(bool enable_progress_threads, > bool enable_mpi_threads) > { > - mxm_context_opts_t mxm_opts; > - mxm_error_t err; > int rc; > > - mca_mtl_mxm_output = opal_output_open(NULL); > - opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose); > - > - mxm_fill_context_opts(&mxm_opts); > - err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context); > - if (MXM_OK != err) { > - orte_show_help("help-mtl-mxm.txt", "mxm init", true, > - mxm_error_string(err)); > - return NULL; > - } > - > rc = ompi_mtl_mxm_module_init(); > if (OMPI_SUCCESS != rc) { > return NULL; > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c > ============================================================================== > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c (original) > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c 2011-08-07 08:06:49 EDT (Sun, > 07 Aug 2011) > @@ 
-22,12 +22,12 @@ > { > mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context; > struct ompi_request_t *ompi_req = req->super.ompi_req; > - mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t *)req->mxm_base_request; > + mxm_recv_req_t *mxm_recv_req = &req->mxm.recv; > > /* Set completion status and envelope */ > ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag; > ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm; > - ompi_req->req_status.MPI_ERROR = > ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error); > + ompi_req->req_status.MPI_ERROR = > ompi_mtl_mxm_to_mpi_status(mxm_recv_req->base.error); > ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len; > > /* Copy data */ > @@ -63,7 +63,7 @@ > return ret; > } > > - mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request; > + mxm_recv_req = &mtl_mxm_request->mxm.recv; > > /* prepare a receive request embedded in the MTL request */ > mxm_recv_req->base.state = MXM_REQ_NEW; > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h > ============================================================================== > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h (original) > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h 2011-08-07 08:06:49 EDT (Sun, > 07 Aug 2011) > @@ -16,7 +16,11 @@ > > struct mca_mtl_mxm_request_t { > struct mca_mtl_request_t super; > - mxm_req_base_t *mxm_base_request; > + union { > + mxm_req_base_t base; > + mxm_send_req_t send; > + mxm_recv_req_t recv; > + } mxm; > /* mxm_segment_t mxm_segment[1]; */ > void *buf; > size_t length; > > Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c > ============================================================================== > --- trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c (original) > +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c 2011-08-07 08:06:49 EDT (Sun, > 07 Aug 2011) > @@ -25,7 +25,7 @@ > free(mtl_mxm_request->buf); > } > > - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = > 
ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error); > + mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = > ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm.base.error); > > mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super); > } > @@ -93,7 +93,7 @@ > return ret; > } > > - mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request; > + mxm_send_req = &mtl_mxm_request->mxm.send; > > /* prepare a send request embedded in the MTL request */ > mxm_send_req->base.state = MXM_REQ_NEW; > _______________________________________________ > svn-full mailing list > svn-f...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/svn-full -- Jeff Squyres jsquy...@cisco.com For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/