Mike --

Does mxm_init() do Reasonable Things to check whether the local 
OpenFabrics-capable devices are suitable for MXM?  E.g., does it check whether 
the local OpenFabrics devices are MXM-capable and, if they are not, fail gracefully?

Also, I would suggest NOT showing a show_help message when OF devices are 
available but MXM cannot use them, because in that case CM/MXM can (probably) 
fail over to OB1/openib.  I.e., only show a show_help message if devices are 
available for MXM, but an actual error occurs during the MXM initialization.

Otherwise, if I mpirun (with the MXM MTL installed) on a system with only RoCE 
or iWARP devices present, MXM will complain but then fail over to OB1/openib.  
That would probably be confusing.



On Aug 7, 2011, at 8:06 AM, mi...@osl.iu.edu wrote:

> Author: miked
> Date: 2011-08-07 08:06:49 EDT (Sun, 07 Aug 2011)
> New Revision: 25005
> URL: https://svn.open-mpi.org/trac/ompi/changeset/25005
> 
> Log:
> better mxm selection mechanism, some refactoring
> Text files modified: 
>   trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c    |     4 ++--
>   trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c |    32 ++++++++++++++------------------
>   trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c      |     6 +++---
>   trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h   |     6 +++++-
>   trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c      |     4 ++--
>   5 files changed, 26 insertions(+), 26 deletions(-)
> 
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c   (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_cancel.c   2011-08-07 08:06:49 EDT (Sun, 
> 07 Aug 2011)
> @@ -18,9 +18,9 @@
>     mxm_error_t err;
>     mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) 
> mtl_request;
> 
> -    err = mxm_req_cancel(mtl_mxm_request->mxm_base_request);
> +    err = mxm_req_cancel(&mtl_mxm_request->mxm.base);
>     if (MXM_OK == err) {
> -        err = mxm_req_test(mtl_mxm_request->mxm_base_request);
> +        err = mxm_req_test(&mtl_mxm_request->mxm.base);
>         if (MXM_OK == err) {
>             mtl_request->ompi_req->req_status._cancelled = true;
>             
> mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
> 
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c        (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_component.c        2011-08-07 08:06:49 EDT 
> (Sun, 07 Aug 2011)
> @@ -72,18 +72,27 @@
> 
> static int ompi_mtl_mxm_component_open(void)
> {
> -    struct stat st;
> 
> -    /* Component available only if IB hardware is present */
> -    if (0 == stat("/dev/infiniband/uverbs0", &st)) {
> -        return OMPI_SUCCESS;
> -    } else {
> +    mxm_context_opts_t mxm_opts;
> +    mxm_error_t err;
> +
> +    mca_mtl_mxm_output = opal_output_open(NULL);
> +    opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
> +
> +    mxm_fill_context_opts(&mxm_opts);
> +    err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context);
> +    if (MXM_OK != err) {
> +        orte_show_help("help-mtl-mxm.txt", "mxm init", true,
> +                       mxm_error_string(err));
>         return OPAL_ERR_NOT_AVAILABLE;
>     }
> +    return OMPI_SUCCESS;
> }
> 
> static int ompi_mtl_mxm_component_close(void)
> {
> +    mxm_cleanup(ompi_mtl_mxm.mxm_context);
> +    ompi_mtl_mxm.mxm_context = NULL;
>     return OMPI_SUCCESS;
> }
> 
> @@ -91,21 +100,8 @@
> ompi_mtl_mxm_component_init(bool enable_progress_threads,
>                             bool enable_mpi_threads)
> {
> -    mxm_context_opts_t mxm_opts;
> -    mxm_error_t err;
>     int rc;
> 
> -    mca_mtl_mxm_output = opal_output_open(NULL);
> -    opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
> -
> -    mxm_fill_context_opts(&mxm_opts);
> -    err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context);
> -    if (MXM_OK != err) {
> -        orte_show_help("help-mtl-mxm.txt", "mxm init", true,
> -                       mxm_error_string(err));
> -        return NULL;
> -    }
> -
>     rc = ompi_mtl_mxm_module_init();
>     if (OMPI_SUCCESS != rc) {
>       return NULL;
> 
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c     (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_recv.c     2011-08-07 08:06:49 EDT (Sun, 
> 07 Aug 2011)
> @@ -22,12 +22,12 @@
> {
>       mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
>     struct ompi_request_t *ompi_req = req->super.ompi_req;
> -    mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t *)req->mxm_base_request;
> +    mxm_recv_req_t *mxm_recv_req = &req->mxm.recv;
> 
>     /* Set completion status and envelope */
>     ompi_req->req_status.MPI_TAG    = mxm_recv_req->completion.sender_tag;
>     ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm;
> -    ompi_req->req_status.MPI_ERROR  = 
> ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error);
> +    ompi_req->req_status.MPI_ERROR  = 
> ompi_mtl_mxm_to_mpi_status(mxm_recv_req->base.error);
>     ompi_req->req_status._ucount    = mxm_recv_req->completion.actual_len;
> 
>     /* Copy data */
> @@ -63,7 +63,7 @@
>         return ret;
>     }
> 
> -    mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request;
> +    mxm_recv_req = &mtl_mxm_request->mxm.recv;
> 
>     /* prepare a receive request embedded in the MTL request */
>     mxm_recv_req->base.state    = MXM_REQ_NEW;
> 
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h  (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_request.h  2011-08-07 08:06:49 EDT (Sun, 
> 07 Aug 2011)
> @@ -16,7 +16,11 @@
> 
> struct mca_mtl_mxm_request_t {
>     struct mca_mtl_request_t super;
> -    mxm_req_base_t *mxm_base_request;
> +    union {
> +     mxm_req_base_t base;
> +     mxm_send_req_t send;
> +     mxm_recv_req_t recv;
> +    } mxm;
>     /* mxm_segment_t mxm_segment[1]; */
>     void *buf;
>     size_t length;
> 
> Modified: trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c
> ==============================================================================
> --- trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c     (original)
> +++ trunk/ompi/mca/mtl/mxm/mtl_mxm_send.c     2011-08-07 08:06:49 EDT (Sun, 
> 07 Aug 2011)
> @@ -25,7 +25,7 @@
>         free(mtl_mxm_request->buf);
>     }
> 
> -    mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR  = 
> ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error);
> +    mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR  = 
> ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm.base.error);
> 
>     mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
> }
> @@ -93,7 +93,7 @@
>         return ret;
>     }
> 
> -    mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request;
> +    mxm_send_req = &mtl_mxm_request->mxm.send;
> 
>     /* prepare a send request embedded in the MTL request */
>     mxm_send_req->base.state                  = MXM_REQ_NEW;
> _______________________________________________
> svn-full mailing list
> svn-f...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/svn-full


-- 
Jeff Squyres
jsquy...@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/


Reply via email to