Currently, alternate connection managers (CMs) cannot be selected because ompi_btl_openib_connect_base_open forces a choice of either oob or xoob (and takes an erroneous error path if you pick anything else). This patch reorganizes ompi_btl_openib_connect_base_open so that open functions for new CMs can easily be added. New open functions were added to oob and xoob to take over the error handling that was removed from the base open function.
I tested calling oob, xoob, and rdma_cm. oob happily allows connections to be established and throws no errors. xoob fails because ompi does not have it compiled in (and I have no ConnectX cards). rdma_cm calls the empty hooks and exits without connecting (thus throwing non-connection errors). All of this is expected behavior. Since this patch fixes the existing behavior, and is not necessarily tied to my implementation of rdma_cm, I think it is acceptable to go in now. Thanks, Jon Index: ompi/mca/btl/openib/connect/btl_openib_connect_base.c =================================================================== --- ompi/mca/btl/openib/connect/btl_openib_connect_base.c (revision 16937) +++ ompi/mca/btl/openib/connect/btl_openib_connect_base.c (working copy) @@ -50,8 +50,8 @@ */ int ompi_btl_openib_connect_base_open(void) { - int i; - char **temp, *a, *b; + char **temp, *a, *b, *defval; + int i, ret = OMPI_ERROR; /* Make an MCA parameter to select which connect module to use */ temp = NULL; @@ -66,40 +66,23 @@ /* For XRC qps we must to use XOOB connection manager */ if (mca_btl_openib_component.num_xrc_qps > 0) { - mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version, - "connect", - b, false, false, - "xoob", &param); - if (0 != strcmp("xoob", param)) { - opal_show_help("help-mpi-btl-openib.txt", - "XRC with wrong OOB", true, - orte_system_info.nodename, - mca_btl_openib_component.num_xrc_qps); - return OMPI_ERROR; - } + defval = "xoob"; } else { /* For all others we should use OOB */ - mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version, - "connect", - b, false, false, - "oob", &param); - if (0 != strcmp("oob", param)) { - opal_show_help("help-mpi-btl-openib.txt", - "SRQ or PP with wrong OOB", true, - orte_system_info.nodename, - mca_btl_openib_component.num_srq_qps, - mca_btl_openib_component.num_pp_qps); - return OMPI_ERROR; - } + defval = "oob"; } + mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version, + "connect", b, false, false, 
defval, &param); + /* Call the open function on all the connect modules */ for (i = 0; NULL != all[i]; ++i) { - if (NULL != all[i]->bcf_open) { - all[i]->bcf_open(); + if (0 == strcmp(all[i]->bcf_name, param)) { + ret = all[i]->bcf_open(); + break; } } - return OMPI_SUCCESS; + return ret; } Index: ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c =================================================================== --- ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (revision 16937) +++ ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (working copy) @@ -28,11 +28,7 @@ static int ibcm_open(void) { - mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version, - "btl_openib_connect_ibcm_foo", - "A dummy help message", false, false, - 17, NULL); - + printf("ibcm open\n"); return OMPI_SUCCESS; } Index: ompi/mca/btl/openib/connect/btl_openib_connect_oob.c =================================================================== --- ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (revision 16937) +++ ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (working copy) @@ -22,6 +22,8 @@ #include "ompi_config.h" +#include "opal/util/show_help.h" + #include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" @@ -39,6 +41,7 @@ ENDPOINT_CONNECT_ACK } connect_message_type_t; +static int oob_open(void); static int oob_init(void); static int oob_start_connect(mca_btl_base_endpoint_t *e); static int oob_finalize(void); @@ -67,8 +70,8 @@ */ ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = { "oob", - /* No need for "open */ - NULL, + /* Open */ + oob_open, /* Init */ oob_init, /* Connect */ @@ -78,6 +81,23 @@ }; /* + * Open function. 
+ */ +static int oob_open(void) +{ + if (mca_btl_openib_component.num_xrc_qps > 0) { + opal_show_help("help-mpi-btl-openib.txt", + "SRQ or PP with wrong OOB", true, + orte_system_info.nodename, + mca_btl_openib_component.num_srq_qps, + mca_btl_openib_component.num_pp_qps); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + +/* * Init function. Post non-blocking RML receive to accept incoming * connection requests. */ Index: ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c =================================================================== --- ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (revision 16937) +++ ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c (working copy) @@ -28,11 +28,7 @@ static int rdma_cm_open(void) { - mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version, - "btl_openib_connect_rdma_cm_foo", - "A dummy help message", false, false, - 17, NULL); - + printf("rdma cm open\n"); return OMPI_SUCCESS; } Index: ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c =================================================================== --- ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (revision 16937) +++ ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (working copy) @@ -10,6 +10,8 @@ #include "ompi_config.h" +#include "opal/util/show_help.h" + #include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" @@ -22,6 +24,7 @@ #include "btl_openib_xrc.h" #include "connect/connect.h" +static int xoob_open(void); static int xoob_init(void); static int xoob_start_connect(mca_btl_base_endpoint_t *e); static int xoob_finalize(void); @@ -32,8 +35,8 @@ */ ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = { "xoob", - /* No need for "open */ - NULL, + /* Open */ + xoob_open, /* Init */ xoob_init, /* Connect */ @@ -99,7 +102,24 @@ static int init_rem_info(mca_btl_openib_rem_info_t *rem_info); static void free_rem_info(mca_btl_openib_rem_info_t *rem_info); + 
/* + * Open function. + */ +static int xoob_open(void) +{ + if (mca_btl_openib_component.num_xrc_qps <= 0) { + opal_show_help("help-mpi-btl-openib.txt", + "XRC with wrong OOB", true, + orte_system_info.nodename, + mca_btl_openib_component.num_xrc_qps); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + +/* * Init function. Post non-blocking RML receive to accept incoming * connection requests. */ @@ -834,6 +854,12 @@ #else /* In case if the XRC was disabled during compilation we will print message and return error */ +static int xoob_open(void) +{ + printf("xoob open\n"); + return OMPI_ERR_NOT_IMPLEMENTED; +} + static int xoob_init(void) { printf("xoob init\n");