Currently, alternate CMs cannot be called because
ompi_btl_openib_connect_base_open forces a choice of either oob or
xoob
(and goes into an erroneous error path if you pick something else).
This patch reorganizes ompi_btl_openib_connect_base_open so that new
functions can easily be added. New Open functions were added to oob
and xoob for the error handling.
I tested calling oob, xoob, and rdma_cm. oob happily allows
connections
to be established and throws no errors. xoob fails because ompi does
not have it compiled in (and I have no connectx cards). rdma_cm calls
the empty hooks and exits without connecting (thus throwing
non-connection errors). All expected behavior.
Since this patch fixes the existing behavior, and is not necessarily
tied to my implementing of rdma_cm, I think it is acceptable to go in
now.
Thanks,
Jon
Index: ompi/mca/btl/openib/connect/btl_openib_connect_base.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_base.c (revision
16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_base.c (working
copy)
@@ -50,8 +50,8 @@
*/
int ompi_btl_openib_connect_base_open(void)
{
- int i;
- char **temp, *a, *b;
+ char **temp, *a, *b, *defval;
+ int i, ret = OMPI_ERROR;
/* Make an MCA parameter to select which connect module to use */
temp = NULL;
@@ -66,40 +66,23 @@
/* For XRC qps we must to use XOOB connection manager */
if (mca_btl_openib_component.num_xrc_qps > 0) {
-
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "xoob", ¶m);
- if (0 != strcmp("xoob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "XRC with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_xrc_qps);
- return OMPI_ERROR;
- }
+ defval = "xoob";
} else { /* For all others we should use OOB */
-
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
- "connect",
- b, false, false,
- "oob", ¶m);
- if (0 != strcmp("oob", param)) {
- opal_show_help("help-mpi-btl-openib.txt",
- "SRQ or PP with wrong OOB", true,
- orte_system_info.nodename,
- mca_btl_openib_component.num_srq_qps,
- mca_btl_openib_component.num_pp_qps);
- return OMPI_ERROR;
- }
+ defval = "oob";
}
+
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
+ "connect", b, false, false, defval, ¶m);
+
/* Call the open function on all the connect modules */
for (i = 0; NULL != all[i]; ++i) {
- if (NULL != all[i]->bcf_open) {
- all[i]->bcf_open();
+ if (0 == strcmp(all[i]->bcf_name, param)) {
+ ret = all[i]->bcf_open();
+ break;
}
}
- return OMPI_SUCCESS;
+ return ret;
}
Index: ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (revision
16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c (working
copy)
@@ -28,11 +28,7 @@
static int ibcm_open(void)
{
-
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_ibcm_foo",
- "A dummy help message", false, false,
- 17, NULL);
-
+ printf("ibcm open\n");
return OMPI_SUCCESS;
}
Index: ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (revision
16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_oob.c (working
copy)
@@ -22,6 +22,8 @@
#include "ompi_config.h"
+#include "opal/util/show_help.h"
+
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
@@ -39,6 +41,7 @@
ENDPOINT_CONNECT_ACK
} connect_message_type_t;
+static int oob_open(void);
static int oob_init(void);
static int oob_start_connect(mca_btl_base_endpoint_t *e);
static int oob_finalize(void);
@@ -67,8 +70,8 @@
*/
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = {
"oob",
- /* No need for "open */
- NULL,
+ /* Open */
+ oob_open,
/* Init */
oob_init,
/* Connect */
@@ -78,6 +81,23 @@
};
/*
+ * Open function.
+ */
+static int oob_open(void)
+{
+ if (mca_btl_openib_component.num_xrc_qps > 0) {
+ opal_show_help("help-mpi-btl-openib.txt",
+ "SRQ or PP with wrong OOB", true,
+ orte_system_info.nodename,
+ mca_btl_openib_component.num_srq_qps,
+ mca_btl_openib_component.num_pp_qps);
+ return OMPI_ERROR;
+ }
+
+ return OMPI_SUCCESS;
+}
+
+/*
* Init function. Post non-blocking RML receive to accept incoming
* connection requests.
*/
Index: ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c
(revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c
(working copy)
@@ -28,11 +28,7 @@
static int rdma_cm_open(void)
{
-
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
- "btl_openib_connect_rdma_cm_foo",
- "A dummy help message", false, false,
- 17, NULL);
-
+ printf("rdma cm open\n");
return OMPI_SUCCESS;
}
Index: ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (revision
16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c (working
copy)
@@ -10,6 +10,8 @@
#include "ompi_config.h"
+#include "opal/util/show_help.h"
+
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
@@ -22,6 +24,7 @@
#include "btl_openib_xrc.h"
#include "connect/connect.h"
+static int xoob_open(void);
static int xoob_init(void);
static int xoob_start_connect(mca_btl_base_endpoint_t *e);
static int xoob_finalize(void);
@@ -32,8 +35,8 @@
*/
ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = {
"xoob",
- /* No need for "open */
- NULL,
+ /* Open */
+ xoob_open,
/* Init */
xoob_init,
/* Connect */
@@ -99,7 +102,24 @@
static int init_rem_info(mca_btl_openib_rem_info_t *rem_info);
static void free_rem_info(mca_btl_openib_rem_info_t *rem_info);
+
/*
+ * Open function.
+ */
+static int xoob_open(void)
+{
+ if (mca_btl_openib_component.num_xrc_qps <= 0) {
+ opal_show_help("help-mpi-btl-openib.txt",
+ "XRC with wrong OOB", true,
+ orte_system_info.nodename,
+ mca_btl_openib_component.num_xrc_qps);
+ return OMPI_ERROR;
+ }
+
+ return OMPI_SUCCESS;
+}
+
+/*
* Init function. Post non-blocking RML receive to accept incoming
* connection requests.
*/
@@ -834,6 +854,12 @@
#else
/* In case if the XRC was disabled during compilation we will print
message and return error */
+static int xoob_open(void)
+{
+ printf("xoob open\n");
+ return OMPI_ERR_NOT_IMPLEMENTED;
+}
+
static int xoob_init(void)
{
printf("xoob init\n");
_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel