Currently, alternate CMs cannot be called because
ompi_btl_openib_connect_base_open forces a choice of either oob or xoob
(and goes into an erroneous error path if you pick something else).
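This patch reorganizes ompi_btl_openib_connect_base_open so that it
calls the open function of whichever CM is selected, which makes it easy
to add new CMs.  New open functions were added to oob and xoob to take
over the error handling that used to live in the base open function.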

I tested selecting oob, xoob, and rdma_cm.  oob allows connections to be
established and reports no errors.  xoob fails because my Open MPI build
does not have it compiled in (and I have no ConnectX cards).  rdma_cm
calls the empty hooks and exits without connecting (thus reporting
errors caused by the missing connection).  All of this is expected
behavior.

Since this patch fixes the existing behavior, and is not necessarily
tied to my implementation of rdma_cm, I think it is acceptable for it
to go in now.

Thanks,
Jon

Index: ompi/mca/btl/openib/connect/btl_openib_connect_base.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_base.c       (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_base.c       (working copy)
@@ -50,8 +50,8 @@
  */
 int ompi_btl_openib_connect_base_open(void)
 {
-    int i;
-    char **temp, *a, *b;
+    char **temp, *a, *b, *defval;
+    int i, ret = OMPI_ERROR;

     /* Make an MCA parameter to select which connect module to use */
     temp = NULL;
@@ -66,40 +66,23 @@

     /* For XRC qps we must to use XOOB connection manager */
     if (mca_btl_openib_component.num_xrc_qps > 0) {
-        mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
-                "connect",
-                b, false, false,
-                "xoob", &param);
-        if (0 != strcmp("xoob", param)) {
-            opal_show_help("help-mpi-btl-openib.txt",
-                    "XRC with wrong OOB", true,
-                    orte_system_info.nodename,
-                    mca_btl_openib_component.num_xrc_qps);
-            return OMPI_ERROR;
-        }
+       defval = "xoob";
     } else { /* For all others we should use OOB */
-        mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
-                "connect",
-                b, false, false,
-                "oob", &param);
-        if (0 != strcmp("oob", param)) {
-            opal_show_help("help-mpi-btl-openib.txt",
-                    "SRQ or PP with wrong OOB", true,
-                    orte_system_info.nodename,
-                    mca_btl_openib_component.num_srq_qps,
-                    mca_btl_openib_component.num_pp_qps);
-            return OMPI_ERROR;
-        }
+       defval = "oob";
     }

+    mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
+                             "connect", b, false, false, defval, &param);
+
     /* Call the open function on all the connect modules */
     for (i = 0; NULL != all[i]; ++i) {
-        if (NULL != all[i]->bcf_open) {
-            all[i]->bcf_open();
+        if (0 == strcmp(all[i]->bcf_name, param)) {
+            ret = all[i]->bcf_open();
+           break;
         }
     }

-    return OMPI_SUCCESS;
+    return ret;
 }


Index: ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c       (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c       (working copy)
@@ -28,11 +28,7 @@

 static int ibcm_open(void)
 {
-    mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
-                           "btl_openib_connect_ibcm_foo",
-                           "A dummy help message", false, false,
-                           17, NULL);
-
+    printf("ibcm open\n");
     return OMPI_SUCCESS;
 }

Index: ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_oob.c        (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_oob.c        (working copy)
@@ -22,6 +22,8 @@

 #include "ompi_config.h"

+#include "opal/util/show_help.h"
+
 #include "orte/mca/ns/base/base.h"
 #include "orte/mca/oob/base/base.h"
 #include "orte/mca/rml/rml.h"
@@ -39,6 +41,7 @@
     ENDPOINT_CONNECT_ACK
 } connect_message_type_t;

+static int oob_open(void);
 static int oob_init(void);
 static int oob_start_connect(mca_btl_base_endpoint_t *e);
 static int oob_finalize(void);
@@ -67,8 +70,8 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_oob = {
     "oob",
-    /* No need for "open */
-    NULL,
+    /* Open */
+    oob_open,
     /* Init */
     oob_init,
     /* Connect */
@@ -78,6 +81,23 @@
 };

 /*
+ * Open function.
+ */
+static int oob_open(void)
+{
+    if (mca_btl_openib_component.num_xrc_qps > 0) {
+            opal_show_help("help-mpi-btl-openib.txt",
+                    "SRQ or PP with wrong OOB", true,
+                    orte_system_info.nodename,
+                    mca_btl_openib_component.num_srq_qps,
+                    mca_btl_openib_component.num_pp_qps);
+            return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
+
+/*
  * Init function.  Post non-blocking RML receive to accept incoming
  * connection requests.
  */
Index: ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c    (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_rdma_cm.c    (working copy)
@@ -28,11 +28,7 @@

 static int rdma_cm_open(void)
 {
-    mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
-                           "btl_openib_connect_rdma_cm_foo",
-                           "A dummy help message", false, false,
-                           17, NULL);
-
+    printf("rdma cm open\n");
     return OMPI_SUCCESS;
 }

Index: ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
===================================================================
--- ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c       (revision 16937)
+++ ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c       (working copy)
@@ -10,6 +10,8 @@

 #include "ompi_config.h"

+#include "opal/util/show_help.h"
+
 #include "orte/mca/ns/base/base.h"
 #include "orte/mca/oob/base/base.h"
 #include "orte/mca/rml/rml.h"
@@ -22,6 +24,7 @@
 #include "btl_openib_xrc.h"
 #include "connect/connect.h"

+static int xoob_open(void);
 static int xoob_init(void);
 static int xoob_start_connect(mca_btl_base_endpoint_t *e);
 static int xoob_finalize(void);
@@ -32,8 +35,8 @@
  */
 ompi_btl_openib_connect_base_funcs_t ompi_btl_openib_connect_xoob = {
     "xoob",
-    /* No need for "open */
-    NULL,
+    /* Open */
+    xoob_open,
     /* Init */
     xoob_init,
     /* Connect */
@@ -99,7 +102,24 @@

 static int init_rem_info(mca_btl_openib_rem_info_t *rem_info);
 static void free_rem_info(mca_btl_openib_rem_info_t *rem_info);
+
 /*
+ * Open function.
+ */
+static int xoob_open(void)
+{
+    if (mca_btl_openib_component.num_xrc_qps <= 0) {
+            opal_show_help("help-mpi-btl-openib.txt",
+                    "XRC with wrong OOB", true,
+                    orte_system_info.nodename,
+                    mca_btl_openib_component.num_xrc_qps);
+            return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
+
+/*
  * Init function.  Post non-blocking RML receive to accept incoming
  * connection requests.
  */
@@ -834,6 +854,12 @@

 #else
 /* In case if the XRC was disabled during compilation we will print message 
and return error */
+static int xoob_open(void)
+{
+    printf("xoob open\n");
+    return OMPI_ERR_NOT_IMPLEMENTED;
+}
+
 static int xoob_init(void)
 {
     printf("xoob init\n");

Reply via email to