For some reason, the MX btl sets btl_bandwidth in megabits/s instead of
megabytes/s, so we get crazy btl_weights in the case of heterogeneous
multirail. Passing --mca btl_mx_bandwidth <width> cannot work around the
problem (probably because the value is overridden by the runtime
link width detection anyway?).

Signed-off-by: Brice Goglin <brice.gog...@inria.fr>

Index: ompi/mca/btl/mx/btl_mx_component.c
===================================================================
--- ompi/mca/btl/mx/btl_mx_component.c  (révision 23711)
+++ ompi/mca/btl/mx/btl_mx_component.c  (copie de travail)
@@ -159,7 +159,7 @@
                                          MCA_BTL_FLAGS_PUT |
                                          MCA_BTL_FLAGS_SEND |
                                          MCA_BTL_FLAGS_RDMA_MATCHED);
-    mca_btl_mx_module.super.btl_bandwidth = 2000;
+    mca_btl_mx_module.super.btl_bandwidth = 250;
     mca_btl_mx_module.super.btl_latency = 5;
     mca_btl_base_param_register(&mca_btl_mx_component.super.btl_version,
                                 &mca_btl_mx_module.super);
@@ -357,7 +357,7 @@
     mx_btl->mx_endpoint = mx_endpoint;
     mx_btl->mx_endpoint_addr = mx_endpoint_addr;

-    mx_btl->super.btl_bandwidth = 2000;  /* whatever */
+    mx_btl->super.btl_bandwidth = 250;  /* whatever */
     mx_btl->super.btl_latency = 10;
 #if defined(MX_HAS_NET_TYPE)
     {
@@ -370,11 +370,11 @@
         } else {
             if( MX_SPEED_2G == value ) {
                 mx_unique_network_id |= 0xaa000000;
-                mx_btl->super.btl_bandwidth = 2000;
+                mx_btl->super.btl_bandwidth = 250;
                 mx_btl->super.btl_latency = 5;
             } else if( MX_SPEED_10G == value ) {
                 mx_unique_network_id |= 0xbb000000;
-                mx_btl->super.btl_bandwidth = 10000;
+                mx_btl->super.btl_bandwidth = 1250;
                 mx_btl->super.btl_latency = 3;
             } else {
                 mx_unique_network_id |= 0xcc000000;


Index: ompi/mca/btl/mx/btl_mx_component.c
===================================================================
--- ompi/mca/btl/mx/btl_mx_component.c	(révision 23711)
+++ ompi/mca/btl/mx/btl_mx_component.c	(copie de travail)
@@ -159,7 +159,7 @@
                                          MCA_BTL_FLAGS_PUT |
                                          MCA_BTL_FLAGS_SEND |
                                          MCA_BTL_FLAGS_RDMA_MATCHED);
-    mca_btl_mx_module.super.btl_bandwidth = 2000;
+    mca_btl_mx_module.super.btl_bandwidth = 250;
     mca_btl_mx_module.super.btl_latency = 5;
     mca_btl_base_param_register(&mca_btl_mx_component.super.btl_version,
                                 &mca_btl_mx_module.super);
@@ -357,7 +357,7 @@
     mx_btl->mx_endpoint = mx_endpoint;
     mx_btl->mx_endpoint_addr = mx_endpoint_addr;

-    mx_btl->super.btl_bandwidth = 2000;  /* whatever */
+    mx_btl->super.btl_bandwidth = 250;  /* whatever */
     mx_btl->super.btl_latency = 10;
 #if defined(MX_HAS_NET_TYPE)
     {
@@ -370,11 +370,11 @@
         } else {
             if( MX_SPEED_2G == value ) {
                 mx_unique_network_id |= 0xaa000000;
-                mx_btl->super.btl_bandwidth = 2000;
+                mx_btl->super.btl_bandwidth = 250;
                 mx_btl->super.btl_latency = 5;
             } else if( MX_SPEED_10G == value ) {
                 mx_unique_network_id |= 0xbb000000;
-                mx_btl->super.btl_bandwidth = 10000;
+                mx_btl->super.btl_bandwidth = 1250;
                 mx_btl->super.btl_latency = 3;
             } else {
                 mx_unique_network_id |= 0xcc000000;

Reply via email to