This commit causes mpirun to segfault when running the IBM spawn tests on our SLURM platforms (it may affect other platforms as well). The failures occur only when mpirun is run inside a batch script.

The backtrace I get is:
Program terminated with signal 11, Segmentation fault.
#0 0x0000002a969b9dbe in daemon_leader (jobid=2643591169, num_local_contributors=1,
    type=1 '\001', data=0x588c40, flag=1 '\001', participants=0x566e80)
    at grpcomm_basic_module.c:1196
1196                OBJ_RELEASE(collection);
(gdb) bt
#0 0x0000002a969b9dbe in daemon_leader (jobid=2643591169, num_local_contributors=1,
    type=1 '\001', data=0x588c40, flag=1 '\001', participants=0x566e80)
    at grpcomm_basic_module.c:1196
#1 0x0000002a969ba316 in daemon_collective (jobid=2643591169, num_local_contributors=1,
    type=1 '\001', data=0x588c40, flag=1 '\001', participants=0x566e80)
    at grpcomm_basic_module.c:1279
#2 0x0000002a956a94a9 in orte_odls_base_default_collect_data (proc=0x588eb8, buf=0x588ef0)
    at base/odls_base_default_fns.c:2183
#3 0x0000002a95692990 in process_commands (sender=0x588eb8, buffer=0x588ef0, tag=1)
    at orted/orted_comm.c:485
#4 0x0000002a956920a0 in orte_daemon_cmd_processor (fd=-1, opal_event=1, data=0x588e90)
    at orted/orted_comm.c:271
#5 0x0000002a957fe4ca in event_process_active (base=0x50d940) at event.c:647
#6 0x0000002a957fea8b in opal_event_base_loop (base=0x50d940, flags=0) at event.c:819
#7  0x0000002a957fe6c5 in opal_event_loop (flags=0) at event.c:726
#8  0x0000002a957fe57e in opal_event_dispatch () at event.c:662
#9 0x000000000040335d in orterun (argc=5, argv=0x7fbffff008) at orterun.c:551
#10 0x0000000000402bb3 in main (argc=5, argv=0x7fbffff008) at main.c:13
(gdb)


I ran with:
srun -N 3 -b mpirun -mca mpi_yield_when_idle 1 ~/ompi-tests/ibm/dynamic/spawn_multiple

Thanks,

Tim
r...@osl.iu.edu wrote:
Author: rhc
Date: 2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
New Revision: 18252
URL: https://svn.open-mpi.org/trac/ompi/changeset/18252

Log:
Add a loadbalancing feature to the round-robin mapper - more to be sent to devel list
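For reference, based on the rmaps_base_open.c and orterun.c hunks below, the feature appears to be enabled either through the new command-line option or through the corresponding MCA parameter; the application name and process count here are only placeholders:

mpirun --loadbalance -np 8 ./a.out
mpirun -mca rmaps_base_loadbalance 1 -np 8 ./a.out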

Fix a potential problem with RM-provided nodenames not matching returns from 
gethostname - ensure that the HNP's nodename gets DNS-resolved when comparing 
against RM-provided hostnames. Note that this may be an issue for RM-based 
clusters that don't have local DNS resolution, but hopefully that is more 
indicative of a poorly configured system.
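
A minimal sketch of the check this describes, based on the ras_base_node.c hunk below; the wrapper function is hypothetical, only strcmp() and opal_ifislocal() appear in the actual change:

#include <string.h>
#include <stdbool.h>
#include "opal/util/if.h"   /* opal_ifislocal() */

/* hypothetical helper: does the RM-provided nodename refer to the HNP's own node? */
static bool rm_name_is_hnp_node(char *rm_name, char *hnp_name)
{
    /* match either the literal string returned by gethostname(), or
     * an RM-provided name that resolves to one of our local interfaces */
    return (0 == strcmp(rm_name, hnp_name)) || opal_ifislocal(rm_name);
}

When the names match in either sense, the HNP's node entry is updated (and renamed to the RM's name) rather than being entered twice.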

Text files modified:
   trunk/orte/mca/ras/base/ras_base_node.c             |  6 +++
   trunk/orte/mca/rmaps/base/base.h                     |  4 ++
   trunk/orte/mca/rmaps/base/rmaps_base_open.c          | 10 +++++++
   trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c   | 55 +++++++++++++++++++--------------------
   trunk/orte/mca/rmaps/round_robin/rmaps_rr.c          | 50 ++++++++++++++++++++++++++++----
   trunk/orte/tools/orterun/orterun.c                   |  3 ++
   6 files changed, 92 insertions(+), 36 deletions(-)

Modified: trunk/orte/mca/ras/base/ras_base_node.c
==============================================================================
--- trunk/orte/mca/ras/base/ras_base_node.c     (original)
+++ trunk/orte/mca/ras/base/ras_base_node.c     2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
@@ -23,6 +23,7 @@
#include "opal/util/output.h"
 #include "opal/util/argv.h"
+#include "opal/util/if.h"
#include "orte/mca/errmgr/errmgr.h"
 #include "orte/util/name_fns.h"
@@ -111,7 +112,7 @@
          * first position since it is the first one entered. We need to check to see
          * if this node is the same as the HNP's node so we don't double-enter it
          */
-        if (0 == strcmp(node->name, hnp_node->name)) {
+        if (0 == strcmp(node->name, hnp_node->name) || opal_ifislocal(node->name)) {
             OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
                                  "%s ras:base:node_insert updating HNP info to %ld slots",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -124,6 +125,9 @@
             hnp_node->slots_alloc = node->slots_alloc;
             hnp_node->slots_max = node->slots_max;
             hnp_node->launch_id = node->launch_id;
+            /* use the RM's name for the node */
+            free(hnp_node->name);
+            hnp_node->name = strdup(node->name);
             /* set the node to available for use */
             hnp_node->allocate = true;
             /* update the total slots in the job */

Modified: trunk/orte/mca/rmaps/base/base.h
==============================================================================
--- trunk/orte/mca/rmaps/base/base.h    (original)
+++ trunk/orte/mca/rmaps/base/base.h    2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
@@ -57,10 +57,12 @@
     bool pernode;
     /** number of ppn for n_per_node mode */
     int npernode;
-    /* do we not allow use of the localhost */
+    /* do not allow use of the localhost */
     bool no_use_local;
     /* display the map after it is computed */
     bool display_map;
+    /* balance load across nodes */
+    bool loadbalance;
 } orte_rmaps_base_t;
/**

Modified: trunk/orte/mca/rmaps/base/rmaps_base_open.c
==============================================================================
--- trunk/orte/mca/rmaps/base/rmaps_base_open.c (original)
+++ trunk/orte/mca/rmaps/base/rmaps_base_open.c 2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
@@ -125,6 +125,16 @@
         orte_rmaps_base.oversubscribe = true;
     }
+    /* Do we want to loadbalance the job */
+    param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
+                                        "Balance total number of procs across all allocated nodes",
+                                        false, false, (int)false, &value);
+    orte_rmaps_base.loadbalance = OPAL_INT_TO_BOOL(value);
+    /* if we are doing npernode or pernode, then we cannot loadbalance */
+    if (orte_rmaps_base.pernode) {
+        orte_rmaps_base.loadbalance = false;
+    }
+    /* should we display the map after determining it? */
     mca_base_param_reg_int_name("rmaps", "base_display_map",
                                 "Whether to display the process map after it is computed",

Modified: trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c
==============================================================================
--- trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c  (original)
+++ trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c  2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
@@ -88,18 +88,17 @@
             ORTE_ERROR_LOG(rc);
             return rc;
         }
-    }
-
-    /** check that anything is here */
-    if (0 == opal_list_get_size(allocated_nodes)) {
-        opal_show_help("help-orte-rmaps-base.txt",
-                       "orte-rmaps-base:no-available-resources",
-                       true);
-        return ORTE_ERR_SILENT;
+        /** check that anything is here */
+        if (0 == opal_list_get_size(allocated_nodes)) {
+            opal_show_help("help-orte-rmaps-base.txt",
+                           "orte-rmaps-base:no-available-resources",
+                           true);
+            return ORTE_ERR_SILENT;
+        }
     }
/* did the app_context contain a hostfile? */
-    if (NULL != app->hostfile) {
+    if (NULL != app && NULL != app->hostfile) {
         /* yes - filter the node list through the file, removing
          * any nodes not found in the file
          */
@@ -108,27 +107,27 @@
             ORTE_ERROR_LOG(rc);
             return rc;
         }
+        /** check that anything is here */
+        if (0 == opal_list_get_size(allocated_nodes)) {
+            opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
+                           true, app->app, app->hostfile);
+            return ORTE_ERR_SILENT;
+        }
     }
-    /** check that anything is here */
-    if (0 == opal_list_get_size(allocated_nodes)) {
-        opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
-                       true, app->app, app->hostfile);
-        return ORTE_ERR_SILENT;
-    }
-
-    /* now filter the list through any -host specification */
-    if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
-                                                               app->dash_host))) {
-        ORTE_ERROR_LOG(rc);
-        return rc;
-    }
-
-    /** check that anything is left! */
-    if (0 == opal_list_get_size(allocated_nodes)) {
-        opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
-                       true, app->app, "");
-        return ORTE_ERR_SILENT;
+    /* now filter the list through any -host specification */
+    if (NULL != app) {
+        if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
+                                                                   app->dash_host))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
+        /** check that anything is left! */
+        if (0 == opal_list_get_size(allocated_nodes)) {
+            opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
+                           true, app->app, "");
+            return ORTE_ERR_SILENT;
+        }
     }
/* If the "no local" option was set, then remove the local node

Modified: trunk/orte/mca/rmaps/round_robin/rmaps_rr.c
==============================================================================
--- trunk/orte/mca/rmaps/round_robin/rmaps_rr.c (original)
+++ trunk/orte/mca/rmaps/round_robin/rmaps_rr.c 2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
@@ -46,6 +46,7 @@
  * Local variable
  */
 static opal_list_item_t *cur_node_item = NULL;
+static int ppn = 0;
/*
  * Create a default mapping for the application, scheduling round
@@ -228,10 +229,12 @@
             /* Update the number of procs allocated */
             ++num_alloc;
-            /** if all the procs have been mapped OR we have fully used up this node, then
-             * break from the loop
+            /** if all the procs have been mapped OR we have fully used up this node
+             * OR we are at our ppn and loadbalancing, then break from the loop */
-            if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
+            if (num_alloc == app->num_procs ||
+                ORTE_ERR_NODE_FULLY_USED == rc ||
+                (orte_rmaps_base.loadbalance && i == ppn)) {
                 break;
             }
         }
@@ -241,7 +244,9 @@
          * node is NOT max'd out
          *
          */
-        if (i < (num_slots_to_take-1) && ORTE_ERR_NODE_FULLY_USED != rc) {
+        if (i < (num_slots_to_take-1) &&
+            ORTE_ERR_NODE_FULLY_USED != rc &&
+            i != ppn) {
             continue;
         }
         cur_node_item = next;
@@ -261,7 +266,7 @@
     orte_std_cntr_t i;
     opal_list_t node_list;
     opal_list_item_t *item;
-    orte_node_t *node;
+    orte_node_t *node, **nodes;
     orte_vpid_t vpid_start;
     orte_std_cntr_t num_nodes, num_slots;
     int rc;
@@ -276,6 +281,39 @@
     /* start at the beginning... */
     vpid_start = 0;
+    /* if loadbalancing is requested, then we need to compute
+     * the #procs/node - note that this cannot be done
+     * if we are doing pernode or if #procs was not given
+     */
+    if (orte_rmaps_base.loadbalance && !map->pernode) {
+        /* compute total #procs */
+        for(i=0; i < jdata->num_apps; i++) {
+            app = apps[i];
+            if (0 == app->num_procs) {
+                /* can't do it - just move on */
+                opal_show_help("help-orte-rmaps-rr.txt",
+                               "orte-rmaps-rr:loadbalance-and-zero-np",
+                               true);
+                rc = ORTE_ERR_SILENT;
+                goto error;
+            }
+            ppn += app->num_procs;
+        }
+        /* get the total avail nodes */
+        nodes = (orte_node_t**)orte_node_pool->addr;
+        num_nodes=0;
+        for (i=0; i < orte_node_pool->size; i++) {
+            if (NULL == nodes[i]) {
+                break;  /* nodes are left aligned, so stop when we hit a null */
+            }
+            if (nodes[i]->allocate) {
+                num_nodes++;
+            }
+        }
+        /* compute the balance */
+        ppn = ppn / num_nodes;
+    }
+
     /* cycle through the app_contexts, mapping them sequentially */
     for(i=0; i < jdata->num_apps; i++) {
         app = apps[i];
@@ -387,7 +425,7 @@
                 goto error;
             }
         }
-
+        /** track the total number of processes we mapped */
         jdata->num_procs += app->num_procs;

Modified: trunk/orte/tools/orterun/orterun.c
==============================================================================
--- trunk/orte/tools/orterun/orterun.c  (original)
+++ trunk/orte/tools/orterun/orterun.c  2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
@@ -215,6 +215,9 @@
     { "rmaps", "base", "no_oversubscribe", '\0', "nooversubscribe", 
"nooversubscribe", 0,
       NULL, OPAL_CMD_LINE_TYPE_BOOL,
       "Nodes are not to be oversubscribed, even if the system supports such 
operation"},
+    { "rmaps", "base", "loadbalance", '\0', "loadbalance", "loadbalance", 0,
+      NULL, OPAL_CMD_LINE_TYPE_BOOL,
+      "Balance total number of procs across all allocated nodes"},
     { "rmaps", "base", "display_map", '\0', "display-map", "display-map", 0,
       NULL, OPAL_CMD_LINE_TYPE_BOOL,
       "Display the process map just before launch"},