Author: rhc
Date: 2008-04-23 10:52:09 EDT (Wed, 23 Apr 2008)
New Revision: 18252
URL: https://svn.open-mpi.org/trac/ompi/changeset/18252
Log:
Add a load-balancing feature to the round-robin mapper; more details will be
sent to the devel list.
Fix a potential problem where nodenames provided by the resource manager (RM)
do not match the value returned by gethostname: ensure that the HNP's nodename
gets DNS-resolved when comparing against RM-provided hostnames. Note that this
may be an issue for RM-based clusters that don't have local DNS resolution, but
hopefully that is more indicative of a poorly configured system.
Text files modified:
trunk/orte/mca/ras/base/ras_base_node.c | 6 +++
trunk/orte/mca/rmaps/base/base.h | 4 ++
trunk/orte/mca/rmaps/base/rmaps_base_open.c | 10 +++++++
trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c | 55 +++++++++++++++++++--------------------
trunk/orte/mca/rmaps/round_robin/rmaps_rr.c | 50 ++++++++++++++++++++++++++++++++----
trunk/orte/tools/orterun/orterun.c | 3 ++
6 files changed, 92 insertions(+), 36 deletions(-)
Modified: trunk/orte/mca/ras/base/ras_base_node.c
==============================================================================
--- trunk/orte/mca/ras/base/ras_base_node.c (original)
+++ trunk/orte/mca/ras/base/ras_base_node.c 2008-04-23 10:52:09 EDT (Wed,
23 Apr 2008)
@@ -23,6 +23,7 @@
#include "opal/util/output.h"
#include "opal/util/argv.h"
+#include "opal/util/if.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
@@ -111,7 +112,7 @@
* first position since it is the first one entered. We need to check
to see
* if this node is the same as the HNP's node so we don't double-enter
it
*/
- if (0 == strcmp(node->name, hnp_node->name)) {
+ if (0 == strcmp(node->name, hnp_node->name) ||
opal_ifislocal(node->name)) {
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
"%s ras:base:node_insert updating HNP info to %ld
slots",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -124,6 +125,9 @@
hnp_node->slots_alloc = node->slots_alloc;
hnp_node->slots_max = node->slots_max;
hnp_node->launch_id = node->launch_id;
+ /* use the RM's name for the node */
+ free(hnp_node->name);
+ hnp_node->name = strdup(node->name);
/* set the node to available for use */
hnp_node->allocate = true;
/* update the total slots in the job */
Modified: trunk/orte/mca/rmaps/base/base.h
==============================================================================
--- trunk/orte/mca/rmaps/base/base.h (original)
+++ trunk/orte/mca/rmaps/base/base.h 2008-04-23 10:52:09 EDT (Wed, 23 Apr
2008)
@@ -57,10 +57,12 @@
bool pernode;
/** number of ppn for n_per_node mode */
int npernode;
- /* do we not allow use of the localhost */
+ /* do not allow use of the localhost */
bool no_use_local;
/* display the map after it is computed */
bool display_map;
+ /* balance load across nodes */
+ bool loadbalance;
} orte_rmaps_base_t;
/**
Modified: trunk/orte/mca/rmaps/base/rmaps_base_open.c
==============================================================================
--- trunk/orte/mca/rmaps/base/rmaps_base_open.c (original)
+++ trunk/orte/mca/rmaps/base/rmaps_base_open.c 2008-04-23 10:52:09 EDT (Wed,
23 Apr 2008)
@@ -125,6 +125,16 @@
orte_rmaps_base.oversubscribe = true;
}
+ /* Do we want to loadbalance the job */
+ param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
+ "Balance total number of procs across all
allocated nodes",
+ false, false, (int)false, &value);
+ orte_rmaps_base.loadbalance = OPAL_INT_TO_BOOL(value);
+ /* if we are doing npernode or pernode, then we cannot loadbalance */
+ if (orte_rmaps_base.pernode) {
+ orte_rmaps_base.loadbalance = false;
+ }
+
/* should we display the map after determining it? */
mca_base_param_reg_int_name("rmaps", "base_display_map",
"Whether to display the process map after it is
computed",
Modified: trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c
==============================================================================
--- trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c (original)
+++ trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c 2008-04-23 10:52:09 EDT
(Wed, 23 Apr 2008)
@@ -88,18 +88,17 @@
ORTE_ERROR_LOG(rc);
return rc;
}
- }
-
- /** check that anything is here */
- if (0 == opal_list_get_size(allocated_nodes)) {
- opal_show_help("help-orte-rmaps-base.txt",
- "orte-rmaps-base:no-available-resources",
- true);
- return ORTE_ERR_SILENT;
+ /** check that anything is here */
+ if (0 == opal_list_get_size(allocated_nodes)) {
+ opal_show_help("help-orte-rmaps-base.txt",
+ "orte-rmaps-base:no-available-resources",
+ true);
+ return ORTE_ERR_SILENT;
+ }
}
/* did the app_context contain a hostfile? */
- if (NULL != app->hostfile) {
+ if (NULL != app && NULL != app->hostfile) {
/* yes - filter the node list through the file, removing
* any nodes not found in the file
*/
@@ -108,27 +107,27 @@
ORTE_ERROR_LOG(rc);
return rc;
}
+ /** check that anything is here */
+ if (0 == opal_list_get_size(allocated_nodes)) {
+ opal_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:no-mapped-node",
+ true, app->app, app->hostfile);
+ return ORTE_ERR_SILENT;
+ }
}
- /** check that anything is here */
- if (0 == opal_list_get_size(allocated_nodes)) {
- opal_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:no-mapped-node",
- true, app->app, app->hostfile);
- return ORTE_ERR_SILENT;
- }
-
- /* now filter the list through any -host specification */
- if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
-
app->dash_host))) {
- ORTE_ERROR_LOG(rc);
- return rc;
- }
-
- /** check that anything is left! */
- if (0 == opal_list_get_size(allocated_nodes)) {
- opal_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:no-mapped-node",
- true, app->app, "");
- return ORTE_ERR_SILENT;
+ /* now filter the list through any -host specification */
+ if (NULL != app) {
+ if (ORTE_SUCCESS != (rc =
orte_util_filter_dash_host_nodes(allocated_nodes,
+
app->dash_host))) {
+ ORTE_ERROR_LOG(rc);
+ return rc;
+ }
+ /** check that anything is left! */
+ if (0 == opal_list_get_size(allocated_nodes)) {
+ opal_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:no-mapped-node",
+ true, app->app, "");
+ return ORTE_ERR_SILENT;
+ }
}
/* If the "no local" option was set, then remove the local node
Modified: trunk/orte/mca/rmaps/round_robin/rmaps_rr.c
==============================================================================
--- trunk/orte/mca/rmaps/round_robin/rmaps_rr.c (original)
+++ trunk/orte/mca/rmaps/round_robin/rmaps_rr.c 2008-04-23 10:52:09 EDT (Wed,
23 Apr 2008)
@@ -46,6 +46,7 @@
* Local variable
*/
static opal_list_item_t *cur_node_item = NULL;
+static int ppn = 0;
/*
* Create a default mapping for the application, scheduling round
@@ -228,10 +229,12 @@
/* Update the number of procs allocated */
++num_alloc;
- /** if all the procs have been mapped OR we have fully used up this node, then
- * break from the loop
+ /** if all the procs have been mapped OR we have fully used up
this node
+ * OR we are at our ppn and loadbalancing, then break from the loop
*/
- if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
+ if (num_alloc == app->num_procs ||
+ ORTE_ERR_NODE_FULLY_USED == rc ||
+ (orte_rmaps_base.loadbalance && i == ppn)) {
break;
}
}
@@ -241,7 +244,9 @@
* node is NOT max'd out
*
*/
- if (i < (num_slots_to_take-1) && ORTE_ERR_NODE_FULLY_USED != rc) {
+ if (i < (num_slots_to_take-1) &&
+ ORTE_ERR_NODE_FULLY_USED != rc &&
+ i != ppn) {
continue;
}
cur_node_item = next;
@@ -261,7 +266,7 @@
orte_std_cntr_t i;
opal_list_t node_list;
opal_list_item_t *item;
- orte_node_t *node;
+ orte_node_t *node, **nodes;
orte_vpid_t vpid_start;
orte_std_cntr_t num_nodes, num_slots;
int rc;
@@ -276,6 +281,39 @@
/* start at the beginning... */
vpid_start = 0;
+ /* if loadbalancing is requested, then we need to compute
+ * the #procs/node - note that this cannot be done
+ * if we are doing pernode or if #procs was not given
+ */
+ if (orte_rmaps_base.loadbalance && !map->pernode) {
+ /* compute total #procs */
+ for(i=0; i < jdata->num_apps; i++) {
+ app = apps[i];
+ if (0 == app->num_procs) {
+ /* can't do it - just move on */
+ opal_show_help("help-orte-rmaps-rr.txt",
+ "orte-rmaps-rr:loadbalance-and-zero-np",
+ true);
+ rc = ORTE_ERR_SILENT;
+ goto error;
+ }
+ ppn += app->num_procs;
+ }
+ /* get the total avail nodes */
+ nodes = (orte_node_t**)orte_node_pool->addr;
+ num_nodes=0;
+ for (i=0; i < orte_node_pool->size; i++) {
+ if (NULL == nodes[i]) {
+ break; /* nodes are left aligned, so stop when we hit a null
*/
+ }
+ if (nodes[i]->allocate) {
+ num_nodes++;
+ }
+ }
+ /* compute the balance */
+ ppn = ppn / num_nodes;
+ }
+
/* cycle through the app_contexts, mapping them sequentially */
for(i=0; i < jdata->num_apps; i++) {
app = apps[i];
@@ -387,7 +425,7 @@
goto error;
}
}
-
+
/** track the total number of processes we mapped */
jdata->num_procs += app->num_procs;
Modified: trunk/orte/tools/orterun/orterun.c
==============================================================================
--- trunk/orte/tools/orterun/orterun.c (original)
+++ trunk/orte/tools/orterun/orterun.c 2008-04-23 10:52:09 EDT (Wed, 23 Apr
2008)
@@ -215,6 +215,9 @@
{ "rmaps", "base", "no_oversubscribe", '\0', "nooversubscribe",
"nooversubscribe", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Nodes are not to be oversubscribed, even if the system supports such
operation"},
+ { "rmaps", "base", "loadbalance", '\0', "loadbalance", "loadbalance", 0,
+ NULL, OPAL_CMD_LINE_TYPE_BOOL,
+ "Balance total number of procs across all allocated nodes"},
{ "rmaps", "base", "display_map", '\0', "display-map", "display-map", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Display the process map just before launch"},
_______________________________________________
svn mailing list
s...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/svn