Question: for all other mappers, the relative rank is given with
respect to the allocation. Here it looks like you are computing it
relative to the list of nodes, which is built by filtering the
allocation through the hostfile and -host options.
Do you want to conform to the behavior of the other mappers, or do
you intend to do something different here?
On Jun 25, 2009, at 10:10 AM, Lenny Verkhovsky wrote:
Hi,
Here is a proposed small patch that extends the current rankfile syntax
to be compliant with the orte_hosts syntax,
making it possible to claim relative hosts from the hostfile/scheduler
by using a +n# hostname, where # is a zero-based index and
0 <= # < number of allocated hosts.
Example:
cat ~/work/svn/hpc/dev/test/Rankfile/rankfile
rank 0=+n0 slot=0
rank 1=+n0 slot=1
rank 2=+n1 slot=2
rank 3=+n1 slot=1
The patch is below for your review and blessing before I commit it to the trunk.
I would also like to ask that it be added to the 1.3 branch.
thanks,
Lenny.
Index: orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt
===================================================================
--- orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt (revision 21529)
+++ orte/mca/rmaps/rank_file/help-rmaps_rank_file.txt (working copy)
@@ -56,6 +56,9 @@
Please review your rank-slot assignments and your host allocation
to ensure
a proper match.
+[bad-index]
+Rankfile claimed host %s by index that is bigger than number of
allocated hosts.
+
[orte-rmaps-rf:alloc-error]
There are not enough slots available in the system to satisfy the
%d slots
that were requested by the application:
Index: orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h
===================================================================
--- orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h (revision 21529)
+++ orte/mca/rmaps/rank_file/rmaps_rank_file_lex.h (working copy)
@@ -75,6 +75,7 @@
#define ORTE_RANKFILE_NEWLINE 13
#define ORTE_RANKFILE_IPV6 14
#define ORTE_RANKFILE_SLOT 15
+#define ORTE_RANKFILE_RELATIVE 16
#if defined(c_plusplus) || defined(__cplusplus)
}
Index: orte/mca/rmaps/rank_file/rmaps_rank_file.c
===================================================================
--- orte/mca/rmaps/rank_file/rmaps_rank_file.c (revision 21529)
+++ orte/mca/rmaps/rank_file/rmaps_rank_file.c (working copy)
@@ -273,11 +273,11 @@
orte_vpid_t total_procs;
opal_list_t node_list;
opal_list_item_t *item;
- orte_node_t *node, *nd;
+ orte_node_t *node, *nd, *root_node;
orte_vpid_t rank, vpid_start;
orte_std_cntr_t num_nodes, num_slots;
orte_rmaps_rank_file_map_t *rfmap;
- orte_std_cntr_t slots_per_node;
+ orte_std_cntr_t slots_per_node, relative_index, tmp_cnt;
int rc;
/* convenience def */
@@ -411,7 +411,25 @@
0 == strcmp(nd->name, rfmap->node_name)) {
node = nd;
break;
- }
+ } else if (NULL != rfmap->node_name &&
+ (('+' == rfmap->node_name[0]) &&
+ (('n' == rfmap->node_name[1]) ||
+ ('N' == rfmap->node_name[1])))) {
+
+ relative_index=atoi(strtok(rfmap->node_name,"+n"));
+ if ( relative_index >= opal_list_get_size (&node_list) || ( 0 >
relative_index)){
+ orte_show_help("help-rmaps_rank_file.txt","bad-index", true,rfmap-
>node_name);
+ ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
+ return ORTE_ERR_BAD_PARAM;
+ }
+ root_node = (orte_node_t*) opal_list_get_first(&node_list);
+ for(tmp_cnt=0; tmp_cnt<relative_index; tmp_cnt++) {
+ root_node = (orte_node_t*) opal_list_get_next(root_node);
+ }
+ node = root_node;
+ break;
+ }
+
}
if (NULL == node) {
orte_show_help("help-rmaps_rank_file.txt","bad-host", true, rfmap-
>node_name);
@@ -631,6 +649,7 @@
case ORTE_RANKFILE_IPV6:
case ORTE_RANKFILE_STRING:
case ORTE_RANKFILE_INT:
+ case ORTE_RANKFILE_RELATIVE:
if(ORTE_RANKFILE_INT == token) {
sprintf(buff,"%d", orte_rmaps_rank_file_value.ival);
value = buff;
Index: orte/mca/rmaps/rank_file/rmaps_rank_file_lex.l
===================================================================
--- orte/mca/rmaps/rank_file/rmaps_rank_file_lex.l (revision 21529)
+++ orte/mca/rmaps/rank_file/rmaps_rank_file_lex.l (working copy)
@@ -111,6 +111,9 @@
orte_rmaps_rank_file_value.sval = yytext;
return ORTE_RANKFILE_HOSTNAME; }
+\+n[0-9]+ { orte_rmaps_rank_file_value.sval = yytext;
+ return ORTE_RANKFILE_RELATIVE; }
+
. { orte_rmaps_rank_file_value.sval = yytext;
return ORTE_RANKFILE_ERROR; }
<rankfile.patch>_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel