Strange - it ran fine for me on multiple tests. I'll check to see if something 
strange got into the mix and recommit.

Not sure it is the same issue, but it looks like all my MTT tests on trunk r25308 are timing out.
--td

On Oct 17, 2011, at 8:51 PM, George Bosilca wrote:

This commit puts the mpirun process into an infinite loop for the simple case:
mpirun -np 2 --mca orte_default_hostfile machinefile --bynode *my_app*

  george.

On Oct 17, 2011, at 15:49 , r...@osl.iu.edu wrote:

Author: rhc
Date: 2011-10-17 15:49:04 EDT (Mon, 17 Oct 2011)
New Revision: 25302
URL: https://svn.open-mpi.org/trac/ompi/changeset/25302

Log:
Fix the mapping algo for computing vpids - it was borked for bynode operations 
when using nperxxx directives

Text files modified:
  trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c |    67 
++++++++++++++++++++-------------------
  1 files changed, 34 insertions(+), 33 deletions(-)

Modified: trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c
==============================================================================
--- trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c  (original)
+++ trunk/orte/mca/rmaps/base/rmaps_base_support_fns.c  2011-10-17 15:49:04 EDT 
(Mon, 17 Oct 2011)
@@ -527,7 +527,7 @@
int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
{
    orte_job_map_t *map;
-    orte_vpid_t vpid;
+    orte_vpid_t vpid, cnt;
    int i, j;
    orte_node_t *node;
    orte_proc_t *proc;
@@ -539,6 +539,7 @@
        ORTE_MAPPING_BYSOCKET&  map->policy ||
        ORTE_MAPPING_BYBOARD&  map->policy) {
        /* assign the ranks sequentially */
+        vpid = 0;
        for (i=0; i<  map->nodes->size; i++) {
            if (NULL == (node = 
(orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
@@ -553,12 +554,10 @@
                }
                if (ORTE_VPID_INVALID == proc->name.vpid) {
                    /* find the next available vpid */
-                    for (vpid=0; vpid<  jdata->num_procs; vpid++) {
-                        if (NULL == opal_pointer_array_get_item(jdata->procs, 
vpid)) {
-                            break;
-                        }
+                    while (NULL != opal_pointer_array_get_item(jdata->procs, 
vpid)) {
+                        vpid++;
                    }
-                    proc->name.vpid = vpid;
+                    proc->name.vpid = vpid++;
                    ORTE_EPOCH_SET(proc->name.epoch,ORTE_EPOCH_INVALID);
                    
ORTE_EPOCH_SET(proc->name.epoch,orte_ess.proc_get_epoch(&proc->name));

@@ -580,39 +579,41 @@

    if (ORTE_MAPPING_BYNODE&  map->policy) {
        /* assign the ranks round-robin across nodes */
-        for (i=0; i<  map->nodes->size; i++) {
-            if (NULL == (node = 
(orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
-                continue;
-            }
-            for (j=0; j<  node->procs->size; j++) {
-                if (NULL == (proc = 
(orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
+        cnt = 0;
+        vpid = 0;
+        do {
+            for (i=0; i<  map->nodes->size; i++) {
+                if (NULL == (node = 
(orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                    continue;
                }
-                /* ignore procs from other jobs */
-                if (proc->name.jobid != jdata->jobid) {
-                    continue;
-                }
-                if (ORTE_VPID_INVALID == proc->name.vpid) {
-                    /* find the next available vpid */
-                    vpid = i;
-                    while (NULL != opal_pointer_array_get_item(jdata->procs, 
vpid)) {
-                        vpid += map->num_nodes;
-                        if (jdata->num_procs<= vpid) {
-                            vpid = vpid - jdata->num_procs;
+                for (j=0; j<  node->procs->size; j++) {
+                    if (NULL == (proc = 
(orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
+                        continue;
+                    }
+                    /* ignore procs from other jobs */
+                    if (proc->name.jobid != jdata->jobid) {
+                        continue;
+                    }
+                    if (ORTE_VPID_INVALID == proc->name.vpid) {
+                        /* find next available vpid */
+                        while (NULL != 
opal_pointer_array_get_item(jdata->procs, vpid)) {
+                            vpid++;
+                        }
+                        proc->name.vpid = vpid++;
+                        ORTE_EPOCH_SET(proc->name.epoch,ORTE_EPOCH_INVALID);
+                        
ORTE_EPOCH_SET(proc->name.epoch,orte_ess.proc_get_epoch(&proc->name));
+                        if (ORTE_SUCCESS != (rc = 
opal_pointer_array_set_item(jdata->procs,
+                                                                              
proc->name.vpid, proc))) {
+                            ORTE_ERROR_LOG(rc);
+                            return rc;
                        }
+                        cnt++;
+                        break;  /* move to next node */
                    }
-                    proc->name.vpid = vpid;
-                    ORTE_EPOCH_SET(proc->name.epoch,ORTE_EPOCH_INVALID);
-                    
ORTE_EPOCH_SET(proc->name.epoch,orte_ess.proc_get_epoch(&proc->name));
-                }
-                if (NULL == opal_pointer_array_get_item(jdata->procs, 
proc->name.vpid)) {
-                    if (ORTE_SUCCESS != (rc = 
opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
-                        ORTE_ERROR_LOG(rc);
-                        return rc;
-                    }
                }
            }
-        }
+        } while (cnt<  jdata->num_procs);
+
        return ORTE_SUCCESS;
    }

_______________________________________________
svn mailing list
s...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/svn

_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel

_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel

--
Oracle
Terry D. Dontje | Principal Software Engineer
Developer Tools Engineering | +1.781.442.2631
Oracle *- Performance Technologies*
95 Network Drive, Burlington, MA 01803
Email terry.don...@oracle.com <mailto:terry.don...@oracle.com>



Reply via email to