Hi,

The attached program intercommunicator-iallgather.c prints the
message "MPI Error in MPI_Testall() (18)" forever and never
finishes. This is because libnbc mixes up the send and receive
count/datatype arguments (a typo).

See attached intercommunicator-iallgather.patch for the fix.
The patch modifies both iallgather_inter and iallgather_intra.
The change to iallgather_intra is only for symmetry with
iallgather_inter: on an intra-communicator the user already
guarantees that the send and receive count/datatype are consistent.

Both the trunk and the v1.8 branch have this issue.

Regards,
Takahiro Kawashima,
MPI development team,
Fujitsu
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
/*
 $ mpiexec -n 2 a.out

 MPI Error in MPI_Testall() (18)
 MPI Error in MPI_Testall() (18)
 MPI Error in MPI_Testall() (18)
        :
        :
*/

/*
 * Reproducer for MPI_Iallgather on an intercommunicator.
 *
 * MPI_COMM_WORLD is split by rank parity into two groups, an
 * intercommunicator is built between them, and a nonblocking allgather
 * is issued in which the two groups pass different send/recv counts:
 * group 0 sends one int per rank and receives nothing, group 1 sends
 * nothing and receives one int per remote rank.  Each rank then prints
 * the contents of its receive buffer.
 *
 * Returns 0 on success; aborts the MPI job if the receive buffer
 * cannot be allocated.
 */
int main(int argc, char **argv)
{
    MPI_Comm inter_comm, local_comm;
    int rank, color, rsize, i;
    int *buf;
    MPI_Request  rq;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Even world ranks form group 0, odd world ranks form group 1. */
    color = rank%2;
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &local_comm);
    /* The local leader is rank 0 of each group; the remote leader is
     * named by its MPI_COMM_WORLD rank, i.e. world rank 1 for group 0
     * and world rank 0 for group 1. */
    MPI_Intercomm_create(local_comm, 0, MPI_COMM_WORLD,
                         (color+1)%2, 0, &inter_comm);
    MPI_Comm_remote_size(inter_comm, &rsize);

    /* One receive slot per rank of the remote group. */
    buf = malloc(sizeof(*buf) * rsize);
    if (NULL == buf && rsize > 0) {
        fprintf(stderr, "[rank %d] failed to allocate %d ints\n", rank, rsize);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }
    /* Pre-fill with -1 so slots that never receive data are recognizable. */
    for (i=0;i<rsize;i++) {
        buf[i] = -1;
    }

    /* The counts are deliberately asymmetric between the two groups:
     * the send count of one group matches the recv count of the other. */
    if (0==color) {
        MPI_Iallgather(&rank, 1, MPI_INT, 
                       buf,   0, MPI_INT, inter_comm, &rq);
    } else {
        MPI_Iallgather(&rank, 0, MPI_INT, 
                       buf,   1, MPI_INT, inter_comm, &rq);
    }
    MPI_Wait(&rq, MPI_STATUS_IGNORE);

    for (i=0;i<rsize;i++) {
        printf("[rank %d] buf[%d]=%d\n", rank, i, buf[i]);
    }
    fflush(stdout);

    MPI_Comm_free(&inter_comm);
    MPI_Comm_free(&local_comm);
    MPI_Finalize();
    free(buf);
    
    return 0;
}
Index: ompi/mca/coll/libnbc/nbc_iallgather.c
===================================================================
--- ompi/mca/coll/libnbc/nbc_iallgather.c	(revision 32798)
+++ ompi/mca/coll/libnbc/nbc_iallgather.c	(working copy)
@@ -98,7 +98,7 @@
         res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, r, schedule);
         if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
         /* send to rank r - not from the sendbuf to optimize MPI_IN_PLACE */
-        res = NBC_Sched_send(sbuf, false, recvcount, recvtype, r, schedule);
+        res = NBC_Sched_send(sbuf, false, sendcount, sendtype, r, schedule);
         if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
       }
     }
@@ -174,7 +174,7 @@
     if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
 
     /* send to rank r */
-    res = NBC_Sched_send(sendbuf, false, recvcount, recvtype, r, schedule);
+    res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule);
     if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
   }
 

Reply via email to