Hi,
The attached program intercommunicator-iallgather.c prints the
message "MPI Error in MPI_Testall() (18)" forever and never
finishes. This is because libnbc has send/recv typos: it schedules
its sends with recvcount/recvtype instead of sendcount/sendtype.
See attached intercommunicator-iallgather.patch for the fix.
The patch modifies both iallgather_inter and iallgather_intra.
The change to iallgather_intra is only for symmetry with
iallgather_inter; in the intracommunicator case users must already
guarantee that the send and receive arguments are consistent.
Both the trunk and the v1.8 branch have this issue.
Regards,
Takahiro Kawashima,
MPI development team,
Fujitsu
#include <stdio.h>
#include <stdlib.h>
#include "mpi.h"
/*
$ mpiexec -n 2 a.out
MPI Error in MPI_Testall() (18)
MPI Error in MPI_Testall() (18)
MPI Error in MPI_Testall() (18)
:
:
*/
/*
 * Reproducer for MPI_Iallgather on an intercommunicator with asymmetric
 * send/receive counts.
 *
 * MPI_COMM_WORLD is split by rank parity into two local groups, which are
 * then joined into an intercommunicator. The even group (color 0) sends one
 * int per process and receives nothing; the odd group (color 1) sends
 * nothing and receives one int from every process of the even group.
 *
 * Each process prints its receive buffer, which was pre-filled with -1, so
 * entries that were never written remain visible as -1.
 *
 * Must be run with at least two processes: each group names world rank
 * (color+1)%2 as the remote leader.
 */
int main(int argc, char **argv)
{
    MPI_Comm inter_comm, local_comm;
    int rank, color, rsize, i;
    int *buf;
    MPI_Request rq;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Even world ranks form group 0, odd world ranks form group 1. */
    color = rank % 2;
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &local_comm);

    /* Local leader is rank 0 of each group; the remote leader is given as a
     * rank in MPI_COMM_WORLD: world rank 1 for group 0, world rank 0 for
     * group 1. */
    MPI_Intercomm_create(local_comm, 0, MPI_COMM_WORLD,
                         (color + 1) % 2, 0, &inter_comm);
    MPI_Comm_remote_size(inter_comm, &rsize);

    /* One slot per process of the remote group. */
    buf = malloc(sizeof *buf * (size_t)rsize);
    if (NULL == buf && rsize > 0) {
        /* Without this check the fill loop below would dereference NULL. */
        fprintf(stderr, "[rank %d] malloc failed\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* Sentinel value so untouched entries are recognizable in the output. */
    for (i = 0; i < rsize; i++) {
        buf[i] = -1;
    }

    if (0 == color) {
        /* Even group: contribute one int, receive nothing. */
        MPI_Iallgather(&rank, 1, MPI_INT,
                       buf, 0, MPI_INT, inter_comm, &rq);
    } else {
        /* Odd group: contribute nothing, receive one int per remote rank. */
        MPI_Iallgather(&rank, 0, MPI_INT,
                       buf, 1, MPI_INT, inter_comm, &rq);
    }
    MPI_Wait(&rq, MPI_STATUS_IGNORE);

    for (i = 0; i < rsize; i++) {
        printf("[rank %d] buf[%d]=%d\n", rank, i, buf[i]);
    }
    fflush(stdout);

    MPI_Comm_free(&inter_comm);
    MPI_Comm_free(&local_comm);
    MPI_Finalize();
    free(buf);
    return 0;
}
Index: ompi/mca/coll/libnbc/nbc_iallgather.c
===================================================================
--- ompi/mca/coll/libnbc/nbc_iallgather.c (revision 32798)
+++ ompi/mca/coll/libnbc/nbc_iallgather.c (working copy)
@@ -98,7 +98,7 @@
res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, r, schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
/* send to rank r - not from the sendbuf to optimize MPI_IN_PLACE */
- res = NBC_Sched_send(sbuf, false, recvcount, recvtype, r, schedule);
+ res = NBC_Sched_send(sbuf, false, sendcount, sendtype, r, schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
}
}
@@ -174,7 +174,7 @@
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
/* send to rank r */
- res = NBC_Sched_send(sendbuf, false, recvcount, recvtype, r, schedule);
+ res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
}