Hi Georges, Thanks for your patch, but I'm not sure I got it correctly. The patch I received modifies a few arguments passed to isend()/irecv()/recv() in coll_basic_allgather.c. Here is the patch I applied:
Index: ompi/mca/coll/basic/coll_basic_allgather.c =================================================================== --- ompi/mca/coll/basic/coll_basic_allgather.c (revision 17814) +++ ompi/mca/coll/basic/coll_basic_allgather.c (working copy) @@ -149,7 +149,7 @@ } /* Do a send-recv between the two root procs. to avoid deadlock */ - err = MCA_PML_CALL(isend(sbuf, scount, sdtype, 0, + err = MCA_PML_CALL(isend(sbuf, scount, sdtype, root, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[rsize])); @@ -157,7 +157,7 @@ return err; } - err = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, 0, + err = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, root, MCA_COLL_BASE_TAG_ALLGATHER, comm, &reqs[0])); if (OMPI_SUCCESS != err) { @@ -186,14 +186,14 @@ return err; } - err = MCA_PML_CALL(isend(rbuf, rsize * rcount, rdtype, 0, + err = MCA_PML_CALL(isend(rbuf, rsize * scount, sdtype, root, MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD, comm, &req)); if (OMPI_SUCCESS != err) { goto exit; } - err = MCA_PML_CALL(recv(tmpbuf, size * scount, sdtype, 0, + err = MCA_PML_CALL(recv(tmpbuf, size * rcount, rdtype, root, MCA_COLL_BASE_TAG_ALLGATHER, comm, MPI_STATUS_IGNORE)); if (OMPI_SUCCESS != err) { However, with this patch I still have the problem. If I start the server with three processes and the client with two, the client prints: [audet@linux15 dyn_connect]$ mpiexec --universe univ1 -n 2 ./aclient '0.2.0:2000' intercomm_flag = 1 intercomm_remote_size = 3 rem_rank_tbl[3] = { 0 1 2} [linux15:26114] *** An error occurred in MPI_Allgather [linux15:26114] *** on communicator [linux15:26114] *** MPI_ERR_TRUNCATE: message truncated [linux15:26114] *** MPI_ERRORS_ARE_FATAL (goodbye) mpiexec noticed that job rank 0 with PID 26113 on node linux15 exited on signal 15 (Terminated). [audet@linux15 dyn_connect]$ and aborts. The server on the other side simply hangs (as before). Regards, Martin