The 9th question is as follows: (9) A communication in which the sender and the receiver use different element sizes deadlocks after a checkpoint is taken.
Framework : crcp Component : bkmrk The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c The function name : drain_message_find Here's the code that causes the problem: #define WORKBUFSIZE 4 #define SLPTIME 60 int rbuf[WORKBUFSIZE]; int j; MPI_Barrier(MPI_COMM_WORLD); if (rank == 1) { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); /** take checkpoint at this point **/ printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Irecv(&rbuf[0],WORKBUFSIZE,MPI_INT,0,1000,MPI_COMM_WORLD,&req); MPI_Wait(&req,&sts); j=rbuf[0]; } else { /* rank 0 */ j=100; MPI_Isend(&j,1,MPI_INT,1,1000,MPI_COMM_WORLD,&req); printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); /** take checkpoint at this point **/ printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Wait(&req,&sts); } printf(" rank=%d pass-2 %d %d \n",rank,j,sts._count); fflush(stdout); * If a checkpoint is taken while Process 0 and Process 1 are in the sleep function, the MPI program deadlocks. * The element size does not match in drain_message_find. drain_message_find:My=1 drain_msg=e6fc80 [peer=0/0 count=4/1 comm=6014e0 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/4 [datatype->size=1]] [done=1 active=0 already_posted=0] /* Check the datatype size, if specified for a match */ if( ddt_size != PROBE_ANY_SIZE && count != PROBE_ANY_COUNT) { /* Check the datatype size and count to make sure it matches */ if((drain_msg->count ) != count || (drain_msg->ddt_size) != ddt_size) { continue; } } drain_msg->count is 1. count is 4. drain_msg->ddt_size is 4. ddt_size is 4. * If Open MPI is built with the --enable-debug configure option and the openib BTL is selected when running the MPI job, the following message is printed from mca_btl_openib_ft_event. t_mpi_question-9.out: ../../../../../ompi/mca/btl/openib/btl_openib.c:1433: mca_btl_openib_ft_event: Assertion `((0xdeafbeedULL << 32) + 0xdeafbeedULL) == ((opal_object_t *) (&mca_btl_openib_component.ib_procs))->obj_magic_id' failed. 
* The following programs behave in the same way. 1) t_mpi_question-9-packunpack.c Sender : MPI_Isend(&workbuf[0],j,MPI_PACKED,1,1000,MPI_COMM_WORLD,&req); Receiver: #define WORKBUFSIZ 64 char workbuf[WORKBUFSIZ]; MPI_Irecv(&workbuf[0],WORKBUFSIZ,MPI_PACKED,0,1000,MPI_COMM_WORLD,&req); drain_message_find:My=1 drain_msg=794200 [peer=0/0 count=64/20 comm=601ba0 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=1/1 [datatype->size=1]] [done=1 active=0 already_posted=0] drain_msg->count is 20. count is 64. 2) t_mpi_question-9-contiguous.c Sender : cc=MPI_Type_contiguous(50,MPI_INT,&newdt); cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req); Receiver: cc=MPI_Irecv(&buf[0][0],50,MPI_INT,0,1000,MPI_COMM_WORLD,&req); drain_message_find:My=1 drain_msg=1658200 [peer=0/0 count=50/1 comm=601840 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/200 [datatype->size=1]] [done=1 active=0 already_posted=0] drain_msg->count is 1. count is 50. drain_msg->ddt_size is 200. ddt_size is 4. 3) t_mpi_question-9-vector.c Sender : cc=MPI_Type_vector(10,1,10,MPI_INT,&newdt); cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req); Receiver: cc=MPI_Irecv(&buf[0][0],10,MPI_INT,0,1000,MPI_COMM_WORLD,&req); drain_message_find:My=1 drain_msg=20ad900 [peer=0/0 count=10/1 comm=601840 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/40 [datatype->size=1]] [done=1 active=0 already_posted=0] drain_msg->count is 1. count is 10. drain_msg->ddt_size is 40. ddt_size is 4. 
-bash-3.2$ cat t_mpi_question-9.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include "mpi.h" #define WORKBUFSIZE 4 #define SLPTIME 60 int main(int ac,char **av) { int rank,size,cc,i,j; MPI_Request req; MPI_Status sts; int rbuf[WORKBUFSIZE]; rank=0; j=0; memset((void *)rbuf,0,sizeof(int)*WORKBUFSIZE); MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Barrier(MPI_COMM_WORLD); if (rank == 1) { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Irecv(&rbuf[0],WORKBUFSIZE,MPI_INT,0,1000,MPI_COMM_WORLD,&req); MPI_Wait(&req,&sts); j=rbuf[0]; } else { j=100; MPI_Isend(&j,1,MPI_INT,1,1000,MPI_COMM_WORLD,&req); printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Wait(&req,&sts); } printf(" rank=%d pass-2 %d %d \n",rank,j,sts._count); fflush(stdout); MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); } -bash-3.2$ cat t_mpi_question-9-cotiguous.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "mpi.h" #define SLPTIME 60 #define ITEMNUM 10 int buf[ITEMNUM][ITEMNUM]; int main(int ac,char **av) { int rank,size,cc,i,j; MPI_Request req; MPI_Status sts; MPI_Datatype newdt; MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); for (i=0;i<ITEMNUM;i++) { for (j=0;j<ITEMNUM;j++) { if (rank == 0) { buf[i][j] = (i*100)+j; } else { buf[i][j] = -1; } } } cc=MPI_Type_contiguous(50,MPI_INT,&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_commit(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } printf(" rank=%d sleep start \n",rank); 
fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } else { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); cc=MPI_Irecv(&buf[0][0],50,MPI_INT,0,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } for (i=0;i<ITEMNUM;i++) { printf(" rank=%d size=%d i=%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n" ,rank,size,i ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4] ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9] ); fflush(stdout); } MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); } -bash-3.2$ cat t_mpi_question-9-vector.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "mpi.h" #define SLPTIME 60 #define ITEMNUM 10 int buf[ITEMNUM][ITEMNUM]; int main(int ac,char **av) { int rank,size,cc,i,j; MPI_Request req; MPI_Status sts; MPI_Datatype newdt; MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); for (i=0;i<ITEMNUM;i++) { for (j=0;j<ITEMNUM;j++) { if (rank == 0) { buf[i][j] = (i*100)+j; } else { buf[i][j] = -1; } } } cc=MPI_Type_vector(10,1,10,MPI_INT,&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_commit(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); 
cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } else { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); cc=MPI_Irecv(&buf[0][0],10,MPI_INT,0,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } for (i=0;i<ITEMNUM;i++) { printf(" rank=%d size=%d i=%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n" ,rank,size,i ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4] ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9] ); fflush(stdout); } MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); }