The 8th question is as follows: (8) The result of a communication that uses derived datatypes constructed with MPI_Type_vector or MPI_Type_indexed is incorrect after taking a checkpoint.
Framework : datatype Component : datatype The source file : ompi/datatype/dt_copy.c The function name : ompi_ddt_copy_content_same_ddt Framework : crcp Component : bkmrk The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c The function name : ? Here's the code that causes the problem: #define SLPTIME 60 #define ITEMNUM 10 int buf[ITEMNUM][ITEMNUM]; MPI_Type_vector(10,1,10,MPI_INT,&newdt); MPI_Type_commit(&newdt); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req); printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); /** take checkpoint at this point **/ printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Wait(&req,&sts); MPI_Type_free(&newdt); } else { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); /** take checkpoint at this point **/ printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Irecv(&buf[0][0],1,newdt,0,1000,MPI_COMM_WORLD,&req); MPI_Wait(&req,&sts); MPI_Type_free(&newdt); } for (i=0;i<ITEMNUM;i++) { /* showing the result */ printf(" rank=%d size=%d i=%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n" ,rank,size,i ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4] ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9] ); fflush(stdout); } * Take checkpoint while Process 0 and Process 1 are in sleep function * After receiving data(by MPI_Irecv/Wait), incorrect results are printed. 
The incorrect results are as follows: rank=1 size=2 i=0 [ 0 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=1 [113 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=2 [1734306160 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=3 [1953458291 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=4 [1836017711 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=5 [1853054828 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=6 [7630955 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=7 [1919907629 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=8 [1600938352 -1 -1 -1 -1 -1 -1 -1 -1 -1] rank=1 size=2 i=9 [23761296 -1 -1 -1 -1 -1 -1 -1 -1 -1] It seems that ten elements are copied, but their values are incorrect. * Here is my debugging output: ft_event_post_drain_message:Irecv drain_msg_ref=e84200 rank=0 tag=1000 cnt=1 ddt=40 to=e8d3a0 [datatype->size=1] wait_quiesce_drained:xx=0 0 wait_quiesce_drained:xx=1 100 wait_quiesce_drained:xx=2 200 wait_quiesce_drained:xx=3 300 wait_quiesce_drained:xx=4 400 wait_quiesce_drained:xx=5 500 wait_quiesce_drained:xx=6 600 wait_quiesce_drained:xx=7 700 wait_quiesce_drained:xx=8 800 wait_quiesce_drained:xx=9 900 ompi_ddt_copy_content_same_ddt:Start size=40 flag=102/4 count=1 * I think that the receiver received the message correctly in bkmrk; the received data is contiguous. * I think that the problem is the copy processing in ompi_ddt_copy_content_same_ddt. Or is it wrong to use the ompi_ddt_copy_content_same_ddt function here? * The first argument (datatype) of the ompi_ddt_copy_content_same_ddt function in drain_message_copy_remove is the one specified by the user's application. The hexadecimal value of datatype->flags is 0x102. It does not contain DT_FLAG_CONTIGUOUS, which means it is a derived datatype. * I think that the problem occurs in the following parts of the ompi_ddt_copy_content_same_ddt function. Both source and destination use the same datatype information, which is specified by the user's application, but the source (the received message held in bkmrk) is simple contiguous data.
------------------- destination += datatype->true_lb; source += datatype->true_lb; ------------------- ptrdiff_t extent = (datatype->ub - datatype->lb); destination += extent; source += extent; ------------------- pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[DT_LOOP] + 1) ); source = (unsigned char*)source_base + pStack->disp; destination = (unsigned char*)destination_base + pStack->disp; * If the source datatype is different from the destination datatype, Should not ompi_ddt_copy_content_same_ddt function be used? -bash-3.2$ cat t_mpi_question-8.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "mpi.h" #define SLPTIME 60 #define ITEMNUM 10 int buf[ITEMNUM][ITEMNUM]; int main(int ac,char **av) { int rank,size,cc,i,j; MPI_Request req; MPI_Status sts; MPI_Datatype newdt; MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); for (i=0;i<ITEMNUM;i++) { for (j=0;j<ITEMNUM;j++) { if (rank == 0) { buf[i][j] = (i*100)+j; } else { buf[i][j] = -1; } } } cc=MPI_Type_vector(10,1,10,MPI_INT,&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_commit(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } else { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); cc=MPI_Irecv(&buf[0][0],1,newdt,0,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { 
MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } for (i=0;i<ITEMNUM;i++) { printf(" rank=%d size=%d i=%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n" ,rank,size,i ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4] ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9] ); fflush(stdout); } cc = MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); } -bash-3.2$ cat t_mpi_question-8-type_indexed.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "mpi.h" #define SLPTIME 60 #define ITEMNUM 10 int buf[ITEMNUM][ITEMNUM]; int main(int ac,char **av) { int rank,size,cc,i,j; MPI_Request req; MPI_Status sts; MPI_Datatype newdt; int block_length[ITEMNUM]; int disp[ITEMNUM]; MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); for (i=0;i<ITEMNUM;i++) { block_length[i] = 1; disp[i] = (i * ITEMNUM); for (j=0;j<ITEMNUM;j++) { if (rank == 0) { buf[i][j] = (i*100)+j; } else { buf[i][j] = -1; } } } cc=MPI_Type_indexed(10,&block_length[0],&disp[0],MPI_INT,&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_commit(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } else { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); cc=MPI_Irecv(&buf[0][0],1,newdt,0,1000,MPI_COMM_WORLD,&req); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } 
cc=MPI_Wait(&req,&sts); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } cc=MPI_Type_free(&newdt); if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); } } for (i=0;i<ITEMNUM;i++) { printf(" rank=%d size=%d i=%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n" ,rank,size,i ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4] ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9] ); fflush(stdout); } cc = MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); } -bash-3.2$ cat t_mpi_question-8-LBUB.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "mpi.h" #define ITEMNUM_1 10 #define SLPTIME 60 int buf[ITEMNUM_1][ITEMNUM_1]; int main(int ac,char **av) { int rank,size,cc,i,j,k; MPI_Request req; MPI_Status sts; MPI_Datatype newdt; int itmnum,newdt_size; int b_l[3]; MPI_Aint dp[3],newdt_extent,newdt_lb,newdt_ub; MPI_Datatype dt[3]; itmnum = 10; rank=0; MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); for (i=0;i<ITEMNUM_1;i++) { for (j=0;j<ITEMNUM_1;j++) { if (rank == 0) { buf[i][j] = (i*100)+j; } else { buf[i][j] = -1; } } } b_l[0] = b_l[1] = b_l[2] = 1; dt[0] = MPI_LB; dt[1] = MPI_INT; dt[2] = MPI_UB; dp[0] = 0; dp[1] = 8; dp[2] = 20; MPI_Type_struct(3,&b_l[0],&dp[0],&dt[0],&newdt); MPI_Type_commit(&newdt); MPI_Type_size(newdt,&newdt_size); MPI_Type_extent(newdt,&newdt_extent); MPI_Type_lb(newdt,&newdt_lb); MPI_Type_ub(newdt,&newdt_ub); printf(" rank=%d newdt size=%d extent=%d lb=%d ub=%d itmnum=%d \n" ,rank,newdt_size,newdt_extent,newdt_lb,newdt_ub,itmnum); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { MPI_Isend(&buf[0][0],itmnum,newdt,1,1000,MPI_COMM_WORLD,&req); printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Wait(&req,&sts); MPI_Type_free(&newdt); } else { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end 
\n",rank); fflush(stdout); MPI_Irecv(&buf[0][0],itmnum,newdt,0,1000,MPI_COMM_WORLD,&req); MPI_Wait(&req,&sts); MPI_Type_free(&newdt); } for (i=0;i<ITEMNUM_1;i++) { printf(" rank=%d size=%d i=%d/%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n" ,rank,size,i,itmnum ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4] ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9] ); fflush(stdout); } MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); }