(10) Receiving a message whose element count is 0 terminates abnormally after a checkpoint is taken.
Framework     : crcp
Component     : bkmrk
Source file   : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
Function name : drain_message_copy_remove

    if (rank == 0) {
      j=100;
      MPI_Isend(&j,0,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
      printf(" rank=%d sleep start \n",rank); fflush(stdout);
      sleep(SLPTIME); /** take checkpoint at this point **/
      printf(" rank=%d sleep end \n",rank); fflush(stdout);
    }
    else {
      printf(" rank=%d sleep start \n",rank); fflush(stdout);
      sleep(SLPTIME); /** take checkpoint at this point **/
      printf(" rank=%d sleep end \n",rank); fflush(stdout);
      MPI_Irecv(&j,0,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
    }
    MPI_Wait(&req,&sts);

* If a checkpoint is taken while Process 0 and Process 1 are both inside the sleep function, the program terminates abnormally with the following message:

    *** An error occurred in MPI_Irecv
    *** on communicator MPI_COMM_WORLD
    *** MPI_ERR_BUFFER: invalid buffer pointer
    *** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)

* The ompi_ddt_copy_content_same_ddt function returns true when it is called from the drain_message_copy_remove function, and this is treated as an error.

* When count is 0, the copy function called from drain_message_copy_remove returns true (1), as follows:

    /* empty data ? then do nothing. This should normally be trapped
     * at a higher level.
     */
    if( 0 == count ) return 1;

* If count is 0, is it necessary for the drain_message_copy_remove function to call the copy function (ompi_ddt_copy_content_same_ddt) at all?
-bash-3.2$ cat t_mpi_question-10.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "mpi.h" #define SLPTIME 60 main(int ac,char **av) { int rank,size,cc,i,j; MPI_Request req; MPI_Status sts; rank=0; j=0; MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { j=100; MPI_Isend(&j,0,MPI_INT,1,1000,MPI_COMM_WORLD,&req); printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); } else { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Irecv(&j,0,MPI_INT,0,1000,MPI_COMM_WORLD,&req); } MPI_Wait(&req,&sts); printf(" rank=%d pass-2 %d \n",rank,j); fflush(stdout); if ((rank == 1) && (j != 0)) { MPI_Abort(MPI_COMM_WORLD,1); } MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); }