(10) A receive with an element count of 0 terminates abnormally after a
checkpoint is taken.
Framework : crcp
Component : bkmrk
The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_copy_remove
/* Excerpt of the reproducer: rank 0 posts a zero-count MPI_Isend to rank 1
 * (tag 1000) before sleeping; every other rank sleeps first and then posts
 * a zero-count MPI_Irecv from rank 0 with the same tag. */
if (rank == 0) {
j=100;
MPI_Isend(&j,0,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
printf(" rank=%d sleep start \n",rank); fflush(stdout);
sleep(SLPTIME); /** take checkpoint at this point **/
printf(" rank=%d sleep end \n",rank); fflush(stdout);
}
else {
printf(" rank=%d sleep start \n",rank); fflush(stdout);
sleep(SLPTIME); /** take checkpoint at this point **/
printf(" rank=%d sleep end \n",rank); fflush(stdout);
MPI_Irecv(&j,0,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
}
MPI_Wait(&req,&sts);
* If a checkpoint is taken while process 0 and process 1 are in the sleep
function, the program terminates abnormally with the following message:
*** An error occurred in MPI_Irecv
*** on communicator MPI_COMM_WORLD
*** MPI_ERR_BUFFER: invalid buffer pointer
*** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)
* The ompi_ddt_copy_content_same_ddt function returns true when it is called
from the drain_message_copy_remove function, and an error occurs.
* In the ompi_ddt_copy_content_same_ddt function,
if count is 0, it returns true (1).
The relevant code is as follows:
/* empty data ? then do nothing. This should normally be trapped
* at a higher level.
*/
if( 0 == count ) return 1;
* If count is 0,
is it necessary for the drain_message_copy_remove function to call the
copy function (ompi_ddt_copy_content_same_ddt)?
-bash-3.2$ cat t_mpi_question-10.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"
#define SLPTIME 60
/*
 * Reproducer for a zero-count nonblocking send/receive pair.
 *
 * Rank 0 posts MPI_Isend with count 0 to rank 1 (tag 1000) and then sleeps
 * for SLPTIME seconds. Every other rank sleeps for SLPTIME seconds first and
 * only then posts the matching zero-count MPI_Irecv from rank 0. The
 * checkpoint is meant to be taken while the processes are sleeping.
 *
 * Because a zero-count receive transfers no data, j must still be 0 on
 * rank 1 afterwards; otherwise the job is aborted.
 *
 * NOTE(review): rank 0 addresses rank 1 and all non-zero ranks receive from
 * rank 0, so this looks like it is meant to run with exactly two
 * processes - confirm before running with a different process count.
 *
 * Returns 0 on normal completion.
 */
int main(int ac, char **av)
{
    int rank = 0;
    int size;
    int j = 0;
    MPI_Request req;
    MPI_Status sts;

    MPI_Init(&ac, &av);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
        /* Non-zero payload value: with count 0 it must never reach rank 1. */
        j = 100;
        MPI_Isend(&j, 0, MPI_INT, 1, 1000, MPI_COMM_WORLD, &req);
        printf(" rank=%d sleep start \n", rank); fflush(stdout);
        sleep(SLPTIME); /* take the checkpoint during this sleep */
        printf(" rank=%d sleep end \n", rank); fflush(stdout);
    } else {
        printf(" rank=%d sleep start \n", rank); fflush(stdout);
        sleep(SLPTIME); /* take the checkpoint during this sleep */
        printf(" rank=%d sleep end \n", rank); fflush(stdout);
        /* Posted only after the sleep, i.e. after the send was issued. */
        MPI_Irecv(&j, 0, MPI_INT, 0, 1000, MPI_COMM_WORLD, &req);
    }

    MPI_Wait(&req, &sts);
    printf(" rank=%d pass-2 %d \n", rank, j); fflush(stdout);

    /* A zero-count receive must leave the receive buffer untouched. */
    if ((rank == 1) && (j != 0)) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Finalize();
    if (rank == 0) {
        printf(" rank=%d program end \n", rank); fflush(stdout);
    }
    return 0;
}