9th question is as follows:
(9) A communication in which the sender and the receiver specify different
element counts (or datatype sizes) deadlocks after a checkpoint is taken.
Framework : crcp
Component : bkmrk
The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_find
Here's the code that causes the problem:
/* Excerpt of the reproducer; the complete program is listed further below. */
#define WORKBUFSIZE 4
#define SLPTIME 60
int rbuf[WORKBUFSIZE];
int j;
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 1) {
/* Receiver: sleeps first, and only afterwards posts a receive for
 * WORKBUFSIZE (4) MPI_INT elements from rank 0 with tag 1000. */
printf(" rank=%d sleep start \n",rank); fflush(stdout);
sleep(SLPTIME); /** take checkpoint at this point **/
printf(" rank=%d sleep end \n",rank); fflush(stdout);
MPI_Irecv(&rbuf[0],WORKBUFSIZE,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
MPI_Wait(&req,&sts);
j=rbuf[0];
}
else { /* rank 0 */
/* Sender: starts a non-blocking send of a single MPI_INT to rank 1 before
 * sleeping, so the send has been started but the matching receive has not
 * yet been posted when the checkpoint is taken. */
j=100;
MPI_Isend(&j,1,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
printf(" rank=%d sleep start \n",rank); fflush(stdout);
sleep(SLPTIME); /** take checkpoint at this point **/
printf(" rank=%d sleep end \n",rank); fflush(stdout);
MPI_Wait(&req,&sts);
}
/* Print the value held in j and the count field of the completed status. */
printf(" rank=%d pass-2 %d %d \n",rank,j,sts._count); fflush(stdout);
* If a checkpoint is taken while Process 0 and Process 1 are both in the
sleep function, the MPI program then deadlocks.
* The element count/size comparison in drain_message_find does not match:
drain_message_find:My=1 drain_msg=e6fc80 [peer=0/0 count=4/1 comm=6014e0 ID
0/0/0 R=1/1 tag=1000/1000 ddt_size=4/4 [datatype->size=1]] [done=1
active=0 already_posted=0]
/* Check the datatype size, if specified for a match */
if( ddt_size != PROBE_ANY_SIZE &&
count != PROBE_ANY_COUNT) {
/* Check the datatype size and count to make sure it matches */
if((drain_msg->count ) != count ||
(drain_msg->ddt_size) != ddt_size) {
continue;
}
}
drain_msg->count is 1.
count is 4.
drain_msg->ddt_size is 4.
ddt_size is 4.
* If Open MPI is built with the --enable-debug configure option
and the openib BTL is selected when running the MPI job,
the following assertion failure is reported from mca_btl_openib_ft_event:
t_mpi_question-9.out: ../../../../../ompi/mca/btl/openib/btl_openib.c:1433:
mca_btl_openib_ft_event: Assertion `((0xdeafbeedULL << 32) + 0xdeafbeedULL)
== ((opal_object_t *)
(&mca_btl_openib_component.ib_procs))->obj_magic_id' failed.
* The following programs behave in the same way.
1) t_mpi_question-9-packunpack.c
Sender : MPI_Isend(&workbuf[0],j,MPI_PACKED,1,1000,MPI_COMM_WORLD,&req);
Receiver: #define WORKBUFSIZ 64
char workbuf[WORKBUFSIZ];
MPI_Irecv(&workbuf[0],WORKBUFSIZ,MPI_PACKED,0,1000,MPI_COMM_WORLD,&req);
drain_message_find:My=1 drain_msg=794200 [peer=0/0 count=64/20 comm=601ba0
ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=1/1 [datatype->size=1]] [done=1
active=0 already_posted=0]
drain_msg->count is 20.
count is 64.
2) t_mpi_question-9-contiguous.c
Sender : cc=MPI_Type_contiguous(50,MPI_INT,&newdt);
cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
Receiver: cc=MPI_Irecv(&buf[0][0],50,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
drain_message_find:My=1 drain_msg=1658200 [peer=0/0 count=50/1 comm=601840
ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/200 [datatype->size=1]] [done=1
active=0 already_posted=0]
drain_msg->count is 1.
count is 50.
drain_msg->ddt_size is 200.
ddt_size is 4.
3) t_mpi_question-9-vector.c
Sender : cc=MPI_Type_vector(10,1,10,MPI_INT,&newdt);
cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
Receiver: cc=MPI_Irecv(&buf[0][0],10,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
drain_message_find:My=1 drain_msg=20ad900 [peer=0/0 count=10/1 comm=601840
ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/40 [datatype->size=1]] [done=1
active=0 already_posted=0]
drain_msg->count is 1.
count is 10.
drain_msg->ddt_size is 40.
ddt_size is 4.
-bash-3.2$ cat t_mpi_question-9.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include "mpi.h"
#define WORKBUFSIZE 4
#define SLPTIME 60
/*
 * Reproducer for a count mismatch between sender and receiver.
 *
 * Rank 0 starts a non-blocking send of one MPI_INT to rank 1 and then
 * sleeps; rank 1 sleeps first and only afterwards posts a receive for
 * WORKBUFSIZE MPI_INT elements. Both sleeps last SLPTIME seconds so that a
 * checkpoint can be taken while the send has been started but the matching
 * receive has not yet been posted.
 *
 * Only ranks 0 and 1 exchange data. Any further ranks just take part in the
 * barrier and in MPI_Finalize, so they neither send an unmatched message
 * nor print an uninitialised status.
 *
 * Returns 0 on success, EXIT_FAILURE when started with fewer than two
 * processes (rank 1 would not exist as a destination).
 */
int main(int ac,char **av)
{
    int rank = 0;
    int size = 0;
    int j = 0;
    MPI_Request req;
    MPI_Status sts;
    int rbuf[WORKBUFSIZE];

    memset(rbuf, 0, sizeof rbuf);

    MPI_Init(&ac, &av);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* The exchange needs both rank 0 (sender) and rank 1 (receiver). */
    if (size < 2) {
        fprintf(stderr, " rank=%d this test needs at least 2 processes \n", rank);
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 1) {
        /* Receiver: the receive is posted only after the sleep. */
        printf(" rank=%d sleep start \n", rank); fflush(stdout);
        sleep(SLPTIME); /* take checkpoint at this point */
        printf(" rank=%d sleep end \n", rank); fflush(stdout);
        MPI_Irecv(&rbuf[0], WORKBUFSIZE, MPI_INT, 0, 1000, MPI_COMM_WORLD, &req);
        MPI_Wait(&req, &sts);
        j = rbuf[0];
    }
    else if (rank == 0) {
        /* Sender: the send is started before the sleep. */
        j = 100;
        MPI_Isend(&j, 1, MPI_INT, 1, 1000, MPI_COMM_WORLD, &req);
        printf(" rank=%d sleep start \n", rank); fflush(stdout);
        sleep(SLPTIME); /* take checkpoint at this point */
        printf(" rank=%d sleep end \n", rank); fflush(stdout);
        MPI_Wait(&req, &sts);
    }

    if (rank <= 1) {
        /*
         * NOTE(review): _count is not one of the public MPI_Status members
         * (MPI_SOURCE, MPI_TAG, MPI_ERROR); MPI_Get_count is the portable
         * accessor. The field is kept because the report inspects it.
         */
        printf(" rank=%d pass-2 %d %d \n", rank, j, sts._count); fflush(stdout);
    }

    MPI_Finalize();
    if (rank == 0) {
        printf(" rank=%d program end \n", rank); fflush(stdout);
    }
    return(0);
}
-bash-3.2$ cat t_mpi_question-9-contiguous.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"
#define SLPTIME 60
#define ITEMNUM 10
int buf[ITEMNUM][ITEMNUM];
/*
 * Reproducer for a datatype mismatch between sender and receiver
 * (contiguous derived type versus plain MPI_INT).
 *
 * Rank 0 sends one element of a contiguous derived type made of 50 MPI_INT
 * and then sleeps; rank 1 sleeps first and afterwards receives the same
 * data as 50 separate MPI_INT elements. Both sleeps last SLPTIME seconds so
 * that a checkpoint can be taken in between.
 *
 * Only ranks 0 and 1 exchange data. Any further ranks skip the exchange
 * instead of waiting on a receive that rank 0 never sends. Every rank
 * prints its copy of buf at the end.
 *
 * Returns 0 on success, EXIT_FAILURE when started with fewer than two
 * processes; any failing MPI call aborts the job.
 */
int main(int ac,char **av)
{
    /* Number of MPI_INT elements transferred; shared by both sides so the
     * derived type and the receive count cannot drift apart. */
    const int nelems = 50;
    int rank,size,cc,i,j;
    MPI_Request req;
    MPI_Status sts;
    MPI_Datatype newdt;

    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);

    /* The exchange needs both rank 0 (sender) and rank 1 (receiver). */
    if (size < 2) {
        fprintf(stderr," rank=%d this test needs at least 2 processes \n",rank);
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /* Rank 0 holds recognisable values, every other rank starts with -1. */
    for (i=0;i<ITEMNUM;i++) {
        for (j=0;j<ITEMNUM;j++) {
            if (rank == 0) { buf[i][j] = (i*100)+j; }
            else { buf[i][j] = -1; }
        }
    }

    cc=MPI_Type_contiguous(nelems,MPI_INT,&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Type_commit(&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
        /* Sender: the send is started before the sleep. */
        cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        cc=MPI_Wait(&req,&sts);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    }
    else if (rank == 1) {
        /* Receiver: the receive is posted only after the sleep. */
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        cc=MPI_Irecv(&buf[0][0],nelems,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Wait(&req,&sts);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    }

    /* Every rank committed the type, so every rank releases it. */
    cc=MPI_Type_free(&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }

    for (i=0;i<ITEMNUM;i++) {
        printf(" rank=%d size=%d i=%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n"
               ,rank,size,i
               ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4]
               ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9]
               );
        fflush(stdout);
    }

    MPI_Finalize();
    if (rank ==0) {
        printf(" rank=%d program end \n",rank); fflush(stdout);
    }
    return(0);
}
-bash-3.2$ cat t_mpi_question-9-vector.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"
#define SLPTIME 60
#define ITEMNUM 10
int buf[ITEMNUM][ITEMNUM];
/*
 * Reproducer for a datatype mismatch between sender and receiver
 * (strided vector type versus plain MPI_INT).
 *
 * Rank 0 sends one element of a vector type that picks one MPI_INT out of
 * every ITEMNUM (ITEMNUM blocks of length 1, stride ITEMNUM, starting at
 * buf[0][0]) and then sleeps; rank 1 sleeps first and afterwards receives
 * ITEMNUM separate MPI_INT elements into the start of buf. Both sleeps last
 * SLPTIME seconds so that a checkpoint can be taken in between.
 *
 * Only ranks 0 and 1 exchange data. Any further ranks skip the exchange
 * instead of waiting on a receive that rank 0 never sends. Every rank
 * prints its copy of buf at the end.
 *
 * Returns 0 on success, EXIT_FAILURE when started with fewer than two
 * processes; any failing MPI call aborts the job.
 */
int main(int ac,char **av)
{
    int rank,size,cc,i,j;
    MPI_Request req;
    MPI_Status sts;
    MPI_Datatype newdt;

    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);

    /* The exchange needs both rank 0 (sender) and rank 1 (receiver). */
    if (size < 2) {
        fprintf(stderr," rank=%d this test needs at least 2 processes \n",rank);
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /* Rank 0 holds recognisable values, every other rank starts with -1. */
    for (i=0;i<ITEMNUM;i++) {
        for (j=0;j<ITEMNUM;j++) {
            if (rank == 0) { buf[i][j] = (i*100)+j; }
            else { buf[i][j] = -1; }
        }
    }

    /* Block count and stride both follow the dimensions of buf. */
    cc=MPI_Type_vector(ITEMNUM,1,ITEMNUM,MPI_INT,&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Type_commit(&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
        /* Sender: the send is started before the sleep. */
        cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        cc=MPI_Wait(&req,&sts);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    }
    else if (rank == 1) {
        /* Receiver: the receive is posted only after the sleep. */
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        cc=MPI_Irecv(&buf[0][0],ITEMNUM,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Wait(&req,&sts);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    }

    /* Every rank committed the type, so every rank releases it. */
    cc=MPI_Type_free(&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }

    for (i=0;i<ITEMNUM;i++) {
        printf(" rank=%d size=%d i=%d [%3d %3d %3d %3d %3d %3d %3d %3d %3d %3d] \n"
               ,rank,size,i
               ,buf[i][0],buf[i][1],buf[i][2],buf[i][3],buf[i][4]
               ,buf[i][5],buf[i][6],buf[i][7],buf[i][8],buf[i][9]
               );
        fflush(stdout);
    }

    MPI_Finalize();
    if (rank ==0) {
        printf(" rank=%d program end \n",rank); fflush(stdout);
    }
    return(0);
}