Question 11: Communication over an inter-communicator deadlocks after a checkpoint is taken.
Framework : crcp Component : bkmrk The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c The function name : drain_message_find_any Here's the code that causes the problem: #define SLPTIME 60 buf = -1; if (rank == 0) { buf = 9014; MPI_Isend(&buf,1,MPI_INT,0,1000,intercomm,&req); /* using inter-communicator */ printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); /** take checkpoint at this point **/ printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Wait(&req,&sts); } else if (rank==1) { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); /** take checkpoint at this point **/ printf(" rank=%d sleep end \n",rank); fflush(stdout); buf = 0; MPI_Irecv(&buf,1,MPI_INT,0,1000,intercomm,&req); /* using inter-communicator */ MPI_Wait(&req,&sts); } * If a checkpoint is taken while Process 0 and Process 1 are inside the sleep function, the MPI program deadlocks. * Here's my debugging output. ft_event_post_drain_message:Irecv drain_msg_ref=8a2f80 rank=0 tag=1000 cnt=1 ddt=4 to=8c27c0 [datatype->size=1] wait_quiesce_drained:xx=0 9014 drain_message_find_any:Compare[peer=0] vpid=0 1 jobid=-431423487 -431423487 grp_proc_count=1 89cea0 1 drain_message_find_any:Compare[peer=0] -> Continue * Because the vpid/jobid comparison performed by orte_util_compare_name_fields fails, the drain_message_find_any function never calls drain_message_find, and the messages that were drained into bkmrk are not found. Does the orte_util_compare_name_fields function handle process names from an inter-communicator correctly?
-bash-3.2$ cat t_mpi_question-11.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include "mpi.h" #define SLPTIME 60 int main(int ac,char **av) { int rank,size,cc,j,i,buf; MPI_Request req; MPI_Status sts; MPI_Comm localcomm,intercomm; MPI_Group worldgrp,localgrp; int local_grp_size,localrank,localsize,interrank,intersize; int *rank_list; int local_leader,remote_leader; rank=0; MPI_Init(&ac,&av); MPI_Comm_rank(MPI_COMM_WORLD,&rank); MPI_Comm_size(MPI_COMM_WORLD,&size); if (size%2 != 0) { MPI_Abort(MPI_COMM_WORLD,-1); } printf(" rank=%d pass-1 \n",rank); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); MPI_Comm_group(MPI_COMM_WORLD,&worldgrp); local_grp_size = size / 2; rank_list = (int *)malloc(sizeof(int) * local_grp_size); if (rank_list == NULL) { MPI_Abort(MPI_COMM_WORLD,-1); } j = ((rank % 2) == 0) ? 0 : 1; for (i=0;i<local_grp_size;i++) { rank_list[i] = j; j+=2; } MPI_Group_incl(worldgrp,local_grp_size,rank_list,&localgrp); MPI_Comm_create(MPI_COMM_WORLD,localgrp,&localcomm); MPI_Comm_rank(localcomm,&localrank); MPI_Comm_size(localcomm,&localsize); printf(" rank=%d size=%d pass-3 LOCAL rank=%d size=%d \n" ,rank,size,localrank,localsize); fflush(stdout); MPI_Barrier(localcomm); MPI_Barrier(MPI_COMM_WORLD); local_leader = 0; remote_leader = ((rank % 2) == 0) ? 
1 : 0; MPI_Intercomm_create(localcomm,local_leader,MPI_COMM_WORLD, remote_leader,999,&intercomm); MPI_Comm_rank(intercomm,&interrank); MPI_Comm_size(intercomm,&intersize); printf(" rank=%d size=%d pass-4 LOCAL rank=%d size=%d INTER rank=%d size=%d \n" ,rank,size,localrank,localsize,interrank,intersize); fflush(stdout); MPI_Barrier(intercomm); MPI_Barrier(localcomm); MPI_Barrier(MPI_COMM_WORLD); buf = -1; if (rank == 0) { buf = 9014; MPI_Isend(&buf,1,MPI_INT,0,1000,intercomm,&req); printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); MPI_Wait(&req,&sts); } else if (rank==1) { printf(" rank=%d sleep start \n",rank); fflush(stdout); sleep(SLPTIME); printf(" rank=%d sleep end \n",rank); fflush(stdout); buf = 0; MPI_Irecv(&buf,1,MPI_INT,0,1000,intercomm,&req); MPI_Wait(&req,&sts); } printf(" rank=%d pass-5 buf=%d \n",rank,buf); fflush(stdout); MPI_Barrier(intercomm); MPI_Barrier(localcomm); MPI_Barrier(MPI_COMM_WORLD); MPI_Comm_free(&intercomm); MPI_Comm_free(&localcomm); MPI_Group_free(&localgrp); MPI_Finalize(); if (rank ==0) { printf(" rank=%d program end \n",rank); fflush(stdout); } return(0); }