Hi Open MPI developers, I found a small bug in Open MPI.
See attached program cancelled.c. In this program, rank 1 tries to cancel a MPI_Irecv and calls a MPI_Recv instead if the cancellation succeeds. This program should terminate whether the cancellation succeeds or not. But it leads a deadlock in MPI_Recv after printing "MPI_Test_cancelled: 1". I confirmed it works fine with MPICH2. The problem is in mca_pml_ob1_recv_request_cancel function in ompi/mca/pml/ob1/pml_ob1_recvreq.c. It accepts the cancellation unless the request has been completed. I think it should not accept the cancellation if the request has been matched. If it want to accept the cancellation, it must push the recv frag to the unexpected message queue back and redo matching. Furthermore, the receive buffer must be reverted if the received message has been written to the receive buffer partially in a pipeline protocol. Attached patch cancel-recv.patch is a sample fix for this bug for Open MPI trunk. Though this patch has 65 lines, main modifications are adding one if-statement and deleting one if-statement. Other lines are just for indent alignment. I cannot confirm the MEMCHECKER part is correct. Could anyone review it before committing? Regards, Takahiro Kawashima, MPI development team, Fujitsu
#include <stdio.h> #include <stdlib.h> #include <mpi.h> /* rendezvous */ #define BUFSIZE1 (1024*1024) /* eager */ #define BUFSIZE2 (8) int main(int argc, char *argv[]) { int myrank, cancelled; void *buf1, *buf2; MPI_Request request; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); buf1 = malloc(BUFSIZE1); buf2 = malloc(BUFSIZE2); if (myrank == 0) { MPI_Isend(buf1, BUFSIZE1, MPI_BYTE, 1, 0, MPI_COMM_WORLD, &request); MPI_Send(buf2, BUFSIZE2, MPI_BYTE, 1, 1, MPI_COMM_WORLD); MPI_Wait(&request, &status); } else if (myrank == 1) { MPI_Irecv(buf1, BUFSIZE1, MPI_BYTE, 0, 0, MPI_COMM_WORLD, &request); MPI_Recv(buf2, BUFSIZE2, MPI_BYTE, 0, 1, MPI_COMM_WORLD, &status); MPI_Cancel(&request); MPI_Wait(&request, &status); MPI_Test_cancelled(&status, &cancelled); printf("MPI_Test_cancelled: %d\n", cancelled); fflush(stdout); if (cancelled) { MPI_Recv(buf1, BUFSIZE1, MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status); } } MPI_Finalize(); free(buf1); free(buf2); return 0; }
Index: ompi/mca/pml/ob1/pml_ob1_recvreq.c =================================================================== --- ompi/mca/pml/ob1/pml_ob1_recvreq.c (revision 26836) +++ ompi/mca/pml/ob1/pml_ob1_recvreq.c (working copy) @@ -97,36 +97,36 @@ mca_pml_ob1_recv_request_t* request = (mca_pml_ob1_recv_request_t*)ompi_request; mca_pml_ob1_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm; - if( true == ompi_request->req_complete ) { /* way to late to cancel this one */ - /* - * Receive request completed, make user buffer accessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - request->req_recv.req_base.req_addr, - request->req_recv.req_base.req_count, - request->req_recv.req_base.req_datatype); - ); + if( true == request->req_match_received ) { /* way to late to cancel this one */ + if( true == ompi_request->req_complete ) { + /* + * Receive request completed, make user buffer accessable. + */ + MEMCHECKER( + memchecker_call(&opal_memchecker_base_mem_defined, + request->req_recv.req_base.req_addr, + request->req_recv.req_base.req_count, + request->req_recv.req_base.req_datatype); + ); + } return OMPI_SUCCESS; } /* The rest should be protected behind the match logic lock */ OPAL_THREAD_LOCK(&comm->matching_lock); - if( OMPI_ANY_TAG == ompi_request->req_status.MPI_TAG ) { /* the match has not been already done */ - if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request ); - } else { - mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer; - opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); - } - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, - &(request->req_recv.req_base), PERUSE_RECV ); - /** - * As now the PML is done with this request we have to force the pml_complete - * to true. Otherwise, the request will never be freed. - */ - request->req_recv.req_base.req_pml_complete = true; + if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { + opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request ); + } else { + mca_pml_ob1_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer; + opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); } + PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, + &(request->req_recv.req_base), PERUSE_RECV ); + /** + * As now the PML is done with this request we have to force the pml_complete + * to true. Otherwise, the request will never be freed. + */ + request->req_recv.req_base.req_pml_complete = true; OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_LOCK(&ompi_request_lock);
Copyright (c) 2012 FUJITSU LIMITED. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. The copyright holders provide no reassurances that the source code provided does not infringe any patent, copyright, or any other intellectual property rights of third parties. The copyright holders disclaim any liability to any recipient for claims brought against recipient by any third party for infringement of that parties intellectual property rights. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.