George, Copyright-added patch is attached. I don't have my svn account so want someone to commit it.
All my reported issues are in the ALLTOALL(V|W) MPI_IN_PLACE code, which was implemented two months ago for MPI-2.2 conformance. Not so surprising. P.S. Fujitsu does not yet signed the contribution agreement. I must talk with the legal department again to sign it, sigh.... This patch is very trivial and so no issues will arise. Thanks, Takahiro Kawashima, MPI development team, Fujitsu > Takahiro, > > Good catches. It's absolutely amazing that some of these errors lasted for so > long before being discovered (especially the extent issue in the > MPI_ALLTOALL). Please feel free to apply your patch and add the correct > copyright at the beginning of all altered files. > > Thanks, > George. > > > > On Sep 17, 2013, at 07:36 , "Kawashima, Takahiro" > <t-kawash...@jp.fujitsu.com> wrote: > > > Hi, > > > > My colleague tested MPI_IN_PLACE for MPI_ALLTOALL, MPI_ALLTOALLV, > > and MPI_ALLTOALLW, which was implemented two months ago in Open MPI > > trunk. And he found three bugs and created a patch. > > > > Found bugs are: > > > > (A) Missing MPI_IN_PLACE support in self COLL component > > > > The attached alltoall-self-inplace.c fails with MPI_ERR_ARG. > > self COLL component also must support MPI_IN_PLACE. > > > > (B) Incorrect rcount[] index > > > > A trivial bug in the following code. > > > > for (i = 0, max_size = 0 ; i < size ; ++i) { > > size_t size = ext * rcounts[rank]; // should be rcounts[i] > > > > max_size = size > max_size ? size : max_size; > > } > > > > This causes SEGV or something. > > > > (C) For MPI_ALLTOALLV, the unit of displacements is extent, not byte > > > > Though the unit of displacements is byte for MPI_ALLTOALLW, > > the unit of displacements is extent for MPI_ALLTOALLV. > > > > MPI-2.2 (page 171) says: > > > > The outcome is as if each process sent a message to every > > other process with, > > MPI_Send(sendbuf + sdispls[i] · extent(sendtype), > > sendcounts[i], sendtype, i, ...), > > and received a message from every other process with a call to > > MPI_Recv(recvbuf + rdispls[i] · extent(recvtype), > > recvcounts[i], recvtype, i, ...). > > > > I attached his patch (alltoall-inplace.patch) to fix these three bugs.
Index: ompi/mca/coll/self/coll_self_alltoall.c =================================================================== --- ompi/mca/coll/self/coll_self_alltoall.c (revision 29185) +++ ompi/mca/coll/self/coll_self_alltoall.c (working copy) @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,6 +38,10 @@ struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { + if (MPI_IN_PLACE == sbuf) { + return MPI_SUCCESS; + } + return ompi_datatype_sndrcv(sbuf, scount, sdtype, rbuf, rcount, rdtype); } Index: ompi/mca/coll/self/coll_self_alltoallv.c =================================================================== --- ompi/mca/coll/self/coll_self_alltoallv.c (revision 29185) +++ ompi/mca/coll/self/coll_self_alltoallv.c (working copy) @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,6 +41,11 @@ { int err; ptrdiff_t lb, rextent, sextent; + + if (MPI_IN_PLACE == sbuf) { + return MPI_SUCCESS; + } + err = ompi_datatype_get_extent(sdtype, &lb, &sextent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; Index: ompi/mca/coll/self/coll_self_alltoallw.c =================================================================== --- ompi/mca/coll/self/coll_self_alltoallw.c (revision 29185) +++ ompi/mca/coll/self/coll_self_alltoallw.c (working copy) @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,6 +40,11 @@ { int err; ptrdiff_t lb, rextent, sextent; + + if (MPI_IN_PLACE == sbuf) { + return MPI_SUCCESS; + } + err = ompi_datatype_get_extent(sdtypes[0], &lb, &sextent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; Index: ompi/mca/coll/basic/coll_basic_alltoallv.c =================================================================== --- ompi/mca/coll/basic/coll_basic_alltoallv.c (revision 29185) +++ ompi/mca/coll/basic/coll_basic_alltoallv.c (working copy) @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,7 +57,7 @@ /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); for (i = 0, max_size = 0 ; i < size ; ++i) { - size_t size = ext * rcounts[rank]; + size_t size = ext * rcounts[i]; max_size = size > max_size ? size : max_size; } @@ -76,11 +77,11 @@ if (i == rank && rcounts[j]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], - tmp_buffer, (char *) rbuf + rdisps[j]); + tmp_buffer, (char *) rbuf + rdisps[j] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype, j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; } @@ -91,11 +92,11 @@ } else if (j == rank && rcounts[i]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], - tmp_buffer, (char *) rbuf + rdisps[i]); + tmp_buffer, (char *) rbuf + rdisps[i] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; } Index: ompi/mca/coll/basic/coll_basic_alltoallw.c =================================================================== --- ompi/mca/coll/basic/coll_basic_alltoallw.c (revision 29185) +++ ompi/mca/coll/basic/coll_basic_alltoallw.c (working copy) @@ -13,6 +13,7 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,7 +57,7 @@ /* Find the largest receive amount */ for (i = 0, max_size = 0 ; i < size ; ++i) { ompi_datatype_type_extent (rdtypes[i], &ext); - ext *= rcounts[rank]; + ext *= rcounts[i]; max_size = ext > max_size ? ext : max_size; } Index: ompi/mca/coll/tuned/coll_tuned_alltoallv.c =================================================================== --- ompi/mca/coll/tuned/coll_tuned_alltoallv.c (revision 29185) +++ ompi/mca/coll/tuned/coll_tuned_alltoallv.c (working copy) @@ -13,6 +13,7 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All Rights * reserved. + * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -71,7 +72,7 @@ /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); for (i = 0, max_size = 0 ; i < size ; ++i) { - size_t size = ext * rcounts[rank]; + size_t size = ext * rcounts[i]; max_size = size > max_size ? size : max_size; } @@ -91,11 +92,11 @@ if (i == rank && rcounts[j]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], - tmp_buffer, (char *) rbuf + rdisps[j]); + tmp_buffer, (char *) rbuf + rdisps[j] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype, j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; } @@ -106,11 +107,11 @@ } else if (j == rank && rcounts[i]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], - tmp_buffer, (char *) rbuf + rdisps[i]); + tmp_buffer, (char *) rbuf + rdisps[i] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; }