Problem description:
Elements from all ranks are gathered correctly except for the
element belonging to the root/target rank of the gather operation
when using certain custom MPI-datatypes (see reproducer code).

The bug can be toggled by commenting/uncommenting line 11 in the .F90-file.

Even though all this is for MPI_Gather the same seems to go for MPI_Gatherv 
too.

I have verified the bad behaviour with several OpenMPI versions from 1.2.3 to 
1.3b2. Correct behaviour has been observed on mvapich2 and PlatformMPI. Both 
gfortran and ifort has been tried.

Attached files:
 BUILD                      Build instructions
 RUN                        Run instructions
 mpi_gather_test.F90        Reproducer source code
 4rank_bad_output.txt       Bad output
 4rank_expected_output.txt  Good output

/Peter
mpif90.openmpi -o  mpi_gather_test.local_ompils mpi_gather_test.F90
mpirun.openmpi -np 4 ./mpi_gather_test.local_ompils  | sort -nk 2
Module global
  implicit none
  include 'mpif.h'

! Handle for MPI_Type_create_struct
  Integer                                 :: my_mpi_struct

  Type my_fortran_struct
! With the following line the bug bites, with it commented out the
! behaviour is as expected
     Integer                              :: unused_data
     Integer                              :: used_data
  End Type my_fortran_struct

End Module global


! -----------------------------------------------------


Program mpi_gather_test
  use global

  Integer            :: i
  Integer            :: nranks
  Integer, Parameter :: gather_target = 1
  Integer            :: rank
  Integer            :: ierror

  Type (my_fortran_struct), Pointer     :: source_vector (:)
  Type (my_fortran_struct), Pointer     :: dest_vector(:)

  call MPI_Init ( ierror )
  call MPI_Comm_rank ( MPI_COMM_WORLD, rank, ierror )
  call MPI_Comm_size ( MPI_COMM_WORLD, nranks, ierror )

  Allocate (source_vector(1), STAT = ierror)
  Allocate (dest_vector(1:nranks), STAT = ierror)

! Each rank initializes the data to be gathered to its rank number
! for tracing purposes (So we can see what goes where)
  source_vector(:)%used_data = rank

! Each rank initializes the target buffer with tracing data. The
! expectation is that on the root rank this will be completely over-
! written while on the rest of the ranks it will be unchanged.
  do i = 1, nranks
     dest_vector(i)%used_data = 10 * i + rank * 100 + 1000
  enddo

! Call the subroutine below that creates the MPI-datatype.
  call create_datatype ( ierror )

! Run the actual gather.
  call MPI_Gather (source_vector, 1,              my_mpi_struct, &
                   dest_vector,   1,              my_mpi_struct, &
                   gather_target, MPI_COMM_WORLD, ierror)

! Output the content of the used_data part of the dest_vector on
! all ranks. On the root-rank of the gather it is expected that the
! initial data is overwritten with the data from the source_vectors
! gathered from all ranks.
  do i = 1, nranks
     print *, 'rank:', rank, 'element:', i, 'dest_vector%used_data: ', &
              dest_vector(i)%used_data
  enddo
  
  call MPI_Finalize (ierror)
end program mpi_gather_test


! -----------------------------------------------------


subroutine create_datatype (ierror)
  use global

  integer, Intent (Out) :: ierror

  integer (kind=MPI_ADDRESS_KIND) :: start, loc_used_data, loc_ub
  integer (kind=MPI_ADDRESS_KIND) :: disp (3)
  integer                         :: lengths (3), types (3), ext_size

  Type (my_fortran_struct)              :: template (2)

  ierror = 0

! Get the offsets (displacements) from the template vector of
! my_fortran_struct type
  call MPI_Get_address (template(1), start, ierror)
  call MPI_Get_address (template(1)%used_data, loc_used_data, ierror)
  call MPI_Get_address (template(2), loc_ub, ierror)

  disp (1)     = 0
  disp (2)     = loc_used_data - start
  disp (3)     = loc_ub        - start

  lengths (1:3) = 1

  types (1) = MPI_LB
  types (2) = MPI_INTEGER
  types (3) = MPI_UB

! Create the MPI-type
  call MPI_Type_create_struct (3, lengths, disp, types, &
       my_mpi_struct, ierror)

  call MPI_Type_commit (my_mpi_struct, ierror)

end subroutine create_datatype
 rank:           0 element:           1 dest_vector%used_data:         1010
 rank:           0 element:           2 dest_vector%used_data:         1020
 rank:           0 element:           3 dest_vector%used_data:         1030
 rank:           0 element:           4 dest_vector%used_data:         1040
 rank:           1 element:           1 dest_vector%used_data:            0
 rank:           1 element:           2 dest_vector%used_data:         1120
 rank:           1 element:           3 dest_vector%used_data:            2
 rank:           1 element:           4 dest_vector%used_data:            3
 rank:           2 element:           1 dest_vector%used_data:         1210
 rank:           2 element:           2 dest_vector%used_data:         1220
 rank:           2 element:           3 dest_vector%used_data:         1230
 rank:           2 element:           4 dest_vector%used_data:         1240
 rank:           3 element:           1 dest_vector%used_data:         1310
 rank:           3 element:           2 dest_vector%used_data:         1320
 rank:           3 element:           3 dest_vector%used_data:         1330
 rank:           3 element:           4 dest_vector%used_data:         1340
 rank:           0 element:           1 dest_vector%used_data:         1010
 rank:           0 element:           2 dest_vector%used_data:         1020
 rank:           0 element:           3 dest_vector%used_data:         1030
 rank:           0 element:           4 dest_vector%used_data:         1040
 rank:           1 element:           1 dest_vector%used_data:            0
 rank:           1 element:           2 dest_vector%used_data:            1
 rank:           1 element:           3 dest_vector%used_data:            2
 rank:           1 element:           4 dest_vector%used_data:            3
 rank:           2 element:           1 dest_vector%used_data:         1210
 rank:           2 element:           2 dest_vector%used_data:         1220
 rank:           2 element:           3 dest_vector%used_data:         1230
 rank:           2 element:           4 dest_vector%used_data:         1240
 rank:           3 element:           1 dest_vector%used_data:         1310
 rank:           3 element:           2 dest_vector%used_data:         1320
 rank:           3 element:           3 dest_vector%used_data:         1330
 rank:           3 element:           4 dest_vector%used_data:         1340

Attachment: signature.asc
Description: This is a digitally signed message part.

Reply via email to