Retrying, as the last version ended up as HTML garbage... :-(
 
***
 
Hi Andre,
 
I've tried to work out a (rather old-style) program that structurally resembles your testcase, but using plain MPI, with mpi_put for 1-sided communication as you suggested, and avoiding associate to avoid potential issues with buffers and asynchronous communication (see another recent PR), or aliasing.  It works with any number of processes that is a squared integer.  One can play with it and see that it works only if one uses the right communicators etc.  See attached.
 
Can you see how that maps to the ways you want to use teams?  If it is possible to have a testcase that works with at least one other compiler, that would be a real benefit.
 
Cheers,
Harald
 
program p
  use mpi
  implicit none
  integer :: comm_world, comm_col, win_world, win_col
  integer :: rank_world, size_world, rank_col, size_col
  integer :: i, ierr, icol, jcol, n
  integer(MPI_ADDRESS_KIND) :: disp, winsize
  integer, allocatable, target :: data(:), col_data(:)
  integer, pointer, contiguous :: parr(:,:)

  ! initialize
  call mpi_init (ierr)
  if (ierr /= MPI_SUCCESS) error stop 1
  comm_world = MPI_COMM_WORLD
  call mpi_comm_size (comm_world, size_world, ierr)
  call mpi_comm_rank (comm_world, rank_world, ierr)
  if (rank_world == 0) write(0,*) "Total number of processes:", size_world

  ! partition into n*n processes (n columns)
  n = nint (sqrt (real (size_world)))
  if (n*n /= size_world) error stop "num pes must be a squared integer"
  icol =      rank_world / n
  jcol = mod (rank_world, n)

  ! create communicator for n groups of n processes (columns)
  call mpi_comm_split (comm_world, icol, jcol, comm_col, ierr)
  call mpi_comm_size (comm_col, size_col, ierr)
  call mpi_comm_rank (comm_col, rank_col, ierr)
  if (size_col /= n)    error stop 2
  if (rank_col /= jcol) error stop 3

  ! prepare the "real work"
  allocate (data(size_world), source=-1)
  parr(0:n-1,0:n-1) => data

  ! Let each process does its work...
  do i = 0, size_world-1
     call mpi_barrier (comm_world, ierr)  ! for ordered output
     if (rank_world == i) write(0,*) "rank", i,': rank_col=', jcol
  end do
  parr(rank_col,icol) = (rank_col+1) + 100*(icol+1)  ! the "result"

  ! ...and gather the results:
  ! (1) column-wise using an auxiliary array for each column
  !     create MPI window for 1-sided communication
  if (rank_col == 0) then
     allocate (col_data(0:n-1), source=-42)
     winsize = n * sizeof(1)
  else
     allocate (col_data(0))
     winsize = 0
  end if
  call mpi_barrier (comm_world, ierr)
  call mpi_win_create (col_data, winsize, sizeof(1), &
                       MPI_INFO_NULL, comm_col, win_col, ierr)
  call mpi_win_fence (0, win_col, ierr)
  disp = rank_col
  call mpi_put (parr(rank_col, icol), 1, MPI_INTEGER, &
                0, disp, 1, MPI_INTEGER, win_col, ierr)
  call mpi_win_fence (0, win_col, ierr)
  call mpi_win_free (win_col, ierr)

  ! (2) gather columns: create MPI window for 1-sided communication
  if (rank_world == 0) then
     winsize = size(parr) * sizeof(1)
  else
     winsize = 0
  end if
  call mpi_win_create (parr, winsize, sizeof(1), &
                       MPI_INFO_NULL, comm_world, win_world, ierr)
  call mpi_win_fence (0, win_world, ierr)
  if (rank_col == 0) then
     disp = icol*n
     call mpi_put (col_data, size(col_data), MPI_INTEGER, &
                   0, disp, size(col_data), MPI_INTEGER, win_world, ierr)
  end if
  call mpi_win_fence (0, win_world, ierr)
  call mpi_win_free (win_world, ierr)
  deallocate (col_data)

  ! show results on processor 0:
  if (rank_world == 0) then
     write(0,*) "data=", data
  end if
  call mpi_finalize (ierr)
end program p

Reply via email to