Retrying, as the last version ended up as HTML garbage... :-(
***
Hi Andre,
I've tried to work out a (rather old-style) program that structurally resembles your testcase, but uses plain MPI, with mpi_put for one-sided communication as you suggested. It deliberately does not use associate, to sidestep potential issues with buffers and asynchronous communication (see another recent PR) or with aliasing. It works with any number of processes that is a perfect square. One can play with it and see that it works only if one uses the right communicators etc. See attached.
Can you see how that maps to the ways you want to use teams? If it is possible to have a testcase that works with at least one other compiler, that would be a real benefit.
Cheers,
Harald
!> Two-stage gather of per-process results onto world rank 0 using MPI
!> one-sided communication (mpi_put).
!>
!> The n*n processes are partitioned into n groups ("columns") of n
!> processes each, every group having its own communicator.  Each process
!> computes one integer.  Stage (1) collects the values of a column on that
!> column's rank 0 through a window on the column communicator; stage (2)
!> collects the columns on world rank 0 through a window on the world
!> communicator.  The number of processes must be a perfect square.
program p
  use mpi
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

  integer :: comm_world, comm_col, win_world, win_col
  integer :: rank_world, size_world, rank_col, size_col
  integer :: i, ierr, icol, jcol, n
  integer :: int_bytes            ! size of one MPI_INTEGER in bytes
  integer(MPI_ADDRESS_KIND) :: disp, winsize
  integer, allocatable, target :: data(:), col_data(:)
  integer, pointer, contiguous :: parr(:,:)

  ! initialize
  call mpi_init (ierr)
  if (ierr /= MPI_SUCCESS) error stop 1
  comm_world = MPI_COMM_WORLD
  call mpi_comm_size (comm_world, size_world, ierr)
  call mpi_comm_rank (comm_world, rank_world, ierr)
  if (rank_world == 0) &
       write(error_unit,*) "Total number of processes:", size_world

  ! The window displacement unit must be a default integer holding the
  ! element size in bytes; ask MPI for it instead of relying on the
  ! non-standard sizeof extension (whose result is not a default integer).
  call mpi_type_size (MPI_INTEGER, int_bytes, ierr)

  ! partition into n*n processes (n columns)
  n = nint (sqrt (real (size_world)))
  if (n*n /= size_world) error stop "num pes must be a squared integer"
  icol = rank_world / n
  jcol = mod (rank_world, n)

  ! create communicator for n groups of n processes (columns);
  ! jcol is used as the key, so the rank within a column equals jcol
  call mpi_comm_split (comm_world, icol, jcol, comm_col, ierr)
  call mpi_comm_size (comm_col, size_col, ierr)
  call mpi_comm_rank (comm_col, rank_col, ierr)
  if (size_col /= n)    error stop 2
  if (rank_col /= jcol) error stop 3

  ! prepare the "real work": data viewed as an n x n array, one column
  ! of parr per process column
  allocate (data(size_world), source=-1)
  parr(0:n-1,0:n-1) => data

  ! Let each process do its work...
  do i = 0, size_world-1
     call mpi_barrier (comm_world, ierr)   ! for ordered output
     if (rank_world == i) write(error_unit,*) "rank", i,': rank_col=', jcol
  end do
  parr(rank_col,icol) = (rank_col+1) + 100*(icol+1)   ! the "result"

  ! ...and gather the results:

  ! (1) column-wise using an auxiliary array for each column
  ! create MPI window for 1-sided communication;
  ! only rank 0 of each column exposes memory (pre-filled with -42)
  if (rank_col == 0) then
     allocate (col_data(0:n-1), source=-42)
     winsize = int (n, MPI_ADDRESS_KIND) * int_bytes
  else
     allocate (col_data(0))
     winsize = 0
  end if
  call mpi_barrier (comm_world, ierr)
  call mpi_win_create (col_data, winsize, int_bytes, &
                       MPI_INFO_NULL, comm_col, win_col, ierr)
  call mpi_win_fence (0, win_col, ierr)
  ! every process stores its own value at slot rank_col on column rank 0
  disp = rank_col
  call mpi_put (parr(rank_col, icol), 1, MPI_INTEGER, &
                0, disp, 1, MPI_INTEGER, win_col, ierr)
  call mpi_win_fence (0, win_col, ierr)   ! completes the puts
  call mpi_win_free (win_col, ierr)

  ! (2) gather columns: create MPI window for 1-sided communication;
  ! only world rank 0 exposes memory (its full parr array)
  if (rank_world == 0) then
     winsize = int (size (parr), MPI_ADDRESS_KIND) * int_bytes
  else
     winsize = 0
  end if
  call mpi_win_create (parr, winsize, int_bytes, &
                       MPI_INFO_NULL, comm_world, win_world, ierr)
  call mpi_win_fence (0, win_world, ierr)
  if (rank_col == 0) then
     ! column icol starts at element offset icol*n of the target array
     disp = icol*n
     call mpi_put (col_data, size(col_data), MPI_INTEGER, &
                   0, disp, size(col_data), MPI_INTEGER, win_world, ierr)
  end if
  call mpi_win_fence (0, win_world, ierr)   ! completes the puts
  call mpi_win_free (win_world, ierr)
  deallocate (col_data)

  ! release the column communicator created by mpi_comm_split
  call mpi_comm_free (comm_col, ierr)

  ! show results on processor 0:
  if (rank_world == 0) then
     write(error_unit,*) "data=", data
  end if
  call mpi_finalize (ierr)
end program p