Bart,

It looks like a bug that involves the osc/rdma component. Meanwhile, you can work around it by excluding that component on the mpirun command line:

mpirun --mca osc ^rdma ...
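For example (the process count and executable name below are just placeholders for your setup):

mpirun --mca osc ^rdma -np 2 ./onesided_crash_report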
Cheers,

Gilles

On Sat, Feb 16, 2019 at 8:43 PM b...@bartjanssens.org <b...@bartjanssens.org> wrote:
>
> Hi,
>
> Running the following test code on two processes:
>
> #include <mpi.h>
> #include <stdio.h>
> #include <unistd.h>
>
> #define N 2
>
> int main(int argc, char **argv)
> {
>   int i, rank, num_procs, len, received[N], buf[N];
>   MPI_Aint addrbuf[1], recvaddr[1];
>   MPI_Win win, awin;
>
>   MPI_Init(&argc, &argv);
>   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
>   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
>
>   MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
>   MPI_Win_attach(win, buf, sizeof(int)*N);
>   MPI_Win_create(addrbuf, sizeof(MPI_Aint), sizeof(MPI_Aint), MPI_INFO_NULL, MPI_COMM_WORLD, &awin);
>
>   MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, awin);
>   MPI_Get_address(buf, &addrbuf[0]);
>   MPI_Win_unlock(rank, awin);
>
>   if(rank == 0)
>   {
>     printf("Process %d is waiting for debugger attach\n", getpid());
>     sleep(15);
>   }
>
>   MPI_Barrier(MPI_COMM_WORLD);
>
>   if(rank == 0)
>   {
>     for(int r = 0; r != N; ++r)
>     {
>       MPI_Win_lock(MPI_LOCK_EXCLUSIVE, r, 0, awin);
>       MPI_Get(recvaddr, 1, MPI_AINT, r, 0, 1, MPI_AINT, awin);
>       MPI_Win_unlock(r, awin);
>       MPI_Win_lock(MPI_LOCK_EXCLUSIVE, r, 0, win);
>       MPI_Get(received, N, MPI_INT, r, recvaddr[0], N, MPI_INT, win);
>       printf("First value from %d is %d\n", r, received[0]);
>       MPI_Win_unlock(r, win);
>     }
>   }
>
>   MPI_Barrier(MPI_COMM_WORLD);
>
>   MPI_Win_free(&win);
>   MPI_Finalize();
>   return 0;
> }
>
> results in a crash with this backtrace (starting at the second MPI_Get line in my code above):
>
> #0  mca_btl_vader_get_cma (btl=0x7f44888d0220 <mca_btl_vader>, endpoint=0x0, local_address=0x7ffff4a13c18, remote_address=<optimized out>, local_handle=0x0, remote_handle=<optimized out>, size=8, flags=0, order=255, cbfunc=0x7f4488231250 <ompi_osc_rdma_get_complete>, cbcontext=0x555d01e1c060, cbdata=0x0) at btl_vader_get.c:95
> #1  0x00007f44882308c1 in ompi_osc_rdma_get_contig (sync=sync@entry=0x555d01e1be90, peer=peer@entry=0x555d01e16f10, source_address=<optimized out>, source_address@entry=140737297595424, source_handle=source_handle@entry=0x7f448a747180, target_buffer=<optimized out>, target_buffer@entry=0x7ffff4a13c18, size=size@entry=8, request=<optimized out>) at osc_rdma_comm.c:698
> #2  0x00007f44882354b6 in ompi_osc_rdma_master (alloc_reqs=true, rdma_fn=0x7f4488230610 <ompi_osc_rdma_get_contig>, max_rdma_len=<optimized out>, request=0x555d01e1c060, remote_datatype=0x555d0004a2c0 <ompi_mpi_int>, remote_count=<optimized out>, remote_handle=0x7f448a747180, remote_address=<optimized out>, peer=<optimized out>, local_datatype=0x555d0004a2c0 <ompi_mpi_int>, local_count=<optimized out>, local_address=0x7ffff4a13c18, sync=0x555d01e1be90) at osc_rdma_comm.c:349
> #3  ompi_osc_rdma_get_w_req (request=0x0, source_datatype=0x555d0004a2c0 <ompi_mpi_int>, source_count=<optimized out>, source_disp=<optimized out>, peer=<optimized out>, origin_datatype=0x555d0004a2c0 <ompi_mpi_int>, origin_count=<optimized out>, origin_addr=0x7ffff4a13c18, sync=0x555d01e1be90) at osc_rdma_comm.c:803
> #4  ompi_osc_rdma_get (origin_addr=0x7ffff4a13c18, origin_count=<optimized out>, origin_datatype=0x555d0004a2c0 <ompi_mpi_int>, source_rank=<optimized out>, source_disp=<optimized out>, source_count=<optimized out>, source_datatype=0x555d0004a2c0 <ompi_mpi_int>, win=0x555d01e0aae0) at osc_rdma_comm.c:880
> #5  0x00007f448b404b6b in PMPI_Get (origin_addr=0x7ffff4a13c18, origin_count=2, origin_datatype=0x555d0004a2c0 <ompi_mpi_int>, target_rank=<optimized out>, target_disp=<optimized out>, target_count=<optimized out>, target_datatype=0x555d0004a2c0 <ompi_mpi_int>, win=0x555d01e0aae0) at pget.c:81
> #6  0x0000555d00047430 in main (argc=1, argv=0x7ffff4a13d18) at onesided_crash_report.c:41
>
> On OpenMPI 3.1.3 the code works fine. Am I doing something wrong, or is this a bug?
>
> Kind regards,
>
> Bart
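For readers hitting the same crash: below is a minimal sketch (not from the original thread, so treat the details as illustrative) of the same dynamic-window pattern. It exchanges the attached addresses with MPI_Allgather instead of reading them through a second window, and it reads the fetched values only after MPI_Win_unlock returns, which is when MPI guarantees the MPI_Get data is valid at the origin. The MPI_Get on the dynamic window should still exercise the osc/rdma code path shown in the backtrace above.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define N 2

int main(int argc, char **argv)
{
  int rank, num_procs, buf[N], received[N];
  MPI_Aint myaddr, *addrs;
  MPI_Win win;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

  for (int i = 0; i < N; ++i)
    buf[i] = rank * 100 + i;   /* distinct per-rank contents */

  /* Dynamic window: memory is attached after creation. */
  MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  MPI_Win_attach(win, buf, sizeof(int) * N);

  /* Publish the attached address to every rank collectively,
     instead of fetching it through a second window. */
  MPI_Get_address(buf, &myaddr);
  addrs = malloc(num_procs * sizeof(MPI_Aint));
  MPI_Allgather(&myaddr, 1, MPI_AINT, addrs, 1, MPI_AINT, MPI_COMM_WORLD);

  if (rank == 0) {
    for (int r = 0; r < num_procs; ++r) {
      MPI_Win_lock(MPI_LOCK_SHARED, r, 0, win);
      /* For dynamic windows, target_disp is the absolute address
         obtained with MPI_Get_address on the target. */
      MPI_Get(received, N, MPI_INT, r, addrs[r], N, MPI_INT, win);
      MPI_Win_unlock(r, win);   /* 'received' is only valid from here on */
      printf("First value from %d is %d\n", r, received[0]);
    }
  }

  /* Make sure rank 0 is done before anyone detaches. */
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Win_detach(win, buf);
  MPI_Win_free(&win);
  free(addrs);
  MPI_Finalize();
  return 0;
}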