Dear list,
the attached program deadlocks in MPI_File_write_all when run with 16
processes on two 8-core nodes of an Infiniband cluster. It runs fine when I
a) use tcp
or
b) replace MPI_File_write_all by MPI_File_write
I'm using openmpi V. 1.3.2 (but I checked that the problem also
occurs with version 1.3.3). The OFED version is 1.4 (installed via
Rocks). The Operating system is CentOS 5.2
I compile with gcc-4.1.2. The openmpi configure flags are
../../configure --prefix=/share/apps/openmpi/1.3.2/gcc-4.1.2/
--with-io-romio-flags=--with-file-system=nfs+ufs+pvfs2
--with-wrapper-ldflags=-L/share/apps/pvfs2/lib
CPPFLAGS=-I/share/apps/pvfs2/include/ LDFLAGS=-L/share/apps/pvfs2/lib
LIBS=-lpvfs2 -lpthread
The user home directories are mounted via nfs.
Is it a problem with the user code, the system or with openmpi?
Thanks,
Dorian
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <assert.h>
/* Per-rank block lengths (in units later scaled by 96 in main) for the
 * 3 segments each of the 16 ranks writes.  Row r belongs to rank r;
 * column k is segment k.  Together with displ[][] below these describe
 * a contiguous partition: displ[r][k] == displ[r-1][k] + blocklength[r-1][k]
 * (verified by the assert in main). */
int blocklength[16][3] = { { 2471, 2471, 2471 },
{ 4392, 4392, 4392 },
{ 4392, 4392, 4392 },
{ 2470, 2470, 2470 },
{ 2956, 2956, 2956 },
{ 5256, 5256, 5256 },
{ 5256, 5256, 5256 },
{ 2957, 2957, 2957 },
{ 2903, 2903, 2903 },
{ 5160, 5160, 5160 },
{ 5160, 5160, 5160 },
{ 2902, 2902, 2902 },
{ 2470, 2470, 2470 },
{ 4392, 4392, 4392 },
{ 4392, 4392, 4392 },
{ 2471, 2471, 2471 }
};
/* Per-rank starting displacements matching blocklength[][] above.
 * Row r, column k is where rank r's segment k begins; the three
 * segments of a rank start 60000 apart (before the x96 scaling in
 * main).  Consecutive ranks tile each segment without gaps or
 * overlaps, which the assert in main double-checks. */
int displ[16][3] = { { 0, 60000, 120000 },
{ 2471, 62471, 122471 },
{ 6863, 66863, 126863 },
{ 11255, 71255, 131255 },
{ 13725, 73725, 133725 },
{ 16681, 76681, 136681 },
{ 21937, 81937, 141937 },
{ 27193, 87193, 147193 },
{ 30150, 90150, 150150 },
{ 33053, 93053, 153053 },
{ 38213, 98213, 158213 },
{ 43373, 103373, 163373 },
{ 46275, 106275, 166275 },
{ 48745, 108745, 168745 },
{ 53137, 113137, 173137 },
{ 57529, 117529, 177529 }
};
int main(int argc, char **argv)
{
int b[3], d[3], size;
MPI_Init(&argc,&argv);
int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
int numprocs;
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_File fh;
MPI_File_open(MPI_COMM_WORLD, "write.data",
MPI_MODE_CREATE | MPI_MODE_WRONLY |
MPI_MODE_UNIQUE_OPEN,
MPI_INFO_NULL, &fh);
/* Just a check */
if(0 < myrank) {
for(int k = 0; k < 3; ++k) {
assert(displ[myrank][k] ==
displ[myrank-1][k]+blocklength[myrank-1][k]);
}
}
size = 0;
for(int k = 0; k < 3; ++k) {
b[k] = 96*blocklength[myrank][k];
d[k] = 96*displ[myrank][k];
size += b[k];
}
MPI_Datatype block;
MPI_Type_indexed(3, b, d, MPI_CHAR, &block);
MPI_Type_commit(&block);
MPI_File_set_view(fh, 0, MPI_CHAR, block, "native", MPI_INFO_NULL);
char *buf = new char[size]();
MPI_File_write_all(fh, buf, size, MPI_CHAR, MPI_STATUS_IGNORE);
MPI_File_close(&fh);
MPI_Type_free(&block);
delete[] buf;
return MPI_Finalize();
}