Hi all,

(Sorry, I first sent this to the "users" list, but I should have sent it to the "devel" list instead. Sorry for the noise...)

I have attached a very small example which raises an assertion.

The problem arises when one process does not have any element to write to the file (and therefore passes an empty datatype to MPI_File_set_view)...

You can see this "bug" with openmpi 1.6.3, 1.6.4 and 1.7.0 configured with:

./configure --enable-mem-debug --enable-mem-profile --enable-memchecker
 --with-mpi-param-check --enable-debug

Just compile the given example (idx_null.cc) as-is with

mpicxx -o idx_null idx_null.cc

and run with 3 processes:

mpirun -n 3 idx_null

You can modify the example by commenting out "#define WITH_ZERO_ELEMNT_BUG" to see that everything works fine when all processes have something to write.

There is no "bug" if you use openmpi 1.6.3 (and higher) without the debugging options.

Also, all is working well with mpich-3.0.3 configured with:

./configure --enable-g=yes


So, is this a wrong "assert" in openmpi?

Is there a real problem with running this example in "release" mode?

Thanks,

Eric
#include "mpi.h"
#include <cstdio>
#include <cstdlib>

using namespace std;

void abortOnError(int ierr) {
  if (ierr != MPI_SUCCESS) {
    printf("ERROR Returned by MPI: %d\n",ierr);
    char* lCharPtr = new char[MPI_MAX_ERROR_STRING];
    int lLongueur = 0;
    MPI_Error_string(ierr,lCharPtr, &lLongueur);
    printf("ERROR_string Returned by MPI: %s\n",lCharPtr);
    MPI_Abort( MPI_COMM_WORLD, 1 );
  }
}
// This main shows how an assertion is raised when MPI_File_set_view is called
// while one of the processes holds no data.

#define WITH_ZERO_ELEMNT_BUG

int main(int argc, char *argv[])
{
  int rank, size, i;
  MPI_Datatype lTypeIndexIntWithExtent, lTypeIndexIntWithoutExtent;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if (size != 3)
  {
    printf("Please run with 3 processes.\n");
    MPI_Finalize();
    return 1;
  }
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  int displacement[3];
  int* buffer = 0;

  int lTailleBuf = 0;
  if (rank == 0)
  {
    lTailleBuf = 3;
    displacement[0] = 0;
    displacement[1] = 4;
    displacement[2] = 5;
    buffer = new int[lTailleBuf];
    for (i=0; i<lTailleBuf; i++) buffer[i] = 10*(i+1);
  }
  if (rank == 1)
  {
    lTailleBuf = 2;
    displacement[0] = 1;
    displacement[1] = 2;

#ifdef WITH_ZERO_ELEMNT_BUG
    displacement[2] = 3;
    ++lTailleBuf;
#endif

    buffer = new int[lTailleBuf];
    for (i=0; i<lTailleBuf; i++) buffer[i] = -(i+1);
  }
  // BUG: A rank without any "element"
  if (rank == 2)
  {
#ifdef WITH_ZERO_ELEMNT_BUG
    lTailleBuf = 0;
#else
    displacement[0] = 0;
    lTailleBuf = 1;
    buffer = new int[lTailleBuf];
    for (i=0; i<lTailleBuf; i++) buffer[i] = 1000*(i+1);
#endif
  }

  MPI_File lFile;

  abortOnError(MPI_File_open( MPI_COMM_WORLD, const_cast<char*>("temp"), 
MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &lFile ));

  MPI_Type_create_indexed_block(lTailleBuf, 1, displacement, MPI_INT, 
&lTypeIndexIntWithoutExtent);
  MPI_Type_commit(&lTypeIndexIntWithoutExtent);

  // Here we compute the total number of int to write to resize the type:
  // Ici, on veut s'échanger le nb total de int à écrire à chaque appel pcqu'on 
doit calculer le bon "extent" du type.
  // Ça revient à dire que chaque processus ne n'écrira qu'une petite partie du 
fichier, mais devra avancer son pointeur
  // local d'écriture suffisamment loin pour ne pas écrire par dessus les 
données des autres
  int lTailleGlobale = 0;
  printf("[%d] Local size : %d \n",rank,lTailleBuf);

  MPI_Allreduce( &lTailleBuf, &lTailleGlobale, 1, MPI_INT, MPI_SUM, 
MPI_COMM_WORLD );

  printf("[%d] MPI_AllReduce : %d \n",rank,lTailleGlobale);

  //We now modify the extent of the type "type_without_extent"
  MPI_Type_create_resized( lTypeIndexIntWithoutExtent, 0, 
lTailleGlobale*sizeof(int), &lTypeIndexIntWithExtent );
  MPI_Type_commit(&lTypeIndexIntWithExtent);

  abortOnError(MPI_File_set_view( lFile, 0, MPI_INT, lTypeIndexIntWithExtent, 
const_cast<char*>("native"), MPI_INFO_NULL));

  for (int i =0; i<2;++i) {
    abortOnError(MPI_File_write_all( lFile, buffer, lTailleBuf, MPI_INT, 
MPI_STATUS_IGNORE));
    MPI_Offset lOffset,lSharedOffset;
    MPI_File_get_position(lFile, &lOffset);
    MPI_File_get_position_shared(lFile, &lSharedOffset);
    printf("[%d] Offset after write : %d int: Local: %ld Shared: %ld 
\n",rank,lTailleBuf,lOffset,lSharedOffset);

  }

  abortOnError(MPI_File_close( &lFile ));

  abortOnError(MPI_Type_free(&lTypeIndexIntWithExtent));
  abortOnError(MPI_Type_free(&lTypeIndexIntWithoutExtent));

  MPI_Finalize();
  return 0;
}

Reply via email to