Sorry,

here is the attachment...

Eric

On 04/23/2013 09:54 AM, Eric Chamberland wrote:
Hi,

I have sent a previous message showing something that I think is a bug
(or maybe a misuse, but...).

I reworked the example I sent earlier to simplify it: it is now almost
half as many lines of code and the structures are simpler... but it
still shows the wrong behaviour.

Briefly, we construct several MPI datatypes and nest them into a final
type which is a:
{MPI_LONG,{{MPI_LONG,MPI_CHAR}*2}}

Here is the output from OpenMPI 1.6.3:

  Rank 0 send this:
  i: 0 => {{0},{{3,%},{7,5}}}
  i: 1 => {{1},{{3,%},{7,5}}}
  i: 2 => {{2},{{3,%},{7,5}}}
  i: 3 => {{3},{{3,%},{7,5}}}
  i: 4 => {{4},{{3,%},{7,5}}}
  i: 5 => {{5},{{3,%},{7,5}}}
MPI_Recv returned success and everything in MPI_Status is correct after
receive.
  Rank 1 received this:
  i: 0 => {{0},{{3,%},{-999,$}}} *** ERROR ****
  i: 1 => {{1},{{3,%},{-999,$}}} *** ERROR ****
  i: 2 => {{2},{{3,%},{-999,$}}} *** ERROR ****
  i: 3 => {{3},{{3,%},{-999,$}}} *** ERROR ****
  i: 4 => {{4},{{3,%},{-999,$}}} *** ERROR ****
  i: 5 => {{5},{{3,%},{-999,$}}} *** ERROR ****

Here is the expected output, obtained with mpich-3.0.3:

  Rank 0 send this:
  i: 0 => {{0},{{3,%},{7,5}}}
  i: 1 => {{1},{{3,%},{7,5}}}
  i: 2 => {{2},{{3,%},{7,5}}}
  i: 3 => {{3},{{3,%},{7,5}}}
  i: 4 => {{4},{{3,%},{7,5}}}
  i: 5 => {{5},{{3,%},{7,5}}}
MPI_Recv returned success and everything in MPI_Status is correct after
receive.
  Rank 1 received this:
  i: 0 => {{0},{{3,%},{7,5}}} OK
  i: 1 => {{1},{{3,%},{7,5}}} OK
  i: 2 => {{2},{{3,%},{7,5}}} OK
  i: 3 => {{3},{{3,%},{7,5}}} OK
  i: 4 => {{4},{{3,%},{7,5}}} OK
  i: 5 => {{5},{{3,%},{7,5}}} OK

Is it related to the bug reported here:
  http://www.open-mpi.org/community/lists/devel/2013/04/12267.php ?

Thanks,

Eric


_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel

#include "mpi.h"
#include <iostream>
#include <vector>

//**************************************************************************
//
// This example is showing a problem with nested types!
// It works perfectly with mpich-3.0.3 but seems to do a wrong transmission
// with openmpi 1.6.3, 1.6.4, 1.7.0 and 1.7.1
//
// The basic problem seems to arise with a vector of PALong_2Pairs which is a
// MPI nested type constructed like this:
//--------------------------------------
// Struct          | is composed of
//--------------------------------------
// PAPairLC        |  {long, char}
// PALong_2Pairs   |  {long,{PAPairLC,PAPairLC}}
//--------------------------------------
//
//**************************************************************************

// NOTE(review): every standard-library name in the code visible here is
// already written with an explicit std:: prefix, so this directive looks
// unnecessary - confirm before removing.
using namespace std;

//! Function to abort on any MPI error:
void abortOnError(int ierr) {
  if (ierr != MPI_SUCCESS) {
    std::cerr << "ERROR Returned by MPI: " << ierr << std::endl;
    char* lCharPtr = new char[MPI_MAX_ERROR_STRING];
    int lLongueur = 0;
    MPI_Error_string(ierr,lCharPtr, &lLongueur);
    std::cerr << "ERROR_string Returned by MPI: " << lCharPtr << std::endl;
    MPI_Abort( MPI_COMM_WORLD, 1 );
  }
}

// Base character code (32, the ASCII space); the char values used by this
// example are written as offsets from it (FIRST_CHAR+4, FIRST_CHAR+5, ...).
// A typed constant rather than a macro, so it follows normal scoping rules.
const int FIRST_CHAR = 32;

//*****************************************************
//
// PAPairLC is a pair: {long, char}
//
//*****************************************************

class PAPairLC
{
public:
  //! Fills both members with recognisable placeholder values
  //! (-999 and FIRST_CHAR+4).
  PAPairLC() :aLong(-999), aChar(FIRST_CHAR+4) {}

  long   aLong;
  char   aChar;

  //! Handle of the MPI datatype describing this class; it holds
  //! MPI_DATATYPE_NULL until createMPIDatatype() has been called.
  static MPI_Datatype  asMPIDatatype;
  //! Returns a reference to the class-wide datatype handle.
  static MPI_Datatype& reqMPIDatatype() { return asMPIDatatype;}

  //! Writes the pair to pOS in the form "{aLong,aChar}".
  void print(std::ostream& pOS) const {pOS << "{" << aLong << "," << aChar << "}";}

  //! Builds the MPI struct datatype {MPI_LONG, MPI_CHAR} using the real
  //! offsets of aLong and aChar inside a PAPairLC, commits it and stores the
  //! handle in asMPIDatatype. Every MPI call is checked through abortOnError().
  //!
  //! NOTE(review): the extent of the new type is whatever
  //! MPI_Type_create_struct derives; code that transmits several consecutive
  //! PAPairLC objects presumably relies on it matching sizeof(PAPairLC) -
  //! confirm, or pin it with MPI_Type_create_resized.
  static void createMPIDatatype() {

    // Scratch object used only to measure member offsets.
    PAPairLC lProbe;

    MPI_Datatype lTypes[2] = {MPI_LONG, MPI_CHAR};
    int lBlocLen[2] = {1,1};
    MPI_Aint lDeplacements[2] = {0, 0};

    MPI_Aint lPtrBase = 0;
    abortOnError(MPI_Get_address(&lProbe,       &lPtrBase));
    abortOnError(MPI_Get_address(&lProbe.aLong, &lDeplacements[0]));
    abortOnError(MPI_Get_address(&lProbe.aChar, &lDeplacements[1]));

    // Turn the absolute addresses into displacements relative to the object.
    lDeplacements[0] -= lPtrBase;
    lDeplacements[1] -= lPtrBase;

    abortOnError(MPI_Type_create_struct(2, lBlocLen, lDeplacements, lTypes, &asMPIDatatype));

    abortOnError(MPI_Type_commit(&asMPIDatatype));

  }
};
MPI_Datatype PAPairLC::asMPIDatatype = MPI_DATATYPE_NULL;

//*****************************************************
//
// PALong_2Pairs is a struct of: {long, PAPairLC[2]}
//
//*****************************************************
class PALong_2Pairs
{
public:
  //! Gives aFirst a recognisable placeholder value (-999) so that an element
  //! that was never filled in is easy to spot; the two pairs are set by
  //! PAPairLC's own default constructor.
  PALong_2Pairs() : aFirst(-999) {}

  long      aFirst;
  PAPairLC a2Pairs[2];

  //! Handle of the MPI datatype describing this class; it holds
  //! MPI_DATATYPE_NULL until createMPIDatatype() has been called.
  static MPI_Datatype  asMPIDatatype;
  //! Returns a reference to the class-wide datatype handle.
  static MPI_Datatype& reqMPIDatatype() { return asMPIDatatype;}

  //! Writes the object to pOS in the form "{{aFirst},{{long,char},{long,char}}}".
  void print(std::ostream& pOS) const {
    pOS << "{{" << aFirst << "}," << "{{"
        << a2Pairs[0].aLong<< ","
        << a2Pairs[0].aChar<< "},{"
        << a2Pairs[1].aLong<< ","
        << a2Pairs[1].aChar<< "}}}";
  }

  //! Builds the nested MPI struct datatype {MPI_LONG, PAPairLC x 2} using the
  //! real offsets of aFirst and a2Pairs inside a PALong_2Pairs, commits it and
  //! stores the handle in asMPIDatatype. Every MPI call is checked through
  //! abortOnError().
  //!
  //! The handle returned by PAPairLC::reqMPIDatatype() is used as a member
  //! type, so PAPairLC::createMPIDatatype() has to be called first.
  static void createMPIDatatype()
  {
    // Scratch object used only to measure member offsets.
    PALong_2Pairs lProbe;

    MPI_Datatype lTypes[2] = {MPI_LONG, PAPairLC::reqMPIDatatype()};
    // One long, followed by a block of two consecutive PAPairLC.
    int lBlocLen[2] = {1,2};
    MPI_Aint lDeplacements[2] = {0, 0};

    MPI_Aint lPtrBase = 0;
    abortOnError(MPI_Get_address(&lProbe,            &lPtrBase));
    abortOnError(MPI_Get_address(&lProbe.aFirst,     &lDeplacements[0]));
    abortOnError(MPI_Get_address(&lProbe.a2Pairs[0], &lDeplacements[1]));

    // Turn the absolute addresses into displacements relative to the object.
    lDeplacements[0] -= lPtrBase;
    lDeplacements[1] -= lPtrBase;

    abortOnError(MPI_Type_create_struct(2, lBlocLen, lDeplacements, lTypes, &asMPIDatatype));
    abortOnError(MPI_Type_commit(&asMPIDatatype));
  }
};

MPI_Datatype PALong_2Pairs::asMPIDatatype = MPI_DATATYPE_NULL;


//****************************
//
// Here is now the main...
//
//****************************
int main(int argc, char *argv[])
{
  int rank, size;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  if (size != 2)
  {
    std::cout << "Please run with 2 processes.\n";
    MPI_Finalize();
    return 1;
  }

  //Calls the types creations once:
  PAPairLC::createMPIDatatype();
  PALong_2Pairs::createMPIDatatype();

  // Here is the array we wil try to transmit:
  PALong_2Pairs* lBuf = 0;

  const int lBufSize = 6;
  const int lTag = 123;
  lBuf = new PALong_2Pairs[lBufSize];

  if (rank == 0)
  {
    // Some filling of values (to override the default ones from the 
constructors):
    for (int i=0; i<lBufSize; i++) {
      lBuf[i].aFirst                 = i;
      lBuf[i].a2Pairs[0].aLong    = 3;
      lBuf[i].a2Pairs[0].aChar    = FIRST_CHAR+5;
      lBuf[i].a2Pairs[1].aLong    = 7;
      lBuf[i].a2Pairs[1].aChar    = FIRST_CHAR+21;
    }
    // Print what we will send:
    std::cout << " Rank 0 send this: " << std::endl;
    for (int i=0; i<lBufSize; i++) {
      std::cout << " i: " << i << " => ";
      lBuf[i].print(std::cout);
      std::cout << std::endl;
    }
    //Now send this vector!
    abortOnError(MPI_Send(lBuf, lBufSize, PALong_2Pairs::reqMPIDatatype(), 1, 
lTag, MPI_COMM_WORLD));
  }

  if (rank == 1)
  {
    MPI_Status status;
    status.MPI_SOURCE = -1;
    status.MPI_TAG = -1;
    status.MPI_ERROR = MPI_SUCCESS;
    abortOnError(MPI_Recv(lBuf, lBufSize, PALong_2Pairs::reqMPIDatatype(), 0, 
lTag, MPI_COMM_WORLD, &status));

    // For verifying the status:
    int lCount = -1;
    abortOnError(MPI_Get_count(&status, 
PALong_2Pairs::reqMPIDatatype(),&lCount));
    const bool lAllOK = 0           == status.MPI_SOURCE &&
                        lTag        == status.MPI_TAG &&
                        MPI_SUCCESS == status.MPI_ERROR &&
                        lCount      == lBufSize;
    if (lAllOK)
    {
      std::cout << "MPI_Recv returned success and everything in MPI_Status is 
correct after receive." << std::endl;
    }
    else {
      std::cout << "MPI_Status is not correct!"<< std::endl;
      return 1;
    }

    std::cout << " Rank 1 received this: " << std::endl;
    for (int i=0; i<lBufSize; i++) {
      std::cout << " i: " << i << " => ";
      lBuf[i].print(std::cout);
      // Verifications of what we should have received:
      bool lOK = true;
      lOK &= lBuf[i].aFirst                 == i;
      lOK &= lBuf[i].a2Pairs[0].aLong    == 3;
      lOK &= lBuf[i].a2Pairs[0].aChar    == FIRST_CHAR+5;
      lOK &= lBuf[i].a2Pairs[1].aLong    == 7;
      lOK &= lBuf[i].a2Pairs[1].aChar    == FIRST_CHAR+21;

      //If it is not what we expect, print an error:
      std::cout << (lOK ? " OK " : " *** ERROR ****") << std::endl;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);
  delete [] lBuf;

  abortOnError(MPI_Type_free(&PAPairLC::reqMPIDatatype()));
  abortOnError(MPI_Type_free(&PALong_2Pairs::reqMPIDatatype()));

  abortOnError(MPI_Finalize());

  return 0;
}

Reply via email to