Hi Jeff,
thanks for your answer!
You planted a doubt in my mind... and gave me hope... :-)
So I made some modifications to the code to help everyone:
1- it is now in plain "C"... :-)
2- Concerning your remark about the arbitrary address: I now use the
"offsetof" macro defined in "stddef.h" to compute the offsets
(displacements) needed to create the datatypes (see the short sketch below)
3- I have simplified and reduced (again) the number of lines needed to
reproduce the error...
See "nested_bug.c" attached to this mail...
Output with Open MPI 1.6.3:
----------------------------------------
Rank 0 send this:
{{1},{{2,3},{4,5}}}
Rank 1 received this: {{1},{{2,3},{4199789,15773951}}} *** ERROR ****
Expected output (still OK with MPICH 3.0.3 and Intel MPI 4):
----------------------------------------
Rank 0 send this:
{{1},{{2,3},{4,5}}}
Rank 1 received this: {{1},{{2,3},{4,5}}} OK
Thanks!
Eric
On 2013-04-23 18:03, Jeff Squyres (jsquyres) wrote:
Sorry for the delay.
My C++ is a bit rusty, but this does not seem correct to me.
You're making the datatypes relative to an arbitrary address (&lPtrBase) in a
static method on each class. You really need the datatypes to be relative to each
instance's *this* pointer.
Doing so allows MPI to read/write the data relative to the specific instance of
the objects that you're trying to send/receive.
Make sense?
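Something along these lines, for example (just a rough sketch; the struct and
field names below are made up, not taken from your code):

   /* placeholder type standing in for one of your classes */
   struct MyClass { long aLong; int aInt; };

   struct MyClass lObj;
   MPI_Aint lBase, lDisp[2];
   MPI_Get_address(&lObj,       &lBase);
   MPI_Get_address(&lObj.aLong, &lDisp[0]);
   MPI_Get_address(&lObj.aInt,  &lDisp[1]);
   lDisp[0] -= lBase;   /* displacements relative to the instance itself */
   lDisp[1] -= lBase;
   /* ...then MPI_Type_create_struct() with lDisp, and pass &lObj (or any
      other instance) as the buffer to MPI_Send/MPI_Recv. */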
On Apr 23, 2013, at 5:01 PM, Eric Chamberland
<eric.chamberl...@giref.ulaval.ca> wrote:
One more piece of information: I just tested the example with Intel MPI
4.0.1.007 and it works correctly...
So the problem seems to be limited to Open MPI... which is the default
distribution we use... :-/
Is my example code too long?
Eric
On 2013-04-23 09:55, Eric Chamberland wrote:
Sorry,
here is the attachment...
Eric
On 04/23/2013 09:54 AM, Eric Chamberland wrote:
Hi,
I sent a previous message showing something that I think is a bug
(or maybe a misuse, but...).
I have worked on simplifying that example: it is now almost half the
lines of code and the structures are simpler... but it still shows the
wrong behaviour.
Briefly, we construct different MPI_Datatypes and nest them into a final
type, which is:
{MPI_LONG,{{MPI_LONG,MPI_CHAR}*2}}
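In plain C terms, the layout corresponds roughly to the following (field
names invented here; the real ones are in the attached example):

   struct PairLC      { long aLong; char aChar; };               /* {MPI_LONG,MPI_CHAR} */
   struct Long_2Pairs { long aFirst; struct PairLC aPairs[2]; }; /* {MPI_LONG,{PairLC,PairLC}} */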
Here is the output from Open MPI 1.6.3:
Rank 0 send this:
i: 0 => {{0},{{3,%},{7,5}}}
i: 1 => {{1},{{3,%},{7,5}}}
i: 2 => {{2},{{3,%},{7,5}}}
i: 3 => {{3},{{3,%},{7,5}}}
i: 4 => {{4},{{3,%},{7,5}}}
i: 5 => {{5},{{3,%},{7,5}}}
MPI_Recv returned success and everything in MPI_Status is correct after
receive.
Rank 1 received this:
i: 0 => {{0},{{3,%},{-999,$}}} *** ERROR ****
i: 1 => {{1},{{3,%},{-999,$}}} *** ERROR ****
i: 2 => {{2},{{3,%},{-999,$}}} *** ERROR ****
i: 3 => {{3},{{3,%},{-999,$}}} *** ERROR ****
i: 4 => {{4},{{3,%},{-999,$}}} *** ERROR ****
i: 5 => {{5},{{3,%},{-999,$}}} *** ERROR ****
Here is the expected output, obtained with mpich-3.0.3:
Rank 0 send this:
i: 0 => {{0},{{3,%},{7,5}}}
i: 1 => {{1},{{3,%},{7,5}}}
i: 2 => {{2},{{3,%},{7,5}}}
i: 3 => {{3},{{3,%},{7,5}}}
i: 4 => {{4},{{3,%},{7,5}}}
i: 5 => {{5},{{3,%},{7,5}}}
MPI_Recv returned success and everything in MPI_Status is correct after
receive.
Rank 1 received this:
i: 0 => {{0},{{3,%},{7,5}}} OK
i: 1 => {{1},{{3,%},{7,5}}} OK
i: 2 => {{2},{{3,%},{7,5}}} OK
i: 3 => {{3},{{3,%},{7,5}}} OK
i: 4 => {{4},{{3,%},{7,5}}} OK
i: 5 => {{5},{{3,%},{7,5}}} OK
Is it related to the bug reported here:
http://www.open-mpi.org/community/lists/devel/2013/04/12267.php ?
Thanks,
Eric
_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel
#include "mpi.h"
#include <stdlib.h> /* for malloc */
#include <stdio.h>
#include <stddef.h>
/**************************************************************************
//
// This example shows a problem with nested datatypes!
// It works perfectly with mpich-3.0.3 but seems to transmit the data
// incorrectly with openmpi 1.6.3, 1.6.4, 1.7.0 and 1.7.1
//
// The basic problem seems to arise with PALong_2Pairs, which is a
// nested MPI type constructed like this:
//--------------------------------------
// Struct        | is composed of
//--------------------------------------
// PAPairLI      | {long, int}
// PALong_2Pairs | {long,{PAPairLI,PAPairLI}}
//--------------------------------------
//
*/
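/*
// To compile and run (assuming the usual MPI compiler wrapper and launcher):
//   mpicc nested_bug.c -o nested_bug
//   mpirun -np 2 ./nested_bug
*/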
/*! Function to abort on any MPI error:*/
void abortOnError(int ierr) {
  if (ierr != MPI_SUCCESS) {
    printf("ERROR Returned by MPI: %d\n", ierr);
    char* lCharPtr = malloc(sizeof(char)*MPI_MAX_ERROR_STRING);
    int lLongueur = 0;
    MPI_Error_string(ierr, lCharPtr, &lLongueur);
    printf("ERROR_string Returned by MPI: %s\n", lCharPtr);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }
}
/* a constant:*/
#define FIRST_CHAR 32
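/* (FIRST_CHAR is not referenced anywhere else in this reduced example.) */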
/*****************************************************
//
// PAPairLI is a pair: {long, int}
//
*/
struct PAPairLI
{
  long aLong;
  int  aInt;
};
/* Global MPI datatype handles: */
MPI_Datatype gPAPairLI_datatype = MPI_DATATYPE_NULL;
MPI_Datatype gPALong_2Pairs_datatype = MPI_DATATYPE_NULL;
void createPAPaireLI_datatype()
{
  MPI_Datatype lTypes[2] = {MPI_LONG, MPI_INT};
  /* Compute the offsets: */
  MPI_Aint lOffset[2];
  lOffset[0] = offsetof(struct PAPairLI, aLong);
  lOffset[1] = offsetof(struct PAPairLI, aInt);
  int lBlocLen[2] = {1, 1};
  abortOnError(MPI_Type_create_struct(2, lBlocLen, lOffset, lTypes,
                                      &gPAPairLI_datatype));
  abortOnError(MPI_Type_commit(&gPAPairLI_datatype));
}
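/* Note: since the displacements above come from offsetof, gPAPairLI_datatype
   describes a struct PAPairLI relative to its own start, so it can be used
   with a pointer to any instance. */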
/*****************************************************
//
// PALong_2Pairs is a struct of: {long, PAPairLI[2]}
//
*/
struct PALong_2Pairs
{
  long aFirst;
  struct PAPairLI a2Pairs[2];
};

void printPALong_2Pairs(struct PALong_2Pairs* pObj) {
  printf("{{%ld},{{%ld,%d},{%ld,%d}}}", pObj->aFirst,
         pObj->a2Pairs[0].aLong, pObj->a2Pairs[0].aInt,
         pObj->a2Pairs[1].aLong, pObj->a2Pairs[1].aInt);
}
void createPALong_2Pairs_datatype()
{
  MPI_Datatype lTypes[2] = {MPI_LONG, gPAPairLI_datatype};
  /* Compute the offsets: */
  MPI_Aint lOffset[2];
  lOffset[0] = offsetof(struct PALong_2Pairs, aFirst);
  lOffset[1] = offsetof(struct PALong_2Pairs, a2Pairs);
  int lBlocLen[2] = {1, 2};
  abortOnError(MPI_Type_create_struct(2, lBlocLen, lOffset, lTypes,
                                      &gPALong_2Pairs_datatype));
  abortOnError(MPI_Type_commit(&gPALong_2Pairs_datatype));
}
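/* gPALong_2Pairs_datatype now describes one struct PALong_2Pairs: a long
   followed by a block of two nested gPAPairLI_datatype elements, at the
   offsets given by offsetof. */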
/****************************
//
// Here is now the main...
//
*/
int main(int argc, char *argv[])
{
  int rank, size;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if (size != 2)
  {
    printf("Please run with 2 processes.\n");
    MPI_Finalize();
    return 1;
  }
  /* Create the datatypes once: */
  createPAPaireLI_datatype();
  createPALong_2Pairs_datatype();
  /* Here is the object we will try to transmit: */
  struct PALong_2Pairs lBuf;
  const int lTag = 123;
  if (rank == 0)
  {
    /* Fill in some values: */
    lBuf.aFirst = 1;
    lBuf.a2Pairs[0].aLong = 2;
    lBuf.a2Pairs[0].aInt = 3;
    lBuf.a2Pairs[1].aLong = 4;
    lBuf.a2Pairs[1].aInt = 5;
    /* Print what we will send: */
    printf(" Rank 0 send this:\n");
    printPALong_2Pairs(&lBuf);
    printf("\n");
    /* Now send this object! */
    abortOnError(MPI_Send(&lBuf, 1, gPALong_2Pairs_datatype, 1, lTag,
                          MPI_COMM_WORLD));
  }
  if (rank == 1)
  {
    MPI_Status status;
    status.MPI_SOURCE = -1;
    status.MPI_TAG = -1;
    status.MPI_ERROR = MPI_SUCCESS;
    abortOnError(MPI_Recv(&lBuf, 1, gPALong_2Pairs_datatype, 0, lTag,
                          MPI_COMM_WORLD, &status));
    /* Verify the status: */
    int lCount = -1;
    abortOnError(MPI_Get_count(&status, gPALong_2Pairs_datatype, &lCount));
    const int lAllOK = 0 == status.MPI_SOURCE &&
                       lTag == status.MPI_TAG &&
                       MPI_SUCCESS == status.MPI_ERROR &&
                       lCount == 1;
    if (!lAllOK) {
      printf("MPI_Status is not correct!\n");
      MPI_Abort(MPI_COMM_WORLD, 1);
    }
    printf(" Rank 1 received this: ");
    printPALong_2Pairs(&lBuf);
    /* Verify what we should have received: */
    int lOK = 1;
    lOK &= lBuf.aFirst == 1;
    lOK &= lBuf.a2Pairs[0].aLong == 2;
    lOK &= lBuf.a2Pairs[0].aInt == 3;
    lOK &= lBuf.a2Pairs[1].aLong == 4;
    lOK &= lBuf.a2Pairs[1].aInt == 5;
    /* If it is not what we expect, print an error: */
    char* lOkOrNot = (lOK ? " OK " : " *** ERROR ****");
    printf("%s\n", lOkOrNot);
  }
  MPI_Barrier(MPI_COMM_WORLD);
  abortOnError(MPI_Type_free(&gPALong_2Pairs_datatype));
  abortOnError(MPI_Type_free(&gPAPairLI_datatype));
  abortOnError(MPI_Finalize());
  return 0;
}