Hi George,

Just out of curiosity, which version of Open MPI did you use that runs
Jeff's program fine (after adding MPI_Finalize)?  The program aborts with
either mpich2-1.0.5p4 or Open MPI 1.2.3 on an AMD x86_64 box (Ubuntu 7.04)
because MPI_Comm_rank() is called with MPI_COMM_NULL.
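
As far as I can tell, the MPI_COMM_NULL comes from a double free in
Jeff's program: the server frees counter_comm inside MPE_Counter_create
(the server branch calls MPE_Counter_free itself before returning), and
then main() calls MPE_Counter_free a second time on the already-freed
handle, so MPI_Comm_rank sees MPI_COMM_NULL.  A minimal sketch of a fix
in Jeff's main(), untested on my side (rank, server, smaller_comm and
counter_comm are the variables already in his code):

    MPE_Counter_create( MPI_COMM_WORLD, &smaller_comm, &counter_comm );
    if (rank != server) {
        /* only the clients still hold live communicators here; the
           server has already freed counter_comm inside
           MPE_Counter_create */
        inttemp = MPE_Counter_free( &smaller_comm, &counter_comm );
        printf( "final inttemp %i,%i\n", rank, inttemp );
    }
    MPI_Barrier( MPI_COMM_WORLD );
    MPI_Finalize();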

With OpenMPI:
> ~/openmpi/install_linux64_123_gcc4_thd/bin/mpiexec -n 2 a.out
...
[octagon.mcs.anl.gov:23279] *** An error occurred in MPI_Comm_rank
[octagon.mcs.anl.gov:23279] *** on communicator MPI_COMM_WORLD
[octagon.mcs.anl.gov:23279] *** MPI_ERR_COMM: invalid communicator
[octagon.mcs.anl.gov:23279] *** MPI_ERRORS_ARE_FATAL (goodbye)

Open MPI also hangs during the abort, so I have to kill the mpiexec
process by hand.  You can reproduce the hang with the following test
program and Open MPI 1.2.3.

/homes/chan/tmp/tmp6> cat test_comm_rank.c
#include <stdio.h>
#include "mpi.h"

int main( int argc, char *argv[] )
{
    int myrank;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_NULL, &myrank );
    printf( "myrank = %d\n", myrank );
    MPI_Finalize();
    return 0;
}

Since mpiexec hangs, there may be a bug somewhere in the 1.2.3 release.
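
If you want the failure reported without going through the (hanging)
fatal abort, you can make the call return an error code instead; a
minimal variant of the test above, assuming the MPI-2
MPI_Comm_set_errhandler interface:

    MPI_Init( &argc, &argv );
    /* report errors as return codes instead of aborting the job */
    MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN );
    if (MPI_Comm_rank( MPI_COMM_NULL, &myrank ) != MPI_SUCCESS)
        fprintf( stderr, "MPI_Comm_rank failed on MPI_COMM_NULL\n" );
    MPI_Finalize();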

A.Chan



On Wed, 20 Jun 2007, George Bosilca wrote:

> Jeff,
>
> With the proper MPI_Finalize added at the end of the main function,
> your program works fine with the current version of Open MPI up to 32
> processors. Here is the output I got for 4 processors:
>
> I am 2 of 4 WORLD processors
> I am 3 of 4 WORLD processors
> I am 0 of 4 WORLD processors
> I am 1 of 4 WORLD processors
> Initial inttemp 1
> Initial inttemp 0
> final inttemp 0,0
> 0, WORLD barrier leaving routine
> final inttemp 1,0
> 1, WORLD barrier leaving routine
> Initial inttemp 2
> final inttemp 2,0
> 2, WORLD barrier leaving routine
> SERVER Got a DONE flag
> Initial inttemp 3
> final inttemp 3,0
> 3, WORLD barrier leaving routine
>
> This output seems to indicate that the program is running to
> completion and it does what you expect it to do.
>
> Btw, what version of Open MPI are you using and on what kind of
> hardware ?
>
>    george.
>
> On Jun 20, 2007, at 6:31 PM, Jeffrey L. Tilson wrote:
>
> > Hi,
> > ANL suggested I post this question to you. This is my second
> > posting, but now with the proper attachments.
> >
> > From: Jeffrey Tilson <jltil...@nc.rr.com>
> > Date: June 20, 2007 5:17:50 PM PDT
> > To: mpich2-ma...@mcs.anl.gov, Jeffrey Tilson <jtil...@renci.org>
> > Subject: MPI question/problem
> >
> >
> > Hello All,
> > This will probably turn out to be my fault as I haven't used MPI in
> > a few years.
> >
> > I am attempting to use an MPI implementation of a "nxtval" counter
> > (see the MPI book). I am using the client-server scenario. The MPI
> > book specifies the three functions required: two are collective and
> > one is not. Only the two collectives are exercised here, although
> > all three MPI functions are reproduced in the attached code (a usage
> > sketch of the untested one follows the code below). I wrote a tiny
> > application to create and free a counter object, and it fails.
> >
> > I need to know whether this is a bug in the MPI book or a
> > misunderstanding on my part.
> >
> > The complete code is attached. I was using Open MPI with the Intel
> > compilers to build and run.
> >
> > The error I get is:
> >
> >> [compute-0-1.local:22637] *** An error occurred in MPI_Comm_rank
> >> [compute-0-1.local:22637] *** on communicator MPI_COMM_WORLD
> >> [compute-0-1.local:22637] *** MPI_ERR_COMM: invalid communicator
> >> [compute-0-1.local:22637] *** MPI_ERRORS_ARE_FATAL (goodbye)
> >> mpirun noticed that job rank 0 with PID 22635 on node
> >> "compute-0-1.local" exited on signal 15.
> >
> > I've attempted to google my way to understanding but with little
> > success. If someone could point me to
> > a sample application that actually uses these functions, I would
> > appreciate it.
> >
> > Sorry if this is the wrong list, it is not an MPICH question and I
> > wasn't sure where to turn.
> >
> > Thanks,
> > --jeff
> >
> > ------------------------------------------------------------------------
> >
> > /* A beginning piece of code to perform large-scale web construction. */
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <string.h>
> > #include "mpi.h"
> >
> > typedef struct {
> >     char description[1024];
> >     double startwtime;
> >     double endwtime;
> >     double difftime;
> > } Timer;
> >
> > /* prototypes */
> > int MPE_Counter_nxtval(MPI_Comm , int *);
> > int MPE_Counter_free( MPI_Comm *, MPI_Comm * );
> > void MPE_Counter_create( MPI_Comm , MPI_Comm *, MPI_Comm *);
> > /* End prototypes */
> >
> > /* Globals */
> > int rank, numsize;
> >
> > int main( int argc, char **argv )
> > {
> >
> >     int i, j;
> >     MPI_Status status;
> >     MPI_Request r;
> >     MPI_Comm smaller_comm, counter_comm;
> >
> >     int numtimings = 0;
> >     int inttemp;
> >     int value = -1;
> >     int server;
> >
> >     // Init parallel environment
> >     MPI_Init( &argc, &argv );
> >     MPI_Comm_rank( MPI_COMM_WORLD, &rank );
> >     MPI_Comm_size( MPI_COMM_WORLD, &numsize );
> >
> >     printf("I am %i of %i WORLD processors\n", rank, numsize);
> >     server = numsize - 1;
> >
> >     MPE_Counter_create( MPI_COMM_WORLD, &smaller_comm, &counter_comm );
> >     printf("Initial inttemp %i\n", rank);
> >
> >     inttemp = MPE_Counter_free( &smaller_comm, &counter_comm );
> >     printf("final inttemp %i,%i\n", rank, inttemp);
> >
> >     printf("%i, WORLD barrier leaving routine\n", rank);
> >     MPI_Barrier( MPI_COMM_WORLD );
> > }
> >
> > //// Add a new MPICH-based shared counter,
> > //// grabbed from http://www-unix.mcs.anl.gov/mpi/usingmpi/examples/advanced/nxtval_create_c.htm
> >
> > /* tag values */
> > #define REQUEST 0
> > #define GOAWAY  1
> > #define VALUE   2
> > #define MPE_SUCCESS 0
> >
> > void MPE_Counter_create( MPI_Comm oldcomm, MPI_Comm *smaller_comm,
> >                          MPI_Comm *counter_comm )
> > {
> >     int counter = 0;
> >     int message, done = 0, myid, numprocs, server, color,ranks[1];
> >     MPI_Status status;
> >     MPI_Group oldgroup, smaller_group;
> >
> >     MPI_Comm_size(oldcomm, &numprocs);
> >     MPI_Comm_rank(oldcomm, &myid);
> >     server = numprocs-1;     /*   last proc is server */
> >     MPI_Comm_dup( oldcomm, counter_comm ); /* make one new comm */
> >     if (myid == server) color = MPI_UNDEFINED;
> >     else color =0;
> >     MPI_Comm_split( oldcomm, color, myid, smaller_comm);
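> >     /* the server passed color = MPI_UNDEFINED above, so its
> >        smaller_comm comes back as MPI_COMM_NULL and only the clients
> >        end up sharing smaller_comm */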
> >
> >     if (myid == server) {       /* I am the server */
> >         while (!done) {
> >             MPI_Recv(&message, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
> >                      *counter_comm, &status );
> >             if (status.MPI_TAG == REQUEST) {
> >                 MPI_Send(&counter, 1, MPI_INT, status.MPI_SOURCE, VALUE,
> >                          *counter_comm );
> >                 counter++;
> >             }
> >             else if (status.MPI_TAG == GOAWAY) {
> >                 printf("SERVER Got a DONE flag\n");
> >                 done = 1;
> >             }
> >             else {
> >                 fprintf(stderr, "bad tag sent to MPE counter\n");
> >                 MPI_Abort(*counter_comm, 1);
> >             }
> >         }
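> >         /* the server frees its communicators here; note that it
> >            must not call MPE_Counter_free again later on (the freed
> >            handle is MPI_COMM_NULL) */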
> >         MPE_Counter_free( smaller_comm, counter_comm );
> >     }
> > }
> >
> > /*******************************/
> > int MPE_Counter_free( MPI_Comm *smaller_comm, MPI_Comm *counter_comm )
> > {
> >     int myid, numprocs;
> >
> >     MPI_Comm_rank( *counter_comm, &myid );
> >     MPI_Comm_size( *counter_comm, &numprocs );
> >
> >     if (myid == 0)
> >         MPI_Send(NULL, 0, MPI_INT, numprocs-1, GOAWAY, *counter_comm);
> >
> >     MPI_Comm_free( counter_comm );
> >
> >     if (*smaller_comm != MPI_COMM_NULL) {
> >         MPI_Comm_free( smaller_comm );
> >     }
> >     return 0;
> > }
> >
> > /************************/
> > int MPE_Counter_nxtval(MPI_Comm counter_comm, int * value)
> > {
> >     int server,numprocs;
> >     MPI_Status status;
> >
> >     MPI_Comm_size( counter_comm, &numprocs );
> >     server = numprocs-1;
> >     MPI_Send(NULL, 0, MPI_INT, server, REQUEST, counter_comm );
> >     MPI_Recv( value, 1, MPI_INT, server, VALUE, counter_comm, &status );
> >     return 0;
> > }
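> >
> > /* Usage sketch for the untested non-collective routine: each client
> >    call hands back the next value 0, 1, 2, ... from the server's
> >    counter.  demo_nxtval is illustrative only and is not called
> >    anywhere in this program. */
> > void demo_nxtval( MPI_Comm counter_comm )
> > {
> >     int i, v;
> >     for (i = 0; i < 3; i++) {
> >         MPE_Counter_nxtval( counter_comm, &v );
> >         printf( "client drew value %d\n", v );
> >     }
> > }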
> >
> >
> > _______________________________________________
> > users mailing list
> > us...@open-mpi.org
> > http://www.open-mpi.org/mailman/listinfo.cgi/users
>
>
