Do you know whether there is a limit on the number of times MPI_Comm_spawn can be called to launch a program? I want to start and stop a program several times (using the function MPI_Comm_spawn), but every time, after 31 calls to MPI_Comm_spawn, I get a "segmentation fault". Could you give me your point of view on how to solve this problem? Thanks.
/*file .c : spawned the file Exe*/ #include <stdio.h> #include <malloc.h> #include <unistd.h> #include "mpi.h" #include <pthread.h> #include <signal.h> #include <sys/time.h> #include <errno.h> #define EXE_TEST "/home/workspace/test_spaw1/src/Exe" int main( int argc, char **argv ) { long *lpBufferMpi; MPI_Comm lIntercom; int lErrcode; MPI_Comm lCommunicateur; int lRangMain,lRangExe,lMessageEnvoi,lIter,NiveauThreadVoulu, NiveauThreadObtenu,lTailleBuffer; int *lpMessageEnvoi=&lMessageEnvoi; MPI_Status lStatus; /*status de reception*/ lIter=0; /* MPI environnement */ printf("main*******************************\n"); printf("main : Lancement MPI*\n"); NiveauThreadVoulu = MPI_THREAD_MULTIPLE; MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu ); lpBufferMpi = calloc( 10000, sizeof(long)); MPI_Buffer_attach( (void*)lpBufferMpi, 10000 * sizeof(long) ); while (lIter<1000){ lIter ++; lIntercom=(MPI_Comm)-1 ; MPI_Comm_spawn( EXE_TEST, NULL, 1, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &lIntercom, &lErrcode ); printf( "%i main***MPI_Comm_spawn return : %d\n",lIter, lErrcode ); if(lIntercom == (MPI_Comm)-1 ){ printf("%i Intercom null\n",lIter); return 0; } MPI_Intercomm_merge(lIntercom, 0,&lCommunicateur ); MPI_Comm_rank( lCommunicateur, &lRangMain); lRangExe=1-lRangMain; printf("%i main***Rang main : %i Rang exe : %i \n",lIter,(int)lRangMain,(int)lRangExe); sleep(2); } /* Arret de l'environnement MPI */ lTailleBuffer=10000* sizeof(long); MPI_Buffer_detach( (void*)lpBufferMpi, &lTailleBuffer ); MPI_Comm_free( &lCommunicateur ); MPI_Finalize( ); free( lpBufferMpi ); printf( "Main = End .\n" ); return 0; } /************************************************************************************************/ Exe: #include <string.h> #include <stdlib.h> #include <stdio.h> #include <malloc.h> #include <unistd.h> /* pour sleep() */ #include <pthread.h> #include <semaphore.h> #include "mpi.h" int main( int argc, char **argv ) { /*1)pour communiaction MPI*/ MPI_Comm 
lCommunicateur; /*communicateur du process*/ MPI_Comm CommParent; /*Communiacteur parent à récupérer*/ int lRank; /*rang du communicateur du process*/ int lRangMain; /*rang du séquenceur si lancé en mode normal*/ int lTailleCommunicateur; /*taille du communicateur;*/ long *lpBufferMpi; /*buffer pour message*/ int lBufferSize; /*taille du buffer*/ /*2) pour les thread*/ int NiveauThreadVoulu, NiveauThreadObtenu; lCommunicateur = (MPI_Comm)-1; NiveauThreadVoulu = MPI_THREAD_MULTIPLE; int erreur = MPI_Init_thread( &argc, &argv, NiveauThreadVoulu, &NiveauThreadObtenu ); if (erreur!=0){ printf("erreur\n"); free( lpBufferMpi ); return -1; } /*2) Attachement à un buffer pour le message*/ lBufferSize=10000 * sizeof(long); lpBufferMpi = calloc( 10000, sizeof(long)); erreur = MPI_Buffer_attach( (void*)lpBufferMpi, lBufferSize ); if (erreur!=0){ printf("erreur\n"); free( lpBufferMpi ); return -1; } printf( "Exe : Lance \n" ); MPI_Comm_get_parent(&CommParent); MPI_Intercomm_merge( CommParent, 1, &lCommunicateur ); MPI_Comm_rank( lCommunicateur, &lRank ); MPI_Comm_size( lCommunicateur, &lTailleCommunicateur ); lRangMain =1-lRank; printf( "Exe: lRankExe = %d lRankMain = %d\n", lRank , lRangMain, lTailleCommunicateur); sleep(1); MPI_Buffer_detach( (void*)lpBufferMpi, &lBufferSize ); MPI_Comm_free( &lCommunicateur ); MPI_Finalize( ); free( lpBufferMpi ); printf( "Exe: Fin.\n\n\n" ); } /************************************************************************************************/ result : main******************************* main : Lancement MPI* 1 main***MPI_Comm_spawn return : 0 Exe : Lance 1 main***Rang main : 0 Rang exe : 1 Exe: lRankExe = 1 lRankMain = 0 Exe: Fin. 2 main***MPI_Comm_spawn return : 0 Exe : Lance 2 main***Rang main : 0 Rang exe : 1 Exe: lRankExe = 1 lRankMain = 0 Exe: Fin. 3 main***MPI_Comm_spawn return : 0 Exe : Lance 3 main***Rang main : 0 Rang exe : 1 Exe: lRankExe = 1 lRankMain = 0 Exe: Fin. .... 
30 main***MPI_Comm_spawn return : 0 Exe : Lance 30 main***Rang main : 0 Rang exe : 1 Exe: lRankExe = 1 lRankMain = 0 Exe: Fin. 31 main***MPI_Comm_spawn return : 0 Exe : Lance 31 main***Rang main : 0 Rang exe : 1 Exe: lRankExe = 1 lRankMain = 0 Erreur de segmentation