Hi,
I have 2 example progs that I found on the internet (attached) that illustrate
a problem we are having launching multi-node jobs with OpenMPI-4.0.5 and
MPI_Comm_spawn_multiple
CentOS Linux release 8.4.2105
openmpi-4.0.5-3.el8.x86_64
Slurm 20.11.8
10Gbit ethernet network, no IB or other networks
I allocate 2 nodes, each with 24 cores. They are identical systems with a
shared NFS root.
salloc -p fsc -w fsc07,fsc08 --ntasks-per-node=24
Running the hello prog with OpenMPI 4.0.5
/usr/lib64/openmpi/bin/mpirun --version
mpirun (Open MPI) 4.0.5
/usr/lib64/openmpi/bin/mpirun /home/franco/hello
MPI_Init(): 307.434000
hello, world (rank 0 of 48 fsc07)
...
MPI_Init(): 264.714000
hello, world (rank 47 of 48 fsc08)
All well and good.
Now running the MPI_spawn example prog with OpenMPI 4.0.1
/library/mpi/openmpi-4.0.1//bin/mpirun -c 1 /home/franco/spawn_example 47
I'm the parent on fsc07
Starting 47 children
I'm the spawned.
hello, world (rank 0 of 47 fsc07)
Received 999 err 0 (rank 0 of 47 fsc07)
I'm the spawned.
hello, world (rank 1 of 47 fsc07)
Received 999 err 0 (rank 1 of 47 fsc07)
....
I'm the spawned.
hello, world (rank 45 of 47 fsc08)
Received 999 err 0 (rank 45 of 47 fsc08)
I'm the spawned.
hello, world (rank 46 of 47 fsc08)
Received 999 err 0 (rank 46 of 47 fsc08)
Works fine.
Now rebuild spawn_example with 4.0.5 and run as before
ldd /home/franco/spawn_example | grep openmpi
libmpi.so.40 => /usr/lib64/openmpi/lib/libmpi.so.40 (0x00007fc2c0655000)
libopen-rte.so.40 => /usr/lib64/openmpi/lib/libopen-rte.so.40
(0x00007fc2bfdb6000)
libopen-pal.so.40 => /usr/lib64/openmpi/lib/libopen-pal.so.40
(0x00007fc2bfb08000)
/usr/lib64/openmpi/bin/mpirun --version
mpirun (Open MPI) 4.0.5
/usr/lib64/openmpi/bin/mpirun -c 1 /home/franco/spawn_example 47
I'm the parent on fsc07
Starting 47 children
[fsc08:463361] pml_ucx.c:178 Error: Failed to receive UCX worker address: Not
found (-13)
[fsc08:463361] [[42596,2],32] ORTE_ERROR_LOG: Error in file dpm/dpm.c at line
493
....
[fsc08:462917] pml_ucx.c:178 Error: Failed to receive UCX worker address: Not
found (-13)
[fsc08:462917] [[42416,2],33] ORTE_ERROR_LOG: Error in file dpm/dpm.c at line
493
ompi_dpm_dyn_init() failed
--> Returned "Error" (-1) instead of "Success" (0)
--------------------------------------------------------------------------
[fsc08:462926] *** An error occurred in MPI_Init
[fsc08:462926] *** reported by process [2779774978,42]
[fsc08:462926] *** on a NULL communicator
[fsc08:462926] *** Unknown error
[fsc08:462926] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will
now abort,
[fsc08:462926] *** and potentially your MPI job)
[fsc07:1158342] *** An error occurred in MPI_Comm_spawn_multiple
[fsc07:1158342] *** reported by process [2779774977,0]
[fsc07:1158342] *** on communicator MPI_COMM_WORLD
[fsc07:1158342] *** MPI_ERR_OTHER: known error not in list
[fsc07:1158342] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will
now abort,
[fsc07:1158342] *** and potentially your MPI job)
[1629952748.688500] [fsc07:1158342:0] sock.c:244 UCX ERROR
connect(fd=64, dest_addr=10.220.6.239:38471) failed: Connection refused
The IP address is for node fsc08, the program is being run from fsc07
I see the orted process running on fsc08 for both hello and spawn_example with
the same arguments. I also tried turning on various debug options but I'm none
the wiser.
If I run the spawn example with 23 children it works fine - because they are
all on fsc07.
Any idea what might be wrong?
Cheers,
Franco
//
// cc -o spawn_example spawn_example.c -std=gnu99 -I/usr/include/openmpi-x86_64 -fexceptions -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi/lib -lmpi
//
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * spawn_example: started without an MPI parent, this process spawns copies
 * of itself with MPI_Comm_spawn_multiple (one process per command entry).
 * Started as a spawned child, it reports its rank, size and host, then takes
 * part in a broadcast of the value 999 from rank 0 of the children's own
 * MPI_COMM_WORLD.
 *
 * Usage: spawn_example [nchildren]
 *   nchildren  number of children to spawn; defaults to 20 when the program
 *              is not given exactly one argument.
 *
 * Returns 0 on success, EXIT_FAILURE when the argument is not a valid count
 * or the bookkeeping arrays cannot be allocated.
 */
int main( int argc, char *argv[] )
{
    /* MAX_CHILDREN keeps the count inside int range and the allocations
     * below at a sane size. */
    enum { DEFAULT_CHILDREN = 20, MAX_CHILDREN = 1 << 20 };

    /* Mutable buffer: the MPI C binding takes the commands as char *. */
    static char child_cmd[] = "/home/franco/spawn_example";

    MPI_Init(&argc, &argv);

    MPI_Comm parentcomm;
    MPI_Comm_get_parent(&parentcomm);

    if (parentcomm == MPI_COMM_NULL) {
        /* No parent communicator: this is the original (parent) process. */
        int nchildren = DEFAULT_CHILDREN;
        if (argc == 2) {
            char *end;
            long requested = strtol(argv[1], &end, 10);
            /* Reject non-numeric text, trailing junk and out-of-range
             * counts instead of silently spawning with a bogus size. */
            if (end == argv[1] || *end != '\0'
                || requested < 1 || requested > MAX_CHILDREN) {
                fprintf(stderr, "usage: %s [nchildren (1..%d)]\n",
                        argv[0], MAX_CHILDREN);
                MPI_Finalize();
                return EXIT_FAILURE;
            }
            nchildren = (int)requested;
        }

        char name[64];
        if (gethostname(name, sizeof name) != 0) {
            snprintf(name, sizeof name, "%s", "unknown");
        }
        /* gethostname need not NUL-terminate a truncated name. */
        name[sizeof name - 1] = '\0';

        printf("I'm the parent on %s\n", name);
        printf("Starting %d children\n", nchildren);

        /* Heap storage rather than VLAs: the count is user-controlled. */
        size_t count = (size_t)nchildren;
        int *np = malloc(count * sizeof *np);
        int *errcodes = malloc(count * sizeof *errcodes);
        char **cmds = malloc(count * sizeof *cmds);
        MPI_Info *infos = malloc(count * sizeof *infos);
        if (np == NULL || errcodes == NULL || cmds == NULL || infos == NULL) {
            fprintf(stderr, "out of memory for %d children\n", nchildren);
            free(infos);
            free(cmds);
            free(errcodes);
            free(np);
            MPI_Finalize();
            return EXIT_FAILURE;
        }

        for (int i = 0; i < nchildren; i++) {
            np[i] = 1;              /* one process per command entry */
            cmds[i] = child_cmd;    /* every entry runs the same binary */
            infos[i] = MPI_INFO_NULL;
        }

        /* Create children */
        MPI_Comm intercomm;
        MPI_Comm_spawn_multiple(nchildren, cmds, MPI_ARGVS_NULL, np, infos,
                                0, MPI_COMM_WORLD, &intercomm, errcodes);
        fflush(stdout);

        free(infos);
        free(cmds);
        free(errcodes);
        free(np);
    } else {
        /* A parent communicator exists: this process was spawned. */
        printf("I'm the spawned.\n");

        int rank;
        int size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        char name[64];
        if (gethostname(name, sizeof name) != 0) {
            snprintf(name, sizeof name, "%s", "unknown");
        }
        name[sizeof name - 1] = '\0';

        printf("hello, world (rank %d of %d %s)\n", rank, size, name);

        /* Rank 0 seeds the value; everyone else receives it. */
        int buf = 0;
        if (rank == 0) {
            buf = 999;
        }
        int err = MPI_Bcast(&buf, 1, MPI_INT, 0, MPI_COMM_WORLD);
        printf("Received %d err %d (rank %d of %d %s)\n", buf, err, rank, size, name);
        fflush(stdout);
    }

    MPI_Finalize();
    return 0;
}
// cc -o hello hello.c -std=gnu99 -I/usr/include/openmpi-x86_64 -fexceptions -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi/lib -lmpi
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
/*
 * hello: times MPI_Init with gettimeofday, prints the elapsed time in
 * milliseconds, then prints this process's rank, the size of
 * MPI_COMM_WORLD and the local host name.
 *
 * Returns 0.
 */
int
main(int argc, char **argv)
{
    struct timeval before;
    struct timeval after;

    gettimeofday(&before, NULL);
    MPI_Init(&argc, &argv);
    gettimeofday(&after, NULL);

    /* Seconds and microseconds are both converted to milliseconds. */
    printf("MPI_Init(): %f\n",
           ((after.tv_sec - before.tv_sec) * 1000.0
            + (after.tv_usec - before.tv_usec) / 1000.0));

    int rank;
    int size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    char name[64];
    if (gethostname(name, sizeof name) != 0) {
        /* Never print an uninitialised buffer if the lookup fails. */
        snprintf(name, sizeof name, "%s", "unknown");
    }
    /* gethostname need not NUL-terminate a truncated name. */
    name[sizeof name - 1] = '\0';

    printf("hello world (rank %d of %d %s)\n",
           rank, size, name);

    MPI_Finalize();
    return 0;
}