Hi, I have two example programs that I found on the internet (attached) that illustrate a problem we are having launching multi-node jobs with OpenMPI 4.0.5 and MPI spawn (MPI_Comm_spawn_multiple).
CentOS Linux release 8.4.2105
openmpi-4.0.5-3.el8.x86_64
Slurm 20.11.8
10Gbit ethernet network, no IB or other networks

I allocate 2 nodes, each with 24 cores. They are identical systems with a shared NFS root.

salloc -p fsc -w fsc07,fsc08 --ntasks-per-node=24

Running the hello prog with OpenMPI 4.0.5:

/usr/lib64/openmpi/bin/mpirun --version
mpirun (Open MPI) 4.0.5

/usr/lib64/openmpi/bin/mpirun /home/franco/hello
MPI_Init(): 307.434000
hello, world (rank 0 of 48 fsc07)
...
MPI_Init(): 264.714000
hello, world (rank 47 of 48 fsc08)

All well and good.

Now running the MPI_spawn example prog with OpenMPI 4.0.1:

/library/mpi/openmpi-4.0.1//bin/mpirun -c 1 /home/franco/spawn_example 47
I'm the parent on fsc07
Starting 47 children
I'm the spawned.
hello, world (rank 0 of 47 fsc07)
Received 999 err 0 (rank 0 of 47 fsc07)
I'm the spawned.
hello, world (rank 1 of 47 fsc07)
Received 999 err 0 (rank 1 of 47 fsc07)
....
I'm the spawned.
hello, world (rank 45 of 47 fsc08)
Received 999 err 0 (rank 45 of 47 fsc08)
I'm the spawned.
hello, world (rank 46 of 47 fsc08)
Received 999 err 0 (rank 46 of 47 fsc08)

Works fine.

Now rebuild spawn_example with 4.0.5 and run as before:

ldd /home/franco/spawn_example | grep openmpi
libmpi.so.40 => /usr/lib64/openmpi/lib/libmpi.so.40 (0x00007fc2c0655000)
libopen-rte.so.40 => /usr/lib64/openmpi/lib/libopen-rte.so.40 (0x00007fc2bfdb6000)
libopen-pal.so.40 => /usr/lib64/openmpi/lib/libopen-pal.so.40 (0x00007fc2bfb08000)

/usr/lib64/openmpi/bin/mpirun --version
mpirun (Open MPI) 4.0.5

/usr/lib64/openmpi/bin/mpirun -c 1 /home/franco/spawn_example 47
I'm the parent on fsc07
Starting 47 children
[fsc08:463361] pml_ucx.c:178 Error: Failed to receive UCX worker address: Not found (-13)
[fsc08:463361] [[42596,2],32] ORTE_ERROR_LOG: Error in file dpm/dpm.c at line 493
....
[fsc08:462917] pml_ucx.c:178 Error: Failed to receive UCX worker address: Not found (-13)
[fsc08:462917] [[42416,2],33] ORTE_ERROR_LOG: Error in file dpm/dpm.c at line 493
ompi_dpm_dyn_init() failed
--> Returned "Error" (-1) instead of "Success" (0)
--------------------------------------------------------------------------
[fsc08:462926] *** An error occurred in MPI_Init
[fsc08:462926] *** reported by process [2779774978,42]
[fsc08:462926] *** on a NULL communicator
[fsc08:462926] *** Unknown error
[fsc08:462926] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[fsc08:462926] *** and potentially your MPI job)
[fsc07:1158342] *** An error occurred in MPI_Comm_spawn_multiple
[fsc07:1158342] *** reported by process [2779774977,0]
[fsc07:1158342] *** on communicator MPI_COMM_WORLD
[fsc07:1158342] *** MPI_ERR_OTHER: known error not in list
[fsc07:1158342] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[fsc07:1158342] *** and potentially your MPI job)
[1629952748.688500] [fsc07:1158342:0] sock.c:244 UCX ERROR connect(fd=64, dest_addr=10.220.6.239:38471) failed: Connection refused

The IP address is for node fsc08; the program is being run from fsc07.

I see the orted process running on fsc08 for both hello and spawn_example, with the same arguments. I also tried turning on various debug options, but I'm none the wiser.

If I run the spawn example with 23 children it works fine - because they are all on fsc07.

Any idea what might be wrong?

Cheers,
Franco
// // cc -o spawn_example spawn_example.c -std=gnu99 -I/usr/include/openmpi-x86_64 -fexceptions -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi/lib -lmpi // #include "mpi.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> int main( int argc, char *argv[] ) { MPI_Init( &argc, &argv ); MPI_Comm parentcomm; MPI_Comm_get_parent( &parentcomm ); int nchildren = 20; if(argc == 2) nchildren = atoi(argv[1]); if (parentcomm == MPI_COMM_NULL) { char name[64]; gethostname(name, sizeof(name)); printf("I'm the parent on %s\n", name); printf("Starting %d children\n", nchildren); int np[nchildren]; int errcodes[nchildren]; MPI_Comm intercomm; char *cmds[nchildren]; MPI_Info infos[nchildren]; for(int i=0; i<nchildren; i++){ np[i] = 1; cmds[i] = strdup("/home/franco/spawn_example"); infos[i] = MPI_INFO_NULL; } /* Create children */ MPI_Comm_spawn_multiple(nchildren, cmds, MPI_ARGVS_NULL, np, infos, 0, MPI_COMM_WORLD, &intercomm, errcodes ); //for(int i=0; i<nchildren; i++) printf("%d %d\n", i, errcodes[i]); fflush(stdout); } else { printf("I'm the spawned.\n"); int rank; int size; char name[64]; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); gethostname(name, sizeof(name)); printf("hello, world (rank %d of %d %s)\n", rank, size, name); int buf=0; if(rank == 0) buf = 999; int err = MPI_Bcast(&buf, 1, MPI_INT, 0, MPI_COMM_WORLD); printf("Received %d err %d (rank %d of %d %s)\n", buf, err, rank, size, name); fflush(stdout); } MPI_Finalize(); return 0; }
// cc -o hello hello.c -std=gnu99 -I/usr/include/openmpi-x86_64 -fexceptions -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi/lib -lmpi #include <mpi.h> #include <stdio.h> #include <time.h> #include <stdlib.h> #include <sys/time.h> int main(int argc, char **argv) { int rank; int size; struct timeval tv; struct timeval tv2; char name[64]; gettimeofday(&tv, NULL); MPI_Init(&argc, &argv); gettimeofday(&tv2, NULL); printf("MPI_Init(): %f\n", ((tv2.tv_sec - tv.tv_sec) * 1000.0 + (tv2.tv_usec - tv.tv_usec) / 1000.0)); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); gethostname(name, sizeof(name)); printf("hello world (rank %d of %d %s)\n", rank, size, name); MPI_Finalize(); return 0; }