
I have 2 example programs that I found on the internet (attached) that illustrate 
a problem we are having launching multi-node jobs with OpenMPI-4.0.5 and 
MPI_Comm_spawn_multiple on the following system:

CentOS Linux release 8.4.2105
Slurm 20.11.8

10Gbit ethernet network, no IB or other networks

I allocate 2 nodes, each with 24 cores. They are identical systems with a 
shared NFS root.

salloc -p fsc -w fsc07,fsc08 --ntasks-per-node=24

Running the hello prog with OpenMPI 4.0.5

/usr/lib64/openmpi/bin/mpirun --version
mpirun (Open MPI) 4.0.5

/usr/lib64/openmpi/bin/mpirun /home/franco/hello

MPI_Init(): 307.434000
hello, world (rank 0 of 48 fsc07)
MPI_Init(): 264.714000
hello, world (rank 47 of 48 fsc08)

All well and good.

Now running the MPI_spawn example prog with OpenMPI 4.0.1

/library/mpi/openmpi-4.0.1//bin/mpirun -c 1 /home/franco/spawn_example 47

I'm the parent on fsc07
Starting 47 children

I'm the spawned.
hello, world (rank 0 of 47 fsc07)
Received 999 err 0 (rank 0 of 47 fsc07)
I'm the spawned.
hello, world (rank 1 of 47 fsc07)
Received 999 err 0 (rank 1 of 47 fsc07)
I'm the spawned.
hello, world (rank 45 of 47 fsc08)
Received 999 err 0 (rank 45 of 47 fsc08)
I'm the spawned.
hello, world (rank 46 of 47 fsc08)
Received 999 err 0 (rank 46 of 47 fsc08)

Works fine.

Now rebuild spawn_example with 4.0.5 and run as before

ldd /home/franco/spawn_example | grep openmpi
        libmpi.so.40 => /usr/lib64/openmpi/lib/libmpi.so.40 (0x00007fc2c0655000)
        libopen-rte.so.40 => /usr/lib64/openmpi/lib/libopen-rte.so.40 
        libopen-pal.so.40 => /usr/lib64/openmpi/lib/libopen-pal.so.40 

/usr/lib64/openmpi/bin/mpirun --version
mpirun (Open MPI) 4.0.5

/usr/lib64/openmpi/bin/mpirun -c 1 /home/franco/spawn_example 47

I'm the parent on fsc07

Starting 47 children

[fsc08:463361] pml_ucx.c:178  Error: Failed to receive UCX worker address: Not 
found (-13)

[fsc08:463361] [[42596,2],32] ORTE_ERROR_LOG: Error in file dpm/dpm.c at line 


[fsc08:462917] pml_ucx.c:178  Error: Failed to receive UCX worker address: Not 
found (-13)

[fsc08:462917] [[42416,2],33] ORTE_ERROR_LOG: Error in file dpm/dpm.c at line 

  ompi_dpm_dyn_init() failed

  --> Returned "Error" (-1) instead of "Success" (0)


[fsc08:462926] *** An error occurred in MPI_Init

[fsc08:462926] *** reported by process [2779774978,42]

[fsc08:462926] *** on a NULL communicator

[fsc08:462926] *** Unknown error

[fsc08:462926] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will 
now abort,

[fsc08:462926] ***    and potentially your MPI job)

[fsc07:1158342] *** An error occurred in MPI_Comm_spawn_multiple

[fsc07:1158342] *** reported by process [2779774977,0]

[fsc07:1158342] *** on communicator MPI_COMM_WORLD

[fsc07:1158342] *** MPI_ERR_OTHER: known error not in list

[fsc07:1158342] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will 
now abort,

[fsc07:1158342] ***    and potentially your MPI job)

[1629952748.688500] [fsc07:1158342:0]           sock.c:244  UCX  ERROR 
connect(fd=64, dest_addr=<fsc08 IP address>:<port>) failed: Connection refused

The IP address is for node fsc08, the program is being run from fsc07

I see the orted process running on fsc08 for both hello and spawn_example with 
the same arguments. I also tried turning on various debug options but I'm none 
the wiser.

If I run the spawn example with 23 children it works fine - because they are 
all on fsc07.

Any idea what might be wrong?


// cc -o spawn_example spawn_example.c -std=gnu99  -I/usr/include/openmpi-x86_64 -fexceptions -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi/lib -lmpi

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main( int argc, char *argv[] )
    MPI_Init( &argc, &argv );
    MPI_Comm parentcomm;
    MPI_Comm_get_parent( &parentcomm );

    int nchildren = 20;
    if(argc == 2) nchildren = atoi(argv[1]);

    if (parentcomm == MPI_COMM_NULL) {
	char name[64];
        gethostname(name, sizeof(name));
        printf("I'm the parent on %s\n", name);
        printf("Starting %d children\n", nchildren);
        int np[nchildren];
        int errcodes[nchildren];
        MPI_Comm intercomm;
        char *cmds[nchildren];
        MPI_Info infos[nchildren];

        for(int i=0; i<nchildren; i++){
            np[i] = 1;
            cmds[i] = strdup("/home/franco/spawn_example");
            infos[i] = MPI_INFO_NULL;
        /* Create children */
        MPI_Comm_spawn_multiple(nchildren, cmds, MPI_ARGVS_NULL, np, infos, 0, MPI_COMM_WORLD, &intercomm, errcodes );
	//for(int i=0; i<nchildren; i++) printf("%d %d\n", i, errcodes[i]);

    } else {
        printf("I'm the spawned.\n");

        int rank;
        int size;
        char name[64];
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        gethostname(name, sizeof(name));
        printf("hello, world (rank %d of %d %s)\n", rank, size, name);

	int buf=0;
	if(rank == 0) buf = 999;
	int err = MPI_Bcast(&buf, 1, MPI_INT, 0, MPI_COMM_WORLD);

	printf("Received %d err %d (rank %d of %d %s)\n", buf, err, rank, size, name);

    return 0;
// cc -o hello hello.c -std=gnu99  -I/usr/include/openmpi-x86_64 -fexceptions -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi/lib -lmpi

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

main(int argc, char **argv)
    int rank;
    int size;
    struct timeval tv;
    struct timeval tv2;
    char name[64];

    gettimeofday(&tv, NULL);
    MPI_Init(&argc, &argv);

    gettimeofday(&tv2, NULL);
    printf("MPI_Init(): %f\n",
           ((tv2.tv_sec - tv.tv_sec) * 1000.0
                 + (tv2.tv_usec - tv.tv_usec) / 1000.0));

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    gethostname(name, sizeof(name));
    printf("hello world (rank %d of %d %s)\n",
	   rank, size, name);


    return 0;

