Hello,

I have a very simple MPI program that hangs in MPI_Reduce when using the openmpi-1.2.4-1
supplied with OFED 1.2.5.4 (which is also the OFED version running on the nodes).

It works on the same hardware using the supplied mvapich (mvapich-0.9.9).

The hardware is a Mellanox Technologies MT25418 [ConnectX IB DDR] (rev a0) HCA
(Sun/Voltaire branded), and the switch is a Voltaire ISR9024D (running at DDR rate).

------------------------------------------------------------------------------
Switch software/firmware is:
ISR9024D-2c0c> version show
ISR 9024 version: 3.4.5
        date:    Oct 09 2007 11:46:00 AM
        build Id:467

ISR9024D-2c0c> module-firmware show
Anafa self address: lid 1 firmware 1.0.0 gid 0xfe800000000000000008f10400412c0c

------------------------------------------------------------------------------
HCA info:
/tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/bin$ ./ibv_devinfo
hca_id: mlx4_0
        fw_ver:                         2.2.000
        node_guid:                      0003:ba00:0100:5cf0
        sys_image_guid:                 0003:ba00:0100:5cf3
        vendor_id:                      0x03ba
        vendor_part_id:                 25418
        hw_ver:                         0xA0
        board_id:                       SUN0060000001
        phys_port_cnt:                  2
                port:   1
                        state:                  PORT_ACTIVE (4)
                        max_mtu:                2048 (4)
                        active_mtu:             2048 (4)
                        sm_lid:                 1
                        port_lid:               10
                        port_lmc:               0x00

                port:   2
                        state:                  PORT_DOWN (1)
                        max_mtu:                2048 (4)
                        active_mtu:             2048 (4)
                        sm_lid:                 0
                        port_lid:               0
                        port_lmc:               0x00


./ibstatus

Infiniband device 'mlx4_0' port 1 status:
        default gid:     fe80:0000:0000:0000:0003:ba00:0100:5cf1
        base lid:        0xa
        sm lid:          0x1
        state:           4: ACTIVE
        phys state:      5: LinkUp
        rate:            20 Gb/sec (4X DDR)


The program is an old LAM test (cpi.c):

------------------------------------------------------------------------------
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <math.h>
#include <mpi.h>

/* Constant for how many values we'll estimate */
#define NUM_ITERS 1000

/* Prototype the function that we'll use below. */
static double f(double);

int
main(int argc, char *argv[])
{
  int iter, rank, size, i;
  double PI25DT = 3.141592653589793238462643;
  double mypi, pi, h, sum, x;
  double startwtime = 0.0, endwtime;
  int namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name(processor_name, &namelen);

  printf("Process %d of %d on %s\n", rank, size, processor_name);

  /* Repeatedly estimate pi with the midpoint rule; every iteration ends in
     an MPI_Reduce of the partial sums to rank 0. */
  for (iter = 2; iter < NUM_ITERS; ++iter) {
    h = 1.0 / (double) iter;
    sum = 0.0;

    for (i = rank + 1; i <= iter; i += size) {
      x = h * ((double) i - 0.5);
      sum += f(x);
    }
    mypi = h * sum;

    MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  }
  MPI_Finalize();
  return 0;
}


static double
f(double a)
{
  return (4.0 / (1.0 + a * a));
}
------------------------------------------------------------------------------

The gdb backtrace of the hang with the gcc-built Open MPI looks like:
(gdb) where
#0  0x00002b60d54428e5 in pthread_spin_lock () from /lib64/libpthread.so.0
#1  0x00002b60d8705aec in mlx4_poll_cq (ibcq=0x5b0bf0, ne=1, wc=0x7fffd6051390) at src/cq.c:334
#2  0x00002b60d7c865bc in btl_openib_component_progress ()
    at /tmp/OFED-1.2.5.4/OFED/tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/include/infiniband/verbs.h:883
#3  0x00002b60d7b7925a in mca_bml_r2_progress () at bml_r2.c:106
#4  0x00002b60d4e6d11a in opal_progress () at runtime/opal_progress.c:288
#5  0x00002b60d7a6b8b8 in mca_pml_ob1_recv (addr=0x7fffd60517c8, count=1, datatype=0x501660, src=8,
    tag=-21, comm=<value optimized out>, status=0x0) at ../../../../opal/threads/condition.h:81
#6  0x00002b60d84e3cfa in ompi_coll_tuned_reduce_intra_basic_linear (sbuf=0x7fffd60517d0,
    rbuf=0x7fffd60517c8, count=1, dtype=0x501660, op=0x5012f0, root=<value optimized out>,
    comm=0x5014a0) at coll_tuned_reduce.c:385
#7  0x00002b60d4bcd32f in PMPI_Reduce (sendbuf=0x7fffd60517d0, recvbuf=0x7fffd60517c8, count=1,
    datatype=0x501660, op=0x5012f0, root=0, comm=0x5014a0) at preduce.c:96
#8  0x0000000000400cee in main ()

The gdb backtrace of the pgi-built hang looks like:

(gdb) where
#0  0x00002ac216e408e5 in pthread_spin_lock () from /lib64/libpthread.so.0
#1  0x00002ac2177ceaec in mlx4_poll_cq (ibcq=0x5b52c0, ne=1, wc=0x7fff97255600) at src/cq.c:334
#2  0x00002ac216bf51c2 in ibv_poll_cq ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/openmpi/mca_btl_openib.so
#3  0x00002ac216bf8182 in btl_openib_component_progress ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/openmpi/mca_btl_openib.so
#4  0x00002ac216ae9b24 in mca_bml_r2_progress ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/openmpi/mca_bml_r2.so
#5  0x00002ac213d60be4 in opal_progress ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/libopen-pal.so.0
#6  0x00002ac2169d4f45 in opal_condition_wait ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/openmpi/mca_pml_ob1.so
#7  0x00002ac2169d5a83 in mca_pml_ob1_recv ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/openmpi/mca_pml_ob1.so
#8  0x00002ac2175a1e67 in ompi_coll_tuned_reduce_intra_basic_linear ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/openmpi/mca_coll_tuned.so
#9  0x00002ac217597ca5 in ompi_coll_tuned_reduce_intra_dec_fixed ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/openmpi/mca_coll_tuned.so
#10 0x00002ac213a07e38 in PMPI_Reduce ()
   from /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/pgi/openmpi-1.2.4-1/lib64/libmpi.so.0
#11 0x0000000000402551 in main ()
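
Both traces show the same picture: the process spins in mlx4_poll_cq from
btl_openib_component_progress while mca_pml_ob1_recv, inside the tuned component's
linear reduce, waits for a message that never completes at the root (the gcc trace
is stuck on the receive from rank 8). If it helps narrow things down, one experiment
would be to force a different reduce algorithm in the tuned collective. A minimal
sketch only; I am assuming the coll_tuned forced-algorithm MCA parameters are
available in 1.2.4 (the valid algorithm numbers would need to be checked with
ompi_info; 2 below is purely an example value):

# Hypothetical diagnostic run: force a non-linear reduce algorithm in coll_tuned
mpirun --prefix /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/gcc/openmpi-1.2.4-1 \
       -mca btl openib,self \
       -mca coll_tuned_use_dynamic_rules 1 \
       -mca coll_tuned_reduce_algorithm 2 \
       -np 9 -machinefile ic48scali ./a.out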

------------------------------------------------------------------------------
The openmpi_gcc script was:
#!/bin/ksh
set -x

export PATH=/tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/gcc/openmpi-1.2.4-1/bin:$PATH
PREFIX="--prefix /tools/ofed/1.2.5.4/suse_sles_10_1/x86_64/xeon/mpi/gcc/openmpi-1.2.4-1"
MCA="-mca btl openib,self -mca btl_tcp_if_exclude lo,eth1 -mca oob_tcp_if_exclude lo,eth1"
mpicc cpi.c
mpirun $PREFIX $MCA -np 9 -machinefile ic48scali ./a.out
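
As a further isolation test, the same job can be run with the openib BTL taken out
of the picture, i.e. over TCP only. A sketch, reusing $PREFIX from the script above
and carrying over the same interface exclusions (whether eth1 is the right interface
to exclude is an assumption taken from the existing MCA settings):

# Hypothetical isolation run: TCP-only BTL, no openib
mpirun $PREFIX -mca btl tcp,self \
       -mca btl_tcp_if_exclude lo,eth1 -mca oob_tcp_if_exclude lo,eth1 \
       -np 9 -machinefile ic48scali ./a.out

If that run completes, it would point at the openib BTL / mlx4 path rather than the
Reduce logic itself.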


Any ideas as to what the culprit in this hang might be?

Regards,
Mostyn
