Hello,

Using a very recent svn version (1.4a1r18899), I'm getting a non-terminating
condition if I use the sm btl together with tcp,self or with openib,self:
the program never finishes a reduce operation. It works if the sm btl is
left out.

I'm using two 4-core nodes.
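
For reference, a failing run was launched roughly along these lines (the
hostfile and program names here are just placeholders, not the exact
command):

  mpirun -np 8 --hostfile <hostfile> --mca btl sm,tcp,self \
      --mca opal_event_include select ./cpi
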
The program is:
-----------------------------------------------------------------------------
/*
 * Copyright (c) 2001-2002 The Trustees of Indiana University.
 *                         All rights reserved.
 * Copyright (c) 1998-2001 University of Notre Dame.
 *                         All rights reserved.
 * Copyright (c) 1994-1998 The Ohio State University.
 *                         All rights reserved.
 *
 * This file is part of the LAM/MPI software package.  For license
 * information, see the LICENSE file in the top level directory of the
 * LAM/MPI source distribution.
 *
 * $HEADER$
 *
 * $Id: cpi.c,v 1.4 2002/11/23 04:06:58 jsquyres Exp $
 *
 * Portions taken from the MPICH distribution example cpi.c.
 *
 * Example program to calculate the value of pi by integrating f(x) =
 * 4 / (1 + x^2).
 */

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <math.h>
#include <mpi.h>


/* Constant for how many values we'll estimate */

#define NUM_ITERS 1000


/* Prototype the function that we'll use below. */

static double f(double);


int
main(int argc, char *argv[])
{
  int iter, rank, size, i;
  double PI25DT = 3.141592653589793238462643;
  double mypi, pi, h, sum, x;
  double startwtime = 0.0, endwtime;
  int namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];
  pid_t pid;
  char foo[200];

  pid = getpid();

  /* Normal MPI startup */

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name(processor_name, &namelen);

/*  if(rank == 7){ */
  printf("Process %d of %d on %s\n", rank, size, processor_name);
  /* system("set"); */
/*  sprintf(foo, "%s %d", "/tools/linux/bin/cpu -o -p ", pid);
    system(foo); */
/*  } */

  /* Do approximations for 2 to NUM_ITERS-1 points */

  /* sleep(5); */
  for (iter = 2; iter < NUM_ITERS; ++iter) {
    h = 1.0 / (double) iter;
    sum = 0.0;

    /* A slightly better approach starts from large i and works back */

    if (rank == 0)
      startwtime = MPI_Wtime();

    for (i = rank + 1; i <= iter; i += size) {
      x = h * ((double) i - 0.5);
      sum += f(x);
    }
    mypi = h * sum;
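
    /*
     * mypi is this rank's midpoint-rule partial sum: with h = 1/iter,
     * summing f((i - 0.5) * h) over a strided subset of i = 1..iter and
     * scaling by h approximates part of the integral of 4 / (1 + x^2)
     * over [0, 1], which is pi.  The reduce below combines the partial
     * sums at rank 0.
     */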

    MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
/*    if (rank == 0) {
     MPI_Reduce(MPI_IN_PLACE, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    } else {
     MPI_Reduce(&mypi, NULL, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    } */

/*    if (rank == 0) {
      printf("%d points: pi is approximately %.16f, error = %.16f\n",
             iter, pi, fabs(pi - PI25DT));
      endwtime = MPI_Wtime();
      printf("wall clock time = %f\n", endwtime - startwtime);
      fflush(stdout);
    } */
  }

  /* All done */

  MPI_Finalize();
  return 0;
}


static double
f(double a)
{
  return (4.0 / (1.0 + a * a));
}
-----------------------------------------------------------------------------
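
The whole thing boils down to the repeated MPI_Reduce to rank 0. For
reference, here is a trimmed sketch of the same loop with the debug and
timing scaffolding stripped out (whether the hang still reproduces in this
reduced form is untested):
-----------------------------------------------------------------------------
#include <mpi.h>

int
main(int argc, char *argv[])
{
  int iter, i, rank, size;
  double mypi, pi, h, sum, x;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  for (iter = 2; iter < 1000; ++iter) {
    h = 1.0 / (double) iter;
    sum = 0.0;
    for (i = rank + 1; i <= iter; i += size) {
      x = h * ((double) i - 0.5);
      sum += 4.0 / (1.0 + x * x);
    }
    mypi = h * sum;

    /* The non-terminating condition shows up in this repeated reduce. */
    MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  }

  MPI_Finalize();
  return 0;
}
-----------------------------------------------------------------------------
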
A "good" non sm btl run (--mca btl tcp,self --mca opal_event_include select) 
gives
Process 1 of 8 on r4150_18
Process 3 of 8 on r4150_18
Process 5 of 8 on r4150_17
Process 4 of 8 on r4150_17
Process 7 of 8 on r4150_17
Process 6 of 8 on r4150_17
Process 0 of 8 on r4150_18
Process 2 of 8 on r4150_18

A "bad" sm btl run (--mca btl sm,tcp,self --mca opal_event_include select)
When using gdb to attach a non-terminating process shows:
(gdb) where
#0  0x000000366d4c78d3 in __select_nocancel () from /lib64/libc.so.6
#1  0x00002aaaab076546 in select_dispatch (base=0x278bc00, arg=0x278bb50, tv=0x7fff8bd179a0)
    at ../../.././opal/event/select.c:176
#2  0x00002aaaab073308 in opal_event_base_loop (base=0x278bc00, flags=2)
    at ../../.././opal/event/event.c:803
#3  0x00002aaaab073004 in opal_event_loop (flags=2) at ../../.././opal/event/event.c:726
#4  0x00002aaaab0636a4 in opal_progress () at ../.././opal/runtime/opal_progress.c:189
#5  0x00002aaaaaaf9c23 in opal_condition_wait (c=0x2aaaaadd7bc0, m=0x2aaaaadd7c20)
    at ../.././opal/threads/condition.h:100
#6  0x00002aaaaaafa1bd in ompi_request_default_wait_all (count=1, requests=0x7fff8bd17b50,
    statuses=0x0) at ../.././ompi/request/req_wait.c:262
#7  0x00002aaaaf0f913e in ompi_coll_tuned_reduce_generic (sendbuf=0x7fff8bd180d0,
    recvbuf=0x7fff8bd180c8, original_count=1, datatype=0x6012b0, op=0x6016a0, root=0,
    comm=0x601440, module=0x2832610, tree=0x28331c0, count_by_segment=1,
    max_outstanding_reqs=0)
    at ../../../../.././ompi/mca/coll/tuned/coll_tuned_reduce.c:168
#8  0x00002aaaaf0f9fcd in ompi_coll_tuned_reduce_intra_binomial (sendbuf=0x7fff8bd180d0,
    recvbuf=0x7fff8bd180c8, count=1, datatype=0x6012b0, op=0x6016a0, root=0,
    comm=0x601440, module=0x2832610, segsize=0, max_outstanding_reqs=0)
    at ../../../../.././ompi/mca/coll/tuned/coll_tuned_reduce.c:462
#9  0x00002aaaaf0ea075 in ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf=0x7fff8bd180d0,
    recvbuf=0x7fff8bd180c8, count=1, datatype=0x6012b0, op=0x6016a0, root=0,
    comm=0x601440, module=0x2832610)
    at ../../../../.././ompi/mca/coll/tuned/coll_tuned_decision_fixed.c:389
#10 0x00002aaaaab41348 in PMPI_Reduce (sendbuf=0x7fff8bd180d0, recvbuf=0x7fff8bd180c8,
    count=1, datatype=0x6012b0, op=0x6016a0, root=0, comm=0x601440) at preduce.c:105
#11 0x0000000000400bc4 in main ()

opal/event/select.c:176 is:
        res = select(sop->event_fds + 1, sop->event_readset_out,
            sop->event_writeset_out, NULL, tv);
opal/event/event.c:803 is:
        res = evsel->dispatch(base, evbase, tv_p);
opal/event/event.c:726 is:
        return event_base_loop(current_base, flags);
opal/runtime/opal_progress.c:189 is:
                events += opal_event_loop(opal_progress_event_flag);
opal/threads/condition.h:100 is:
        while (c->c_signaled == 0) {
            opal_progress(); <----------------------------------100
            OPAL_CR_TEST_CHECKPOINT_READY_STALL();
        }
ompi/request/req_wait.c:262 is:
            while (pending > ompi_request_completed - start) {
                opal_condition_wait(&ompi_request_cond, &ompi_request_lock); <-----262
            }

I'll stop at this point.
If the default poll mechanism is used instead, there's a similar non-terminating loop.

Any ideas at this point?

Regards,
Mostyn
