Hi Sridhar,

I have committed changes that allow you to set the debug verbosity:

OMPI_MCA_btl_base_debug
0 - no debug output
1 - standard debug output
2 - very verbose debug output
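
For example, to get the most verbose output (assuming a bash-style shell; adjust for your environment):

  export OMPI_MCA_btl_base_debug=2
  mpirun -np 2 ./PMB-MPI1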

Also, we have run the Pallas tests and are unable to reproduce your failures. We do see a warning in the Reduce test, but it does not hang and runs to completion. Attached is a simple ping-pong program; please try running it and let us know the results.

Thanks,

Galen


/*
 * MPI ping program
 *
 * Patterned after the example in the Quadrics documentation
 */

#define MPI_ALLOC_MEM 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/select.h>

#include <getopt.h>

#include "mpi.h"

/*
 * Parse a size argument such as "1024", "64k", or "4m" into a byte
 * count.  Returns -1 if the string cannot be parsed.
 */
static int str2size(char *str)
{
    int size;
    char mod[32];

    switch (sscanf(str, "%d%1[mMkK]", &size, mod)) {
    case 1:
        return (size);

    case 2:
        switch (*mod) {
        case 'm':
        case 'M':
            return (size << 20);

        case 'k':
        case 'K':
            return (size << 10);

        default:
            return (size);
        }

    default:
        return (-1);
    }
}


static void usage(void)
{
    fprintf(stderr,
            "Usage: mpi-ping [flags] <min bytes> [<max bytes>] [<inc bytes>]\n"
            "       mpi-ping -h\n");
    exit(EXIT_FAILURE);
}


static void help(void)
{
    printf
        ("Usage: mpi-ping [flags] <min bytes> [<max bytes>] [<inc bytes>]\n"
         "\n" "   Flags may be any of\n"
         "      -B                use blocking send/recv\n"
         "      -C                check data\n"
         "      -O                overlapping pings\n"
         "      -W                perform warm-up phase\n"
         "      -r number         repetitions to time\n"
         "      -A               use MPI_Alloc_mem to register memory\n" 
         "      -h                print this info\n" "\n"
         "   Numbers may be postfixed with 'k' or 'm'\n\n");

    exit(EXIT_SUCCESS);
}


int main(int argc, char *argv[])
{
    MPI_Status status;
    MPI_Request recv_request;
    MPI_Request send_request;
    unsigned char *rbuf;
    unsigned char *tbuf;
    int c;
    int i;
    int bytes;
    int nproc;
    int peer;
    int proc;
    int r;
    int tag = 0x666;

    /*
     * default options / arguments
     */
    int reps = 10000;
    int blocking = 0;
    int check = 0;
    int overlap = 0;
    int warmup = 0;
    int inc_bytes = 0;
    int max_bytes = 0;
    int min_bytes = 0;
    int alloc_mem = 0; 

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &proc);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    while ((c = getopt(argc, argv, "BCOWAr:h")) != -1) {
        switch (c) {

        case 'B':
            blocking = 1;
            break;

        case 'C':
            check = 1;
            break;

        case 'O':
            overlap = 1;
            break;

        case 'W':
            warmup = 1;
            break;

        case 'A':
            alloc_mem = 1;
            break;

        case 'r':
            if ((reps = str2size(optarg)) <= 0) {
                usage();
            }
            break;

        case 'h':
            help();

        default:
            usage();
        }
    }

    if (optind == argc) {
        min_bytes = 0;
    } else if ((min_bytes = str2size(argv[optind++])) < 0) {
        usage();
    }

    if (optind == argc) {
        max_bytes = min_bytes;
    } else if ((max_bytes = str2size(argv[optind++])) < min_bytes) {
        usage();
    }

    if (optind == argc) {
        inc_bytes = 0;
    } else if ((inc_bytes = str2size(argv[optind++])) < 0) {
        usage();
    }

    if (nproc == 1) {
        exit(EXIT_SUCCESS);
    }

#if MPI_ALLOC_MEM
    if (alloc_mem) {
        MPI_Alloc_mem(max_bytes ? max_bytes : 8, MPI_INFO_NULL, &rbuf);
        MPI_Alloc_mem(max_bytes ? max_bytes : 8, MPI_INFO_NULL, &tbuf);
    } else {
#endif
        if ((rbuf = (unsigned char *) malloc(max_bytes ? max_bytes : 8)) == NULL) {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        if ((tbuf = (unsigned char *) malloc(max_bytes ? max_bytes : 8)) == NULL) {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
#if MPI_ALLOC_MEM
    }
#endif

    if (check) {
        for (i = 0; i < max_bytes; i++) {
            tbuf[i] = i & 255;
            rbuf[i] = 0;
        }
    }

    if (proc == 0) {
        if (overlap) {
            printf("mpi-ping: overlapping ping-pong\n");
        } else if (blocking) {
            printf("mpi-ping: ping-pong (using blocking send/recv)\n");
        } else {
            printf("mpi-ping: ping-pong\n");
        }
        if (check) {
            printf("data checking enabled\n");
        }
        printf("nprocs=%d, reps=%d, min bytes=%d, max bytes=%d inc bytes=%d\n",
               nproc, reps, min_bytes, max_bytes, inc_bytes);
        fflush(stdout);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    peer = proc ^ 1;

    if ((peer < nproc) && (peer & 1)) {
        printf("%d pings %d\n", proc, peer);
        fflush(stdout);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (warmup) {

        if (proc == 0) {
            puts("warm-up phase");
            fflush(stdout);
        }

        for (r = 0; r < reps; r++) {
            if (peer >= nproc) {
                break;
            }
            MPI_Irecv(rbuf, max_bytes, MPI_BYTE, peer, tag, MPI_COMM_WORLD,
                      &recv_request);
            MPI_Isend(tbuf, max_bytes, MPI_BYTE, peer, tag, MPI_COMM_WORLD,
                      &send_request);
            MPI_Wait(&send_request, &status);
            MPI_Wait(&recv_request, &status);
        }

        if (proc == 0) {
            puts("warm-up phase done");
            fflush(stdout);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /*
     * Main loop: step the message size from min_bytes up to max_bytes,
     * adding inc_bytes each pass, or doubling (starting from 1) when no
     * increment was given.
     */

    for (bytes = min_bytes; bytes <= max_bytes;
         bytes = inc_bytes ? bytes + inc_bytes : bytes ? 2 * bytes : 1) {

        double t = 0.0;
        double tv[2];

        r = reps;

        MPI_Barrier(MPI_COMM_WORLD);

        if (peer < nproc) {

            if (overlap) {

                /*
                 * MPI_Isend / MPI_Irecv overlapping ping-pong
                 */

                tv[0] = MPI_Wtime();

                for (r = 0; r < reps; r++) {

                    MPI_Irecv(rbuf, bytes, MPI_BYTE, peer, tag,
                              MPI_COMM_WORLD, &recv_request);
                    MPI_Isend(tbuf, bytes, MPI_BYTE, peer, tag,
                              MPI_COMM_WORLD, &send_request);
                    MPI_Wait(&send_request, &status);
                    MPI_Wait(&recv_request, &status);

                    if (check) {
                        for (i = 0; i < bytes; i++) {
                            if (rbuf[i] != (unsigned char)(i & 255)) {
                                fprintf(stderr,
                                        "Error: index=%d sent %d received %d\n",
                                        i, ((unsigned char) i) & 255,
                                        (unsigned char) rbuf[i]);
                            }
                            rbuf[i] = 0;
                        }
                    }
                }

                tv[1] = MPI_Wtime();

            } else if (blocking) {

                /*
                 * MPI_Send / MPI_Recv ping-pong
                 */

                tv[0] = MPI_Wtime();

                if (peer < nproc) {
                    if (proc & 1) {
                        r--;
                        MPI_Recv(rbuf, bytes, MPI_BYTE, peer, tag,
                                 MPI_COMM_WORLD, &status);

                        if (check) {
                            for (i = 0; i < bytes; i++) {
                                if (rbuf[i] != (unsigned char)(i & 255)) {
                                    fprintf(stderr,
                                            "Error: index=%d sent %d received %d\n",
                                            i, ((unsigned char) i) & 255,
                                            (unsigned char) rbuf[i]);
                                }
                                rbuf[i] = 0;
                            }
                        }
                    }

                    while (r-- > 0) {

                        MPI_Send(tbuf, bytes, MPI_BYTE, peer, tag,
                                 MPI_COMM_WORLD);
                        MPI_Recv(rbuf, bytes, MPI_BYTE, peer, tag,
                                 MPI_COMM_WORLD, &status);

                        if (check) {
                            for (i = 0; i < bytes; i++) {
                                if (rbuf[i] != (unsigned char)(i & 255)) {
                                    fprintf(stderr,
                                            "Error: index=%d sent %d received %d\n",
                                            i, ((unsigned char) i) & 255,
                                            (unsigned char) rbuf[i]);
                                }
                                rbuf[i] = 0;
                            }
                        }
                    }

                    if (proc & 1) {
                        MPI_Send(tbuf, bytes, MPI_BYTE, peer, tag,
                                 MPI_COMM_WORLD);
                    }
                }

                tv[1] = MPI_Wtime();

            } else {

                /*
                 * MPI_Isend / MPI_Irecv ping-pong
                 */

                tv[0] = MPI_Wtime();

                if (peer < nproc) {
                    if (proc & 1) {
                        r--;
                        MPI_Irecv(rbuf, bytes, MPI_BYTE, peer, tag,
                                  MPI_COMM_WORLD, &recv_request);
                        MPI_Wait(&recv_request, &status);

                        if (check) {
                            for (i = 0; i < bytes; i++) {
                                if (rbuf[i] != (unsigned char)(i & 255)) {
                                    fprintf(stderr,
                                            "Error: index=%d sent %d received %d\n",
                                            i, ((unsigned char) i) & 255,
                                            (unsigned char) rbuf[i]);
                                }
                                rbuf[i] = 0;
                            }
                        }
                    }

                    while (r-- > 0) {

                        MPI_Isend(tbuf, bytes, MPI_BYTE, peer, tag,
                                  MPI_COMM_WORLD, &send_request);
                        MPI_Wait(&send_request, &status);
                        MPI_Irecv(rbuf, bytes, MPI_BYTE, peer, tag,
                                  MPI_COMM_WORLD, &recv_request);
                        MPI_Wait(&recv_request, &status);

                        if (check) {
                            for (i = 0; i < bytes; i++) {
                                if (rbuf[i] != (unsigned char)(i & 255)) {
                                    fprintf(stderr,
                                            "Error: index=%d sent %d received %d\n",
                                            i, ((unsigned char) i) & 255,
                                            (unsigned char) rbuf[i]);
                                }
                                rbuf[i] = 0;
                            }
                        }
                    }

                    if (proc & 1) {
                        MPI_Isend(tbuf, bytes, MPI_BYTE, peer, tag,
                                  MPI_COMM_WORLD, &send_request);
                        MPI_Wait(&send_request, &status);
                    }
                }

                tv[1] = MPI_Wtime();
            }

            /*
             * Calculate time interval in useconds (half round trip)
             */

            t = (tv[1] - tv[0]) * 1000000.0 / (2 * reps);

        }

        MPI_Barrier(MPI_COMM_WORLD);

        if ((peer < nproc) && (peer & 1)) {
            printf("%3d pinged %3d: %8d bytes %9.2f uSec %8.2f MB/s\n",
                   proc, peer, bytes, t, bytes / (t));
            fflush(stdout);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();

    return EXIT_SUCCESS;
}
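
To build and run the attached program, something along these lines should work (assuming Open MPI's mpicc wrapper is in your PATH and the source is saved as mpi-ping.c; adjust the names as needed):

  mpicc -o mpi-ping mpi-ping.c
  mpirun -np 2 ./mpi-ping 0 1m

Note that the -A option only has an effect if you change the MPI_ALLOC_MEM define at the top of the file to 1; as posted it is 0, so plain malloc() is always used.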

On Aug 9, 2005, at 8:15 AM, Sridhar Chirravuri wrote:

The same kind of output appears while running the Pallas "pingpong" test.

-Sridhar

-----Original Message-----
From: devel-boun...@open-mpi.org [mailto:devel-boun...@open-mpi.org] On
Behalf Of Sridhar Chirravuri
Sent: Tuesday, August 09, 2005 7:44 PM
To: Open MPI Developers
Subject: Re: [O-MPI devel] Fwd: Regarding MVAPI Component in Open MPI


I have run the sendrecv function in Pallas, but it failed to run. Here is
the output:

[root@micrompi-2 SRC_PMB]# mpirun -np 2 PMB-MPI1 sendrecv
Could not join a running, existing universe
Establishing a new one named: default-universe-5097
[0,1,1][btl_mvapi.c:130:mca_btl_mvapi_del_procs] Stub
[0,1,1][btl_mvapi.c:130:mca_btl_mvapi_del_procs] Stub


[0,1,0][btl_mvapi.c:130:mca_btl_mvapi_del_procs] Stub

[0,1,0][btl_mvapi.c:130:mca_btl_mvapi_del_procs] Stub

[0,1,0][btl_mvapi_endpoint.c:542:mca_btl_mvapi_endpoint_send] Connection to endpoint closed ... connecting ...
[0,1,0][btl_mvapi_endpoint.c:318:mca_btl_mvapi_endpoint_start_connect] Initialized High Priority QP num = 263177, Low Priority QP num = 263178, LID = 785
[0,1,0][btl_mvapi_endpoint.c:190:mca_btl_mvapi_endpoint_send_connect_req] Sending High Priority QP num = 263177, Low Priority QP num = 263178, LID = 785
[0,1,0][btl_mvapi_endpoint.c:542:mca_btl_mvapi_endpoint_send] Connection to endpoint closed ... connecting ...
[0,1,0][btl_mvapi_endpoint.c:318:mca_btl_mvapi_endpoint_start_connect] Initialized High Priority QP num = 263179, Low Priority QP num = 263180, LID = 786
[0,1,0][btl_mvapi_endpoint.c:190:mca_btl_mvapi_endpoint_send_connect_req] Sending High Priority QP num = 263179, Low Priority QP num = 263180, LID = 786
#---------------------------------------------------
#    PALLAS MPI Benchmark Suite V2.2, MPI-1 part
#---------------------------------------------------
# Date       : Tue Aug  9 07:11:25 2005
# Machine    : x86_64
# System     : Linux
# Release    : 2.6.9-5.ELsmp
# Version    : #1 SMP Wed Jan 5 19:29:47 EST 2005

#
# Minimum message length in bytes:   0
# Maximum message length in bytes:   4194304
#
# MPI_Datatype                   :   MPI_BYTE
# MPI_Datatype for reductions    :   MPI_FLOAT
# MPI_Op                         :   MPI_SUM
#
#

# List of Benchmarks to run:

# Sendrecv
[0,1,1][btl_mvapi_endpoint.c:368:mca_btl_mvapi_endpoint_reply_start_connect] Initialized High Priority QP num = 263177, Low Priority QP num = 263178, LID = 777
[0,1,1][btl_mvapi_endpoint.c:266:mca_btl_mvapi_endpoint_set_remote_info] Received High Priority QP num = 263177, Low Priority QP num 263178, LID = 785
[0,1,1][btl_mvapi_endpoint.c:756:mca_btl_mvapi_endpoint_qp_init_query] Modified to init..Qp 7080096
[0,1,1][btl_mvapi_endpoint.c:791:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTR..Qp 7080096
[0,1,1][btl_mvapi_endpoint.c:814:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTS..Qp 7080096

[0,1,1][btl_mvapi_endpoint.c:756:mca_btl_mvapi_endpoint_qp_init_query] Modified to init..Qp 7240736
[0,1,1][btl_mvapi_endpoint.c:791:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTR..Qp 7240736
[0,1,1][btl_mvapi_endpoint.c:814:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTS..Qp 7240736
[0,1,1][btl_mvapi_endpoint.c:190:mca_btl_mvapi_endpoint_send_connect_req] Sending High Priority QP num = 263177, Low Priority QP num = 263178, LID = 777
[0,1,0][btl_mvapi_endpoint.c:266:mca_btl_mvapi_endpoint_set_remote_info] Received High Priority QP num = 263177, Low Priority QP num 263178, LID = 777
[0,1,0][btl_mvapi_endpoint.c:756:mca_btl_mvapi_endpoint_qp_init_query] Modified to init..Qp 7081440
[0,1,0][btl_mvapi_endpoint.c:791:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTR..Qp 7081440
[0,1,0][btl_mvapi_endpoint.c:814:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTS..Qp 7081440
[0,1,0][btl_mvapi_endpoint.c:756:mca_btl_mvapi_endpoint_qp_init_query] Modified to init..Qp 7241888
[0,1,0][btl_mvapi_endpoint.c:791:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTR..Qp 7241888
[0,1,0][btl_mvapi_endpoint.c:814:mca_btl_mvapi_endpoint_qp_init_query] Modified to RTS..Qp 7241888
[0,1,1][btl_mvapi_component.c:523:mca_btl_mvapi_component_progress] Got a recv completion


Thanks
-Sridhar




-----Original Message-----
From: devel-boun...@open-mpi.org [mailto:devel-boun...@open-mpi.org] On
Behalf Of Brian Barrett
Sent: Tuesday, August 09, 2005 7:35 PM
To: Open MPI Developers
Subject: Re: [O-MPI devel] Fwd: Regarding MVAPI Component in Open MPI

On Aug 9, 2005, at 8:48 AM, Sridhar Chirravuri wrote:

Does r6774 have a lot of changes related to the 3rd-generation
point-to-point? I am trying to run some benchmark tests (e.g.,
Pallas) with the Open MPI stack and just want to compare the
performance figures with MVAPICH 095 and MVAPICH 092.

In order to use the 3rd-generation p2p communication, I have added the
following line to /openmpi/etc/openmpi-mca-params.conf:

pml=ob1

I also exported OMPI_MCA_pml=ob1 as a double check.

Then I tried running on the same machine, which has 2 processors:

mpirun -np 2 ./PMB-MPI1

I still see the following lines

Request for 0 bytes (coll_basic_reduce_scatter.c, 79)
Request for 0 bytes (coll_basic_reduce.c, 193)
Request for 0 bytes (coll_basic_reduce_scatter.c, 79)
Request for 0 bytes (coll_basic_reduce.c, 193)

These errors are coming from the collective routines, not the PML/BTL
layers.  It looks like the reduction code is trying to call malloc(0),
which doesn't work so well.  We'll take a look as soon as we can.  In
the meantime, can you just not run the tests that call the reduction
collectives?
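
For reference, the usual guard is simply to never ask malloc for zero bytes; here is a minimal sketch of that pattern (illustrative only, not the actual Open MPI change):

#include <stdlib.h>

/*
 * Illustrative only: malloc(0) may legally return NULL, which callers
 * can mistake for an out-of-memory failure, so zero-byte requests are
 * typically rounded up to a single byte.
 */
static void *alloc_at_least_one(size_t nbytes)
{
    return malloc(nbytes > 0 ? nbytes : 1);
}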

Brian


--
   Brian Barrett
   Open MPI developer
   http://www.open-mpi.org/


_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel

