Hello, I'm posting another problem with my installation: I wanted to benchmark the difference between the tcp and openib transports.
If I run a simple non-MPI application, everything works:

  randori ~ # mpirun --mca btl tcp,self -np 2 -host randori -host tatami hostname
  randori
  tatami

but as soon as I switch to my benchmark program,

  mpirun --mca btl tcp,self -np 2 -host randori -host tatami graph
  Master thread reporting
  matrix size 33554432 kB, time is in [us]

instead of starting the send/receive functions it just hangs there. I also checked the transmitted packets with Wireshark: after the handshake, no more packets are exchanged.

I read in the archives that there were some problems in this area, so I tried what was suggested in previous emails:

  mpirun --mca btl ^openib -np 2 -host randori -host tatami graph
  mpirun --mca pml ob1 --mca btl tcp,self -np 2 -host randori -host tatami graph

Both give exactly the same result as before (no MPI send/receive), while the next command gives something more interesting:

  mpirun --mca pml cm --mca btl tcp,self -np 2 -host randori -host tatami graph
  --------------------------------------------------------------------------
  No available pml components were found!

  This means that there are no components of this type installed on your
  system or all the components reported that they could not be used.

  This is a fatal error; your MPI process is likely to abort. Check the
  output of the "ompi_info" command and ensure that components of this
  type are available on your system. You may also wish to check the
  value of the "component_path" MCA parameter and ensure that it has at
  least one directory that contains valid MCA components.
  --------------------------------------------------------------------------
  [tatami:06619] PML cm cannot be selected
  mpirun noticed that job rank 0 with PID 6710 on node randori exited on signal 15 (Terminated).

This should not be possible, since "ompi_info --param all" does list the cm PML component:

  MCA pml: cm (MCA v1.0, API v1.0, Component v1.2.8)
  MCA pml: ob1 (MCA v1.0, API v1.0, Component v1.2.8)

My test program is quite simple, just a couple of MPI_Send and MPI_Recv calls (pasted after the signature). Do you have any ideas that might help me?
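In case it helps narrow things down, I can rerun the hanging case with more verbose BTL output (I believe btl_base_verbose is the right knob here, though I'm not sure which level is most useful):

  mpirun --mca btl tcp,self --mca btl_base_verbose 30 -np 2 -host randori -host tatami graph

and double-check the tcp BTL parameters (interfaces, ports) on both nodes with:

  ompi_info --param btl tcp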
thanks a lot
Vittorio

========================

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/time.h>   /* for gettimeofday() and struct timeval */

#define M_COL 4096
#define M_ROW 524288
#define NUM_MSG 25

unsigned long int gigamatrix[M_ROW][M_COL];

int main (int argc, char *argv[])
{
    int numtasks, rank, dest, source, rc, tmp, tag = 1;
    unsigned long int exp, exchanged, e;
    unsigned long matsize;
    MPI_Status Stat;
    struct timeval timing_start, timing_end;
    long int totaltime = 0;

    MPI_Init (&argc, &argv);
    MPI_Comm_size (MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        fprintf (stderr, "Master thread reporting\n");
        matsize = (unsigned long) M_COL * M_ROW / 64;
        fprintf (stderr, "matrix size %lu kB, time is in [us]\n", matsize);

        source = 1;
        dest = 1;

        /* warm-up phase */
        rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
        rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag, MPI_COMM_WORLD, &Stat);
        rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
        rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
        rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag, MPI_COMM_WORLD, &Stat);
        rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);

        for (e = 0; e < NUM_MSG; e++) {
            exp = pow (2, e);
            exchanged = 64 * exp;

            /* timing of ops */
            gettimeofday (&timing_start, NULL);
            rc = MPI_Send (&gigamatrix[0], exchanged, MPI_UNSIGNED_LONG, dest, tag, MPI_COMM_WORLD);
            rc = MPI_Recv (&gigamatrix[0], exchanged, MPI_UNSIGNED_LONG, source, tag, MPI_COMM_WORLD, &Stat);
            gettimeofday (&timing_end, NULL);

            totaltime = (timing_end.tv_sec - timing_start.tv_sec) * 1000000
                      + (timing_end.tv_usec - timing_start.tv_usec);
            memset (&timing_start, 0, sizeof (struct timeval));
            memset (&timing_end, 0, sizeof (struct timeval));

            fprintf (stdout, "%lu kB\t%ld\n", exp, totaltime);
        }

        fprintf (stderr, "task complete\n");
    } else if (rank >= 1) {
        dest = 0;
        source = 0;

        /* warm-up phase (mirrors the master's sequence) */
        rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag, MPI_COMM_WORLD, &Stat);
        rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
        rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag, MPI_COMM_WORLD, &Stat);
        rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag, MPI_COMM_WORLD, &Stat);
        rc = MPI_Send (&tmp, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
        rc = MPI_Recv (&tmp, 1, MPI_INT, source, tag, MPI_COMM_WORLD, &Stat);

        for (e = 0; e < NUM_MSG; e++) {
            exp = pow (2, e);
            exchanged = 64 * exp;

            rc = MPI_Recv (&gigamatrix[0], exchanged, MPI_UNSIGNED_LONG, source, tag, MPI_COMM_WORLD, &Stat);
            rc = MPI_Send (&gigamatrix[0], exchanged, MPI_UNSIGNED_LONG, dest, tag, MPI_COMM_WORLD);
        }
    }

    MPI_Finalize ();
    return 0;
}
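For reference, I build and launch it like this (assuming nothing special is needed beyond -lm for the pow() call):

  mpicc -O2 -o graph graph.c -lm
  mpirun --mca btl tcp,self -np 2 -host randori -host tatami graph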