Good to hear ;) Btw, the parameter I was talking about can accept more complex forms of inclusion/exclusion. Basically, all CIDR formats are supported.
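For example, assuming the network behind eth0 were 192.168.1.0/24 (a placeholder -- substitute your real subnet), you could select interfaces by address range instead of by name:

    mpirun --mca btl_tcp_if_include 192.168.1.0/24 -np 4 -npernode 2 -hostfile hosts ./string 12 0.1 0.9 10 2

Alternatively, you can list the subnets of the virtual networks with btl_tcp_if_exclude instead; the two parameters are mutually exclusive, and if you override the exclude list you should keep the loopback interface in it.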
george.

On Oct 10, 2012, at 19:20 , Santhosh Kokala <santhosh.kok...@riverbed.com> wrote:

> George,
> You are a life saver. This solved my issue.
>
> From: devel-boun...@open-mpi.org [mailto:devel-boun...@open-mpi.org] On Behalf Of George Bosilca
> Sent: Wednesday, October 10, 2012 10:10 AM
> To: Open MPI Developers
> Subject: Re: [OMPI devel] MPI_Reduce Hangs in my Application
>
> I guess the TCP BTL gets confused by your virtual interfaces (vmnet?). Try to limit the used interfaces using the "--mca btl_tcp_if_include eth0" argument. Let us know if this solves your issue.
>
> Thanks,
> george.
>
>
> On Oct 10, 2012, at 18:54 , Santhosh Kokala <santhosh.kok...@riverbed.com> wrote:
>
>
> George,
> Each host I am using has 4 interfaces, including the loopback interface. Can you please let me know more about your environment?
>
> eth0      Link encap:Ethernet  HWaddr bc:30:5b:db:ae:6f
>           inet addr:xxx.xxx.xxx.134  Bcast:xxx.xxx.xxx.255  Mask:255.255.255.0
>           inet6 addr: fe80::be30:5bff:fedb:ae6f/64 Scope:Link
>           UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>           RX packets:1375598 errors:0 dropped:0 overruns:0 frame:0
>           TX packets:709644 errors:0 dropped:0 overruns:0 carrier:0
>           collisions:0 txqueuelen:1000
>           RX bytes:1431654357 (1.4 GB)  TX bytes:69604165 (69.6 MB)
>           Interrupt:17
>
> lo        Link encap:Local Loopback
>           inet addr:127.0.0.1  Mask:255.0.0.0
>           inet6 addr: ::1/128 Scope:Host
>           UP LOOPBACK RUNNING  MTU:16436  Metric:1
>           RX packets:944 errors:0 dropped:0 overruns:0 frame:0
>           TX packets:944 errors:0 dropped:0 overruns:0 carrier:0
>           collisions:0 txqueuelen:0
>           RX bytes:264692 (264.6 KB)  TX bytes:264692 (264.6 KB)
>
> vmnet1    Link encap:Ethernet  HWaddr 00:50:56:c0:00:01
>           inet addr: xxx.xxx.xxx.1  Bcast: xxx.xxx.xxx.255  Mask:255.255.255.0
>           inet6 addr: fe80::250:56ff:fec0:1/64 Scope:Link
>           UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>           RX packets:0 errors:0 dropped:0 overruns:0 frame:0
>           TX packets:245 errors:0 dropped:0 overruns:0 carrier:0
>           collisions:0 txqueuelen:1000
>           RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
>
> vmnet8    Link encap:Ethernet  HWaddr 00:50:56:c0:00:08
>           inet addr: xxx.xxx.xxx..1  Bcast: xxx.xxx.xxx..255  Mask:255.255.255.0
>           inet6 addr: fe80::250:56ff:fec0:8/64 Scope:Link
>           UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
>           RX packets:58357 errors:0 dropped:0 overruns:0 frame:0
>           TX packets:238 errors:0 dropped:0 overruns:0 carrier:0
>           collisions:0 txqueuelen:1000
>           RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
>
> From: devel-boun...@open-mpi.org [mailto:devel-boun...@open-mpi.org] On Behalf Of George Bosilca
> Sent: Wednesday, October 10, 2012 4:41 AM
> To: Open MPI Developers
> Subject: Re: [OMPI devel] MPI_Reduce Hangs in my Application
>
> Your code works for me on two platforms. Thus, I guess the problem is with the communication layer (BTL) in Open MPI. What network do you use? If Ethernet, how many interfaces?
>
> Thanks,
> george.
>
> On Oct 10, 2012, at 09:30 , Santhosh Kokala <santhosh.kok...@riverbed.com> wrote:
>
>
>
> I have a problem with my MPI code: it hangs when run on multiple nodes, but it successfully completes when run on a single node. I am not sure how to debug this. Can someone help me debug this issue?
> Program Usage:
>
> mpicc -o string string.cpp
> mpirun -np 4 -npernode 2 -hostfile hosts ./string 12 0.1 0.9 10 2
>
> MPI_Reduce hangs in the 2nd iteration (output from the cout statements in my program):
>
> 1st Iteration (Timestep 1)
> -----------------------------------------------------
> 0 Waiting for MPI_Reduce()
> 0 Done Waiting for MPI_Reduce()
>
> 1 Waiting for MPI_Reduce()
> 1 Done Waiting for MPI_Reduce()
>
> 2 Waiting for MPI_Reduce()
> 2 Done Waiting for MPI_Reduce()
>
> 3 Waiting for MPI_Reduce()
> 3 Done Waiting for MPI_Reduce()
>
> 0 Sending to right task = 1
> 0 Receiving from right task = 1
>
> 1 Receiving from left task = 0
> 1 Sending to left task = 0
>
> 1 Sending to right task = 2
> 1 Receiving from right task = 2
>
> 2 Receiving from left task = 1
> 2 Sending to left task = 1
>
> 2 Sending to right task = 3
> 2 Receiving from right task = 3
>
> 3 Receiving from left task = 2
> 3 Sending to left task = 2
>
>
> 2nd Iteration (Timestep 2)
> -----------------------------------------------------
> 0 Waiting for MPI_Reduce()
>
> 1 Waiting for MPI_Reduce()
> 1 Done Waiting for MPI_Reduce()
>
> 2 Waiting for MPI_Reduce()
>
> 3 Waiting for MPI_Reduce()
>
>
> My Code:
>
> #include <iostream>
> #include <vector>
> #include <stdio.h>
> #include <stdlib.h>
> #include "mpi.h"
>
> #define MASTER 0
> int RtoL = 10;
> int LtoR = 20;
>
> int main ( int argc, char **argv )
> {
>     int nprocs, taskid;
>     FILE *f = NULL;
>     int left, right, i_start, i_end;
>     float sum = 0;
>     MPI_Status status;
>     float *y, *yold;
>     float *v, *vold;
>
>     // const int NUM_MASSES = 1000;
>     // const float Ktension = 0.1;
>     // const float Kdamping = 0.9;
>     // const float duration = 10.0;
>
> #if 0
>     if ( argc != 5 ) {
>         std::cout << "usage: " << argv[0] << " NUM_MASSES durationInSecs Ktension Kdamping\n";
>         return 2;
>     }
> #endif
>
>     int NUM_MASSES = atoi ( argv[1] );
>     float duration = atof ( argv[2] );
>     float Ktension = atof ( argv[3] );
>     float Kdamping = atof ( argv[4] );
>     const int PICKUP_POS = NUM_MASSES / 7;
>     const int OVERSAMPLING = 16;
>
>     MPI_Init(&argc,&argv);
>     MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
>     MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
>
>     if (taskid == 0) {
>         f = fopen ( "rstring.raw", "wb" );
>         if (!f) {
>             std::cout << "can't open output file\n";
>             return 1;
>         }
>     }
>
>     y = new float[NUM_MASSES];
>     yold = new float[NUM_MASSES];
>     v = new float[NUM_MASSES];
>
>     for (int i = 0; i < NUM_MASSES; i++ ) {
>         v[i] = 0.0f;
>         yold[i] = y[i] = 0.0f;
>         if (i == NUM_MASSES/2 )
>             yold[i] = 1.0;
>     }
>
>     if (taskid == 0) {
>         left = -1;
>         right = 1;
>     } else if (taskid == nprocs - 1) {
>         left = taskid - 1;
>         right = -1;
>     } else {
>         left = taskid - 1;
>         right = taskid + 1;
>     }
>
>     i_start = taskid * (NUM_MASSES/nprocs);
>     i_end = i_start + (NUM_MASSES/nprocs);
>
>     int numIters = duration * 44100 * OVERSAMPLING;
>     if (argc == 6) {
>         numIters = atoi(argv[5]);
>     }
>
>     for ( int t = 0; t < numIters; t++ ) {
>         float sum = 0;
>         float gsum = 0;
>
>         for ( int i = i_start; i < i_end; i++ ) {
>             if ( i == 0 || i == NUM_MASSES-1 ) {
>             } else {
>                 float accel = Ktension * (yold[i+1] + yold[i-1] - 2*yold[i]);
>                 v[i] += accel;
>                 v[i] *= Kdamping;
>                 y[i] = yold[i] + v[i];
>                 sum += y[i];
>             }
>         }
>
>         std::cout << taskid << " Waiting for MPI_Reduce()" << std::endl;
>         MPI_Reduce(&sum, &gsum, 1, MPI_FLOAT, MPI_SUM, MASTER, MPI_COMM_WORLD);
>         std::cout << taskid << " Done Waiting for MPI_Reduce()" << std::endl;
>
>         if (taskid != 0) {
>             MPI_Recv(&y[i_start-1], 1, MPI_FLOAT, left, LtoR, MPI_COMM_WORLD, &status);
taskid << " Receiving from left task = " << left > << std::endl; > MPI_Send(&y[i_start], 1, MPI_FLOAT, left, RtoL, MPI_COMM_WORLD); > std::cout << taskid << " Sending to left task = " << left > << std::endl; > } > if (taskid != nprocs - 1) { > MPI_Send(&y[i_end-1],1, MPI_FLOAT, right, LtoR, MPI_COMM_WORLD); > std::cout << taskid <<" Sending to right task = " << right > << std::endl; > MPI_Recv(&y[i_end], 1, MPI_FLOAT, right, RtoL, MPI_COMM_WORLD, > &status); > std::cout << taskid <<" Receiving from right task = " << right > << std::endl; > } > > //printf("After Reduce task = %d yold = %f %f %f %f\n", > taskid,yold[0], yold[1], yold[2], yold[3]); > //printf("After Reduce task = %d y = %f %f %f %f\n", taskid, y[0], > y[1], y[2], y[3]); > //printf("After Reduce task = %d v = %f %f %f %f\n", taskid, v[0], > v[1], v[2], v[3]); > > float *tmp = y; > y = yold; > yold = tmp; > > if (taskid == 0) { > //std::cout<< "sum = " << gsum << std::endl; > if ( t % OVERSAMPLING == 0 ) { > fwrite ( &gsum, sizeof(float), 1, f ); > } > } > } > if (taskid == 0) { > fclose ( f ); > } > MPI_Finalize(); > } > > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel > > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel > > _______________________________________________ > devel mailing list > de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/devel