What kind of network did you do these tests on?

Were both processes on the same machine or different machines?


On Nov 10, 2010, at 12:29 AM, Yiannis Papadopoulos wrote:

> Hello,
> 
> I am using OpenMPI 1.4.1. I have a small test case that calls
> MPI_Test() too many times. I see one or two random time spikes when
> this happens. On the other hand, if I avoid calling MPI_Test() based
> on a timeout, this problem disappears.
> 
> For example, with no timeout, the results I'm getting are:
> Total time: 1.00008 secs
> MPI_Test calls: 5541425
> Average: 0.0000001234 secs
> Max:     0.0002560616 secs [4374247]
> Min:     0.0000000000 secs [1]
> Successful MPI_Test(): 0.0000090599 secs
> 
> With timeout of 0.0001secs (which means I just delay a bit calling 
> MPI_Test()):
> Total time: 1.00009 secs
> MPI_Test calls: 9946
> Average: 0.0000000018 secs
> Max:     0.0000011921 secs [98]
> Min:     0.0000000000 secs [1]
> Successful MPI_Test(): 0.0000011921 secs
> 
> With a timeout of 0.01secs:
> Total time: 1.0001 secs
> MPI_Test calls: 101
> Average: 0.0000001936 secs
> Max:     0.0000028610 secs [100]
> Min:     0.0000000000 secs [2]
> Success: 0.0000028610 secs
> 
> 
> Things to notice:
> 1) the total time is similar
> 2) the number of MPI_Test() calls varies drastically
> 3) the average MPI_Test() call time is significantly less when a
> timeout is introduced
> 4) the call that takes the most time is not the last one (the
> successful) as one would expect except the case that the timeout is a
> bit big (0.01secs).
> 5) the time of the slowest MPI_Test() call is 2 orders of magnitude
> higher than the the successful MPI_Test() in the non-timeout version.
> 
> Does this qualify as a bug?
> 
> Thanks
> 
> 
> PS The code I have used follows. I compiled it with
> 
>  mpic++ -O3 mpi_test_delay.cc -o mpi_test_delay_no_timeout
> 
> for no timeout and
> 
>  mpic++ -O3 -DTIMEOUT=0.01 mpi_test_delay.cc -o mpi_test_delay
> 
> for a timeout of 0.01secs.
> 
> // mpi_test_delay.cc
> #include <iostream>
> #include <iomanip>
> #include <mpi.h>
> #include <unistd.h>
> #include <vector>
> #include <algorithm>
> #include <numeric>
> 
> int main(int argc, char* argv[])
> {
>  MPI_Init(&argc, &argv);
> 
>  MPI_Comm comm = MPI_COMM_WORLD;
>  int rank = MPI_PROC_NULL;
>  int size = MPI_PROC_NULL;
>  MPI_Comm_rank(comm, &rank);
>  MPI_Comm_size(comm, &size);
> 
>  if (size<2) {
>    MPI_Abort(comm, -1);
>  }
> 
>  const std::size_t bufsize = 1024;
>  char buffer[bufsize];
> 
>  // warm-up
>  unsigned int N = 0;
>  if (rank==0) {
>    sleep(1);
>    MPI_Send(buffer, bufsize, MPI_BYTE, 1, 0, comm);
>  }
>  else {
>    MPI_Request req;
>    MPI_Irecv(buffer, bufsize, MPI_BYTE, MPI_ANY_TAG, 0, comm, &req);
>    for (int flag = 0; flag==0; ++N) {
>      MPI_Test(&req, &flag, MPI_STATUS_IGNORE);
>    }
>  }
> 
>  // measurement container
>  std::vector<double> time_res;
>  time_res.reserve(N * 1.1);
> 
>  MPI_Barrier(comm);
> 
>  // benchmark MPI_Test
>  double total_time = MPI_Wtime();
> 
>  if (rank==0) {
>    sleep(1);
>    MPI_Send(buffer, bufsize, MPI_BYTE, 1, 0, comm);
>  }
>  else if (rank==1) {
>    MPI_Request req;
>    MPI_Irecv(buffer, bufsize, MPI_BYTE, MPI_ANY_TAG, 0, comm, &req);
> 
> #ifdef TIMEOUT
>    double timeout = 0.0;
> #endif
>    for (int flag = 0; flag==0; ) {
> #ifdef TIMEOUT
>      double ntimeout = MPI_Wtime();
>      if ( (ntimeout-timeout)<TIMEOUT) continue;
>      timeout = ntimeout;
> #endif
>      double time = MPI_Wtime();
>      MPI_Test(&req, &flag, MPI_STATUS_IGNORE);
>      time = MPI_Wtime() - time;
>      time_res.push_back(time);
>    }
>  }
>  total_time = MPI_Wtime() - total_time;
> 
>  MPI_Barrier(comm);
> 
>  // print total time for execution, number of MPI_Test calls,
>  // average, max and min time for MPI_Test and how much was the final
>  // MPI_Test
>  if (rank==1) {
>    double average = std::accumulate(time_res.begin(), time_res.end(),
> 0.0)/time_res.size();
>    std::vector<double>::iterator max =
> std::max_element(time_res.begin(), time_res.end());
>    std::vector<double>::iterator min =
> std::min_element(time_res.begin(), time_res.end());
> 
>    std::cout << "Total time: " << total_time << " secs\n"
>              << "MPI_Test calls: " << time_res.size() << '\n'
>              << std::fixed << std::setprecision(10)
>              << "Average: " << average << " secs\n"
>              << "Max:     " << *max << " secs [" <<
> std::distance(time_res.begin(), max) << "]\n"
>              << "Min:     " << *min << " secs [" <<
> std::distance(time_res.begin(), min) << "]\n"
>              << "Success: " << time_res[time_res.size()-1] << " secs"
> << std::endl;
>  }
> 
>  MPI_Finalize();
> 
>  return 0;
> }
> _______________________________________________
> users mailing list
> us...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/users


-- 
Jeff Squyres
jsquy...@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/


Reply via email to