Hi Zhigang,
        Thanks for your reply.
        Firstly, I think we should add unit descripition to output, or users 
can't understand what
output means. 
        In "return time_subtract(&stop, &start, 0) / 100", 100 is iteration 
times. Users are interested 
in the time it takes when one cl api is called, not the total time. 
        And I think changing return value from int to double can fit more 
conditions. Sometimes the
execution time of one api is very short, int type is not precise enough for 
users to comparing
performance change.
        Both of 'MB/s' and 'GB/s' are ok for me. But since your internal output 
in vload_bench are 
'GB/s', I think using 'GB/s' (with return double value ) are better.

-----Original Message-----
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of 
Zhigang Gong
Sent: Thursday, January 29, 2015 17:19
To: Weng, Chuanbo
Cc: beignet@lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 1/2] Refine benchmark output.

On Tue, Jan 27, 2015 at 02:18:41PM +0800, Chuanbo Weng wrote:
> Change time unit from int to double, becasue it's not precise enough 
> for some small workload. Add unit description to output, so users can 
> understand more easily about the output. Also fix a minor bug of 
> return value in vload_bench.cpp.
> 
> Signed-off-by: Chuanbo Weng <chuanbo.w...@intel.com>
> ---
>  benchmark/benchmark_read_buffer.cpp         |  6 ++---
>  benchmark/benchmark_read_image.cpp          |  6 ++---
>  benchmark/benchmark_use_host_ptr_buffer.cpp |  6 ++---
>  benchmark/enqueue_copy_buf.cpp              |  6 ++---
>  utests/utest.hpp                            | 38 
> +++++++++++++++++++++++------
>  utests/utest_helper.cpp                     |  6 ++---
>  utests/utest_helper.hpp                     |  2 +-
>  utests/vload_bench.cpp                      |  8 +++---
>  8 files changed, 50 insertions(+), 28 deletions(-)
> 
> diff --git a/benchmark/benchmark_read_buffer.cpp 
> b/benchmark/benchmark_read_buffer.cpp
> index 31a1f59..c7fc66b 100644
> --- a/benchmark/benchmark_read_buffer.cpp
> +++ b/benchmark/benchmark_read_buffer.cpp
> @@ -1,7 +1,7 @@
>  #include "utests/utest_helper.hpp"
>  #include <sys/time.h>
>  
> -int benchmark_read_buffer(void)
> +double benchmark_read_buffer(void)
>  {
>    struct timeval start,stop;
>  
> @@ -43,7 +43,7 @@ int benchmark_read_buffer(void)
>    free(buf_data[0]);
>    buf_data[0] = NULL;
>  
> -  return time_subtract(&stop, &start, 0);
> +  return time_subtract(&stop, &start, 0) / 100;
Could you explain the above change?

>  }
>  
> -MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_buffer);
> +MAKE_EXECTIME_BENCHMARK_FROM_FUNCTION(benchmark_read_buffer);
> diff --git a/benchmark/benchmark_read_image.cpp 
> b/benchmark/benchmark_read_image.cpp
> index 48aa987..4e63c00 100644
> --- a/benchmark/benchmark_read_image.cpp
> +++ b/benchmark/benchmark_read_image.cpp
> @@ -2,7 +2,7 @@
>  #include "utests/utest_helper.hpp"
>  #include <sys/time.h>
>  
> -int benchmark_read_image(void)
> +double benchmark_read_image(void)
>  {
>    struct timeval start,stop;
>  
> @@ -61,7 +61,7 @@ int benchmark_read_image(void)
>    free(buf_data[0]);
>    buf_data[0] = NULL;
>  
> -  return time_subtract(&stop, &start, 0);
> +  return time_subtract(&stop, &start, 0) / 100;
as well this one.

>  }
>  
> -MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_image);
> +MAKE_EXECTIME_BENCHMARK_FROM_FUNCTION(benchmark_read_image);
> diff --git a/benchmark/benchmark_use_host_ptr_buffer.cpp 
> b/benchmark/benchmark_use_host_ptr_buffer.cpp
> index 80b6c5c..421cd11 100644
> --- a/benchmark/benchmark_use_host_ptr_buffer.cpp
> +++ b/benchmark/benchmark_use_host_ptr_buffer.cpp
> @@ -1,7 +1,7 @@
>  #include "utests/utest_helper.hpp"
>  #include <sys/time.h>
>  
> -int benchmark_use_host_ptr_buffer(void)
> +double benchmark_use_host_ptr_buffer(void)
>  {
>    struct timeval start,stop;
>  
> @@ -32,7 +32,7 @@ int benchmark_use_host_ptr_buffer(void)
>    free(buf_data[0]);
>    buf_data[0] = NULL;
>  
> -  return time_subtract(&stop, &start, 0);
> +  return time_subtract(&stop, &start, 0) / 100;
And this one.

>  }
>  
> -MAKE_BENCHMARK_FROM_FUNCTION(benchmark_use_host_ptr_buffer);
> +MAKE_EXECTIME_BENCHMARK_FROM_FUNCTION(benchmark_use_host_ptr_buffer);
> diff --git a/benchmark/enqueue_copy_buf.cpp 
> b/benchmark/enqueue_copy_buf.cpp index f012cf7..78dbe75 100644
> --- a/benchmark/enqueue_copy_buf.cpp
> +++ b/benchmark/enqueue_copy_buf.cpp
> @@ -28,7 +28,7 @@ void test_copy_buf(size_t sz, size_t src_off, size_t 
> dst_off, size_t cb)
>      src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));  }
>  
> -int enqueue_copy_buf(void)
> +double enqueue_copy_buf(void)
>  {
>    size_t i;
>    const size_t sz = 127 *1023 * 1023; @@ -41,7 +41,7 @@ int 
> enqueue_copy_buf(void)
>    }
>  
>    gettimeofday(&stop,0);
> -  return time_subtract(&stop, &start, 0);
> +  return time_subtract(&stop, &start, 0) / 10;

Changed to 10 here? Why?

>  }
>  
> -MAKE_BENCHMARK_FROM_FUNCTION(enqueue_copy_buf);
> +MAKE_EXECTIME_BENCHMARK_FROM_FUNCTION(enqueue_copy_buf);
> diff --git a/utests/utest.hpp b/utests/utest.hpp index 
> b028b64..6742e8d 100644
> --- a/utests/utest.hpp
> +++ b/utests/utest.hpp
> @@ -96,12 +96,20 @@ struct UTest
>    static const UTest __##FN##__(__ANON__##FN##__, #FN, true);
>  
>  /*! Turn a function into a unit performance test */ -#define 
> MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(FN, KEEP_PROGRAM) \
> -  static void __ANON__##FN##__(void) { BENCHMARK(FN()); } \
> +#define MAKE_BANDWIDTH_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(FN, 
> +KEEP_PROGRAM) \
> +  static void __ANON__##FN##__(void) { BANDWIDTH_BENCHMARK(FN()); } \
>    static const UTest __##FN##__(__ANON__##FN##__, #FN, true, false, 
> !(KEEP_PROGRAM));
>  
> -#define MAKE_BENCHMARK_FROM_FUNCTION(FN) \
> -  static void __ANON__##FN##__(void) { BENCHMARK(FN()); } \
> +#define MAKE_EXECTIME_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(FN, 
> +KEEP_PROGRAM) \
> +  static void __ANON__##FN##__(void) { EXECTIME_BENCHMARK(FN()); } \
> +  static const UTest __##FN##__(__ANON__##FN##__, #FN, true, false, 
> +!(KEEP_PROGRAM));
> +
> +#define MAKE_BANDWIDTH_BENCHMARK_FROM_FUNCTION(FN) \
> +  static void __ANON__##FN##__(void) { BANDWIDTH_BENCHMARK(FN()); } \
> +  static const UTest __##FN##__(__ANON__##FN##__, #FN, true);
> +
> +#define MAKE_EXECTIME_BENCHMARK_FROM_FUNCTION(FN) \
> +  static void __ANON__##FN##__(void) { EXECTIME_BENCHMARK(FN()); } \
>    static const UTest __##FN##__(__ANON__##FN##__, #FN, true);
>  
>  
> @@ -133,12 +141,12 @@ struct UTest
>      } \
>    } while (0)
>  
> -#define BENCHMARK(EXPR) \
> +#define BANDWIDTH_BENCHMARK(EXPR) \
>   do { \
> -    int ret = 0;\
> +    double ret = 0;\
>      try { \
>        ret = EXPR; \
> -      std::cout << "    [Result: " << ret << "]    [SUCCESS]" << std::endl; \
> +      std::cout << "    [Result: " << ret << " GB/S]    [SUCCESS]" << 
> std::endl; \
>        UTest::retStatistics.passCount += 1; \
>      } \
>      catch (Exception e) { \
> @@ -147,5 +155,19 @@ struct UTest
>        UTest::retStatistics.failCount++; \
>      } \
>    } while (0)
> -#endif /* __UTEST_UTEST_HPP__ */
>  
> +#define EXECTIME_BENCHMARK(EXPR) \
> + do { \
> +    double ret = 0;\
> +    try { \
> +      ret = EXPR; \
> +      std::cout << "    [Result: " << ret << " ms]    [SUCCESS]" << 
> std::endl; \
> +      UTest::retStatistics.passCount += 1; \
> +    } \
> +    catch (Exception e) { \
> +      std::cout << "  " << #EXPR << "    [FAILED]" << std::endl; \
> +      std::cout << "    " << e.what() << std::endl; \
> +      UTest::retStatistics.failCount++; \
> +    } \
> +  } while (0)
> +#endif /* __UTEST_UTEST_HPP__ */
> diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp index 
> 591054e..d3c378e 100644
> --- a/utests/utest_helper.cpp
> +++ b/utests/utest_helper.cpp
> @@ -681,7 +681,7 @@ int cl_INT_ULP(int int_number)
>    return 0;
>  }
>  
> -int time_subtract(struct timeval *y, struct timeval *x, struct 
> timeval *result)
> +double time_subtract(struct timeval *y, struct timeval *x, struct 
> +timeval *result)
>  {
>    if ( x->tv_sec > y->tv_sec )
>      return   -1;
> @@ -699,6 +699,6 @@ int time_subtract(struct timeval *y, struct timeval *x, 
> struct timeval *result)
>      }
>    }
>  
> -  int msec = 1000.0*(y->tv_sec - x->tv_sec) + (y->tv_usec - 
> x->tv_usec)/1000.0;
> +  double msec = 1000.0*(y->tv_sec - x->tv_sec) + (y->tv_usec - 
> + x->tv_usec)/1000.0;
>    return msec;
> -}
> \ No newline at end of file
> +}
> diff --git a/utests/utest_helper.hpp b/utests/utest_helper.hpp index 
> 5d8e835..6d09766 100644
> --- a/utests/utest_helper.hpp
> +++ b/utests/utest_helper.hpp
> @@ -231,7 +231,7 @@ extern float cl_FLT_ULP(float float_number);  
> extern int cl_INT_ULP(int int_number);
>  
>  /* subtract the time */
> -int time_subtract(struct timeval *y, struct timeval *x, struct 
> timeval *result);
> +double time_subtract(struct timeval *y, struct timeval *x, struct 
> +timeval *result);
>  
>  #endif /* __UTEST_HELPER_HPP__ */
>  
> diff --git a/utests/vload_bench.cpp b/utests/vload_bench.cpp index 
> a7703fc..1963c73 100644
> --- a/utests/vload_bench.cpp
> +++ b/utests/vload_bench.cpp
> @@ -34,8 +34,8 @@ static double vload_bench(const char *kernelFunc, uint32_t 
> N, uint32_t offset, b
>      OCL_FINISH();
>      gettimeofday(&end, NULL);
>      double elapsed = (end.tv_sec - start.tv_sec) * 1e6 + (end.tv_usec - 
> start.tv_usec);
> -    double bandwidth = (globals[0] * (N_ITERATIONS) * sizeof(T) * N) / 
> elapsed;
> -    printf("\t%2.1fGB/S\n", bandwidth/1000.);
> +    double bandwidth = (globals[0] * (N_ITERATIONS) * sizeof(T) * N) / 
> (elapsed * 1000.);
> +    printf("\t%2.1fGB/S\n", bandwidth);

The return value bandwidth is using 'MB/s' as unit. That's the original 
intention not a bug. One strong reason is that the original benchmark framework 
use integer as the return value of each case, and if you use GB/s as unit, then 
you may get zero for some slow cases and get the same data for 3.0GB/s and 
3.9/GB/s which is bad.

Now, you change the return value type to double, which is no obvious need to 
change the above code, unless you want to unify all the bandwidth cases's unit 
to "GB/s".

And that should be worth to do. You may need to refine this patch to unify all 
bandwidth benchmarking case to use the same unit, GB/s is ok for me.

Thanks,
Zhigang Gong.

>      return bandwidth;
>    } else {
>      // Check result
> @@ -71,7 +71,7 @@ VLOAD_TEST(float, float)  #endif
>  
>  #define VLOAD_BENCH(T, kT) \
> -static int vload_bench_ ##kT(void) \
> +static double vload_bench_ ##kT(void) \
>  { \
>    uint8_t vectorSize[] = {2, 3, 4, 8, 16}; \
>    double totBandwidth = 0; \
> @@ -89,7 +89,7 @@ static int vload_bench_ ##kT(void) \
>    } \
>    return totBandwidth/j;\
>  }\
> -MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(vload_bench_ ##kT, true)
> +MAKE_BANDWIDTH_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(vload_bench_ 
> +##kT, true)
>  
>  #ifdef BUILD_BENCHMARK
>  VLOAD_BENCH(uint8_t, uchar)
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to