Some refinements for the copy_buf benchmark:
1. We should measure only the execution time of clEnqueueCopyBuffer
   (buffer creation and initialization time should not be included).
2. Add clFinish before gettimeofday.
3. Rename the file in order to keep the name format the same as
   other benchmarks.

v2: Change output measurement from time to bandwidth.

Signed-off-by: Chuanbo Weng <[email protected]>
---
 benchmark/CMakeLists.txt         |  2 +-
 benchmark/benchmark_copy_buf.cpp | 51 ++++++++++++++++++++++++++++++++++++++++
 benchmark/enqueue_copy_buf.cpp   | 49 --------------------------------------
 3 files changed, 52 insertions(+), 50 deletions(-)
 create mode 100644 benchmark/benchmark_copy_buf.cpp
 delete mode 100644 benchmark/enqueue_copy_buf.cpp

diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 9a2bd77..73dbe85 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -11,7 +11,7 @@ set (benchmark_sources
   ../utests/utest_file_map.cpp
   ../utests/utest_helper.cpp
   ../utests/vload_bench.cpp
-  enqueue_copy_buf.cpp
+  benchmark_copy_buf.cpp
   benchmark_use_host_ptr_buffer.cpp
   benchmark_read_buffer.cpp
   benchmark_read_image.cpp)
diff --git a/benchmark/benchmark_copy_buf.cpp b/benchmark/benchmark_copy_buf.cpp
new file mode 100644
index 0000000..e21c936
--- /dev/null
+++ b/benchmark/benchmark_copy_buf.cpp
@@ -0,0 +1,51 @@
+#include "utests/utest_helper.hpp"
+#include <sys/time.h>
+
+/* Measure clEnqueueCopyBuffer bandwidth between two device buffers.
+ * Only the 100 timed copies are measured; buffer creation, source
+ * initialization and the warm-up copy are excluded.
+ * Returns BANDWIDTH(total bytes copied, elapsed time). */
+double benchmark_copy_buf(void)
+{
+  size_t i;
+  const size_t sz = 127 *1023 * 1023;
+  const size_t cb = sz;
+  size_t src_off =0, dst_off = 0;
+  struct timeval start,stop;
+  cl_char* buf0;
+
+  OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(char), NULL);
+
+  /* Map the whole buffer: every one of its sz bytes is written below. */
+  buf0 = (cl_char *)clEnqueueMapBuffer(queue, buf[0], CL_TRUE, CL_MAP_WRITE,
+      0, sz * sizeof(char), 0, NULL, NULL, NULL);
+  OCL_ASSERT(buf0 != NULL);
+
+  for (i=0; i < sz; i++) {
+    buf0[i]=(rand() & 0xFF);
+  }
+
+  clEnqueueUnmapMemObject(queue, buf[0], buf0, 0, NULL, NULL);
+
+  /* Internal kernel will be built for the first time of calling
+   * clEnqueueCopyBuffer, so the first execution time of clEnqueueCopyBuffer
+   * will be much longer. It should not be added to benchmark time. */
+  OCL_ASSERT(CL_SUCCESS == clEnqueueCopyBuffer(queue, buf[0], buf[1],
+        src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
+  OCL_FINISH();
+  gettimeofday(&start,0);
+
+  for (i=0; i<100; i++) {
+    OCL_ASSERT(CL_SUCCESS == clEnqueueCopyBuffer(queue, buf[0], buf[1],
+          src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
+  }
+  OCL_FINISH();
+
+  gettimeofday(&stop,0);
+  double elapsed = time_subtract(&stop, &start, 0);
+
+  return BANDWIDTH(sz * sizeof(char) * 100, elapsed);
+}
+
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_copy_buf);
diff --git a/benchmark/enqueue_copy_buf.cpp b/benchmark/enqueue_copy_buf.cpp
deleted file mode 100644
index 549c8b1..0000000
--- a/benchmark/enqueue_copy_buf.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "utests/utest_helper.hpp"
-#include <sys/time.h>
-
-void test_copy_buf(size_t sz, size_t src_off, size_t dst_off, size_t cb)
-{
-  unsigned int i;
-  cl_char* buf0;
-
-  OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(char), NULL);
-  OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(char), NULL);
-
-  buf0 = (cl_char *)clEnqueueMapBuffer(queue, buf[0], CL_TRUE, CL_MAP_WRITE, 
0, sizeof(char), 0, NULL, NULL, NULL);
-
-  for (i=0; i < sz; i++) {
-    buf0[i]=(rand() & 0xFF);
-  }
-
-  clEnqueueUnmapMemObject(queue, buf[0], buf0, 0, NULL, NULL);
-
-  if (src_off + cb > sz || dst_off + cb > sz) {
-  /* Expect Error. */
-    OCL_ASSERT(clEnqueueCopyBuffer(queue, buf[0], buf[1],
-                 src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
-    return;
-  }
-
-  OCL_ASSERT(CL_SUCCESS == clEnqueueCopyBuffer(queue, buf[0], buf[1],
-    src_off, dst_off, cb*sizeof(char), 0, NULL, NULL));
-}
-
-double enqueue_copy_buf(void)
-{
-  size_t i;
-  const size_t sz = 127 *1023 * 1023;
-  struct timeval start,stop;
-
-  gettimeofday(&start,0);
-
-  for (i=0; i<10; i++) {
-    test_copy_buf(sz, 0, 0, sz);
-  }
-
-  gettimeofday(&stop,0);
-  double elapsed = time_subtract(&stop, &start, 0);
-
-  return BANDWIDTH(sz * sizeof(char) * 10, elapsed);
-}
-
-MAKE_BENCHMARK_FROM_FUNCTION(enqueue_copy_buf);
-- 
1.9.1

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to