Is this the latest version, or do you still have a newer version in hand? I remember you said a few days ago that you had a newer version; I'm just not sure.
On Wed, Dec 24, 2014 at 03:44:33PM +0800, Yang Rong wrote: > Add there two benchmark to compare the buffer and image performance > > V2: init the coord before read image. > Signed-off-by: Yang Rong <[email protected]> > --- > benchmark/CMakeLists.txt | 4 ++- > benchmark/benchmark_read_buffer.cpp | 49 +++++++++++++++++++++++++++ > benchmark/benchmark_read_image.cpp | 67 > +++++++++++++++++++++++++++++++++++++ > kernels/compiler_read_buffer.cl | 15 +++++++++ > kernels/compiler_read_image.cl | 25 ++++++++++++++ > 5 files changed, 159 insertions(+), 1 deletion(-) > create mode 100644 benchmark/benchmark_read_buffer.cpp > create mode 100644 benchmark/benchmark_read_image.cpp > create mode 100644 kernels/compiler_read_buffer.cl > create mode 100644 kernels/compiler_read_image.cl > > diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt > index ac2d8aa..9a2bd77 100644 > --- a/benchmark/CMakeLists.txt > +++ b/benchmark/CMakeLists.txt > @@ -12,7 +12,9 @@ set (benchmark_sources > ../utests/utest_helper.cpp > ../utests/vload_bench.cpp > enqueue_copy_buf.cpp > - benchmark_use_host_ptr_buffer.cpp) > + benchmark_use_host_ptr_buffer.cpp > + benchmark_read_buffer.cpp > + benchmark_read_image.cpp) > > > SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}") > diff --git a/benchmark/benchmark_read_buffer.cpp > b/benchmark/benchmark_read_buffer.cpp > new file mode 100644 > index 0000000..31a1f59 > --- /dev/null > +++ b/benchmark/benchmark_read_buffer.cpp > @@ -0,0 +1,49 @@ > +#include "utests/utest_helper.hpp" > +#include <sys/time.h> > + > +int benchmark_read_buffer(void) > +{ > + struct timeval start,stop; > + > + const size_t n = 1024 * 1024; > + int count = 16; > + const size_t sz = 4 * n * count; > + > + OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(float), NULL); > + OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(float), NULL); > + OCL_CREATE_BUFFER(buf[2], 0, sz * sizeof(float), NULL); > + > + OCL_CREATE_KERNEL("compiler_read_buffer"); > + > + OCL_SET_ARG(0, 
sizeof(cl_mem), &buf[0]); > + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); > + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); > + > + OCL_MAP_BUFFER(0); > + OCL_MAP_BUFFER(1); > + for (size_t i = 0; i < sz; i ++) { > + ((float *)(buf_data[0]))[i] = rand(); > + ((float *)(buf_data[1]))[i] = rand(); > + } > + OCL_UNMAP_BUFFER(0); > + OCL_UNMAP_BUFFER(1); > + > + // Setup kernel and buffers > + globals[0] = n; > + locals[0] = 256; > + > + gettimeofday(&start,0); > + for (size_t i=0; i<100; i++) { > + OCL_NDRANGE(1); > + } > + OCL_FINISH(); > + gettimeofday(&stop,0); > + > + clReleaseMemObject(buf[0]); > + free(buf_data[0]); > + buf_data[0] = NULL; > + > + return time_subtract(&stop, &start, 0); > +} > + > +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_buffer); > diff --git a/benchmark/benchmark_read_image.cpp > b/benchmark/benchmark_read_image.cpp > new file mode 100644 > index 0000000..913b6e6 > --- /dev/null > +++ b/benchmark/benchmark_read_image.cpp > @@ -0,0 +1,67 @@ > +#include <string.h> > +#include "utests/utest_helper.hpp" > +#include <sys/time.h> > + > +int benchmark_read_image(void) > +{ > + struct timeval start,stop; > + > + const size_t x_count = 4; > + const size_t y_count = 4; > + const size_t w = 1024; > + const size_t h = 1024; > + const size_t sz = 4 * x_count * y_count * w * h; > + cl_image_format format; > + cl_image_desc desc; > + > + memset(&desc, 0x0, sizeof(cl_image_desc)); > + memset(&format, 0x0, sizeof(cl_image_format)); > + > + // Setup kernel and images > + OCL_CREATE_KERNEL("compiler_read_image"); > + buf_data[0] = (uint32_t*) malloc(sizeof(float) * sz); > + buf_data[1] = (uint32_t*) malloc(sizeof(float) * sz); > + for (uint32_t i = 0; i < sz; ++i) { > + ((float*)buf_data[0])[i] = rand(); > + ((float*)buf_data[1])[i] = rand(); > + } > + > + format.image_channel_order = CL_RGBA; > + format.image_channel_data_type = CL_FLOAT; > + desc.image_type = CL_MEM_OBJECT_IMAGE2D; > + desc.image_width = w; > + desc.image_height = h; > + desc.image_row_pitch = w * 
sizeof(float) * 4; > + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, > buf_data[0]); > + OCL_CREATE_IMAGE(buf[1], CL_MEM_COPY_HOST_PTR, &format, &desc, > buf_data[1]); > + OCL_CREATE_BUFFER(buf[2], 0, sz * sizeof(float), NULL); > + > + free(buf_data[0]); > + buf_data[0] = NULL; > + free(buf_data[1]); > + buf_data[1] = NULL; > + > + // Run the kernel > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); > + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); > + globals[0] = w; > + globals[1] = h; > + locals[0] = 16; > + locals[1] = 16; > + > + gettimeofday(&start,0); > + for (size_t i=0; i<100; i++) { > + OCL_NDRANGE(2); > + } > + OCL_FINISH(); > + gettimeofday(&stop,0); > + > + clReleaseMemObject(buf[0]); > + free(buf_data[0]); > + buf_data[0] = NULL; > + > + return time_subtract(&stop, &start, 0); > +} > + > +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_image); > diff --git a/kernels/compiler_read_buffer.cl b/kernels/compiler_read_buffer.cl > new file mode 100644 > index 0000000..b6c11bd > --- /dev/null > +++ b/kernels/compiler_read_buffer.cl > @@ -0,0 +1,15 @@ > +#define COUNT 16 > + > +__kernel void > +compiler_read_buffer(__global float4* src0, __global float4* src1, __global > float4* dst) > +{ > + float4 sum = 0; > + int offset = 0, i = 0; > + int id = (int)get_global_id(0); > + int sz = (int)get_global_size(0); > + for(i=0; i<COUNT; i++) { > + sum = sum + src0[offset + i] + src1[offset + i]; > + offset += sz; > + } > + dst[id] = sum; > +} > diff --git a/kernels/compiler_read_image.cl b/kernels/compiler_read_image.cl > new file mode 100644 > index 0000000..f059743 > --- /dev/null > +++ b/kernels/compiler_read_image.cl > @@ -0,0 +1,25 @@ > +#define X_COUNT 4 > +#define Y_COUNT 4 > + > +__kernel void > +compiler_read_image(__read_only image2d_t src0, __read_only image2d_t src1, > __global float4* dst) > +{ > + float4 sum = 0; > + int2 coord; > + int x_sz = (int)get_global_size(0); > + int y_sz = (int)get_global_size(1); 
> + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE| CLK_ADDRESS_CLAMP| > CLK_FILTER_NEAREST; > + int i, j; > + > + int x = (int)get_global_id(0); > + int y = (int)get_global_id(1); > + > + for(i=0; i<X_COUNT; i++) { > + coord.x = x + i * x_sz; > + for(j=0; j<Y_COUNT; j++) { > + coord.y = y + j * y_sz; > + sum = sum + read_imagef(src0, sampler, coord) + read_imagef(src1, > sampler, coord); > + } > + } > + dst[y * x_sz + x] = sum; > +} > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
