From: Luo Xionghu <[email protected]>

This kernel calls the LLVM __builtin_clz builtin to generate the llvm.ctlz intrinsic, which is then lowered to the Gen clz instruction. This differs from the existing compiler_clz_int test, which is implemented with fbh.

Signed-off-by: Luo Xionghu <[email protected]>
---
 kernels/compiler_clz.cl | 12 ++++++++
 utests/CMakeLists.txt   |  1 +
 utests/compiler_clz.cpp | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)
 create mode 100644 kernels/compiler_clz.cl
 create mode 100644 utests/compiler_clz.cpp

diff --git a/kernels/compiler_clz.cl b/kernels/compiler_clz.cl
new file mode 100644
index 0000000..7ab6261
--- /dev/null
+++ b/kernels/compiler_clz.cl
@@ -0,0 +1,12 @@
+#define COMPILER_CLZ(TYPE) \
+    kernel void compiler_clz_##TYPE(global TYPE* src, global TYPE* dst) \
+{ \
+  __global TYPE* A = &src[get_global_id(0)]; \
+  __global TYPE* B = &dst[get_global_id(0)]; \
+  *B = __builtin_clz(*A); \
+}
+
+COMPILER_CLZ(uint)
+COMPILER_CLZ(ulong)
+COMPILER_CLZ(ushort)
+COMPILER_CLZ(uchar)
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 5b29c0b..193fef3 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -105,6 +105,7 @@ set (utests_sources
   compiler_write_only_shorts.cpp
   compiler_switch.cpp
   compiler_bswap.cpp
+  compiler_clz.cpp
   compiler_math.cpp
   compiler_atomic_functions.cpp
   compiler_async_copy.cpp
diff --git a/utests/compiler_clz.cpp b/utests/compiler_clz.cpp
new file mode 100644
index 0000000..901e19b
--- /dev/null
+++ b/utests/compiler_clz.cpp
@@ -0,0 +1,75 @@
+#include "utest_helper.hpp"
+
+namespace {
+
+// Returns the value of unsigned type U with every bit set.
+template <typename U>
+U get_max()
+{
+  // Complementing zero avoids shifting an int literal by 32 or more bits,
+  // which is undefined behaviour when U has more bits than int.
+  return static_cast<U>(~static_cast<U>(0));
+}
+
+// Runs the named kernel from compiler_clz.cl over get_max<U>() >> i and
+// compares each checked output element with the expected leading-zero count.
+template<typename U>
+void test(const char *kernel_name)
+{
+  const size_t n = 64;
+  const uint32_t bits = sizeof(U) * 8;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_clz", kernel_name);
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(U), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(U), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+  U max = get_max<U>();
+
+  OCL_MAP_BUFFER(0);
+  for (uint32_t i = 0; i < n; ++i) {
+    // Shifting by the operand width or more is undefined, so store 0 there;
+    // those elements are not checked below.
+    ((U*)buf_data[0])[i] = (i < bits) ? (max >> i) : 0;
+  }
+  OCL_UNMAP_BUFFER(0);
+
+  globals[0] = n;
+  locals[0] = 16;
+  OCL_NDRANGE(1);
+  OCL_MAP_BUFFER(1);
+  // The kernel passes its input to __builtin_clz. The asserted values are
+  // i+24 and i+16 for the 8- and 16-bit types, i for the 32-bit type, and
+  // for the 64-bit type the count matches the low 32 bits of the input only.
+  for (uint32_t i = 0; i < n; ++i) {
+    if(sizeof(U) == 1 && i < 8 )
+      OCL_ASSERT(((U*)buf_data[1])[i] == (i+24) );
+    else if(sizeof(U) == 2 && i < 16 )
+      OCL_ASSERT(((U*)buf_data[1])[i] == (i+16) );
+    else if(sizeof(U) == 4 && i < 32 )
+      OCL_ASSERT(((U*)buf_data[1])[i] == i );
+    else if(sizeof(U) == 8 && i < 32 )
+      OCL_ASSERT(((U*)buf_data[1])[i] == 0 );
+    else if(sizeof(U) == 8 && i > 31)
+      OCL_ASSERT(((U*)buf_data[1])[i] == (i-32) );
+  }
+  OCL_UNMAP_BUFFER(1);
+
+}
+
+}
+
+// Defines and registers one unit test per element type / kernel name pair.
+#define compiler_clz(type, kernel) \
+static void compiler_clz_ ##type(void)\
+{\
+  test<type>(# kernel);\
+}\
+MAKE_UTEST_FROM_FUNCTION(compiler_clz_ ## type);
+
+compiler_clz(uint64_t, compiler_clz_ulong)
+compiler_clz(uint32_t, compiler_clz_uint)
+compiler_clz(uint16_t, compiler_clz_ushort)
+compiler_clz(uint8_t, compiler_clz_uchar)
--
1.9.1

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet
