From: Luo Xionghu <xionghu....@intel.com> Signed-off-by: Luo Xionghu <xionghu....@intel.com> --- kernels/compiler_bswap.cl | 14 ++++++++++- utests/compiler_bswap.cpp | 63 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 5 deletions(-)
diff --git a/kernels/compiler_bswap.cl b/kernels/compiler_bswap.cl index 3a0a373..b1432b2 100644 --- a/kernels/compiler_bswap.cl +++ b/kernels/compiler_bswap.cl @@ -1,5 +1,15 @@ +#define SWAP64(A) \ +((((A) & 0xff00000000000000) >> 56) | \ + (((A) & 0x00ff000000000000) >> 40) | \ + (((A) & 0x0000ff0000000000) >> 24) | \ + (((A) & 0x000000ff00000000) >> 8) | \ + (((A) & 0x00000000ff000000) << 8) | \ + (((A) & 0x0000000000ff0000) << 24) | \ + (((A) & 0x000000000000ff00) << 40) | \ + (((A) & 0x00000000000000ff) << 56) ) + kernel void compiler_bswap(global uint * src0, global uint * dst0, global ushort * src1, global ushort * dst1, - int src2, global int * dst2, short src3, global short * dst3) { + int src2, global int * dst2, short src3, global short * dst3, global ulong* src4, global ulong* dst4, long src5, global long* dst5) { if (get_global_id(0) % 2 == 0) { dst0[get_global_id(0)] = __builtin_bswap32(src0[get_global_id(0)]); } else { @@ -13,5 +23,7 @@ kernel void compiler_bswap(global uint * src0, global uint * dst0, global ushort dst2[get_global_id(0)] = __builtin_bswap32(src2); dst3[get_global_id(0)] = __builtin_bswap16(src3); + dst4[get_global_id(0)] = SWAP64(src4[get_global_id(0)]); + dst5[get_global_id(0)] = SWAP64(src5); } diff --git a/utests/compiler_bswap.cpp b/utests/compiler_bswap.cpp index 3af9ef5..ed22750 100644 --- a/utests/compiler_bswap.cpp +++ b/utests/compiler_bswap.cpp @@ -7,6 +7,14 @@ (((uint32_t)(A) & 0x00ff0000) >> 8) | \ (((uint32_t)(A) & 0x0000ff00) << 8) | \ (((uint32_t)(A) & 0x000000ff) << 24)) +#define cpu_htonll(A) ((((uint64_t)(A) & 0xff00000000000000) >> 56) | \ + (((uint64_t)(A) & 0x00ff000000000000) >> 40) | \ + (((uint64_t)(A) & 0x0000ff0000000000) >> 24) | \ + (((uint64_t)(A) & 0x000000ff00000000) >> 8) | \ + (((uint64_t)(A) & 0x00000000ff000000) << 8) | \ + (((uint64_t)(A) & 0x0000000000ff0000) << 24) | \ + (((uint64_t)(A) & 0x000000000000ff00) << 40) | \ + (((uint64_t)(A) & 0x00000000000000ff) << 56) ) template <typename T> static void gen_rand_val(T & val) @@ -22,6 +30,8 @@ template <typename T> static void cpu(int global_id, T *src, T *dst) g = cpu_htons(f); else if (sizeof(T) == sizeof(int32_t)) g = cpu_htonl(f); + else if (sizeof(T) == sizeof(int64_t)) + g = cpu_htonll(f); dst[global_id] = g; } @@ -33,15 +43,19 @@ template <typename T> static void cpu(int global_id, T src, T *dst) g = cpu_htons(f); else if (sizeof(T) == sizeof(int32_t)) g = cpu_htonl(f); + else if (sizeof(T) == sizeof(int64_t)) + g = cpu_htonll(f); dst[global_id] = g; } template <typename T> inline static void print_data(T& val) { if(sizeof(T) == sizeof(uint16_t)) - printf(" 0x%hx", val); - else - printf(" 0x%x", val); + printf(" 0x%hx", (uint16_t)val); + else if(sizeof(T) == sizeof(uint32_t)) + printf(" 0x%x", (uint32_t)val); + else if(sizeof(T) == sizeof(uint64_t)) + printf(" 0x%lx", (uint64_t)val); } template <typename T> static void dump_data(T* raw, T* cpu, T* gpu, int n) @@ -78,7 +92,7 @@ template <typename T> static void dump_data(T raw, T* cpu, T* gpu, int n) void compiler_bswap(void) { - const size_t n = 32; + const size_t n = 16; uint32_t src0[n]; uint16_t src1[n]; uint32_t dst0[n]; @@ -87,6 +101,10 @@ void compiler_bswap(void) int32_t dst2[n]; int16_t src3 = static_cast<int16_t>(rand()); int16_t dst3[n]; + uint64_t src4[n]; + uint64_t dst4[n]; + int64_t src5 = static_cast<int64_t>(rand()) << 32| static_cast<int64_t>(rand()); + int64_t dst5[n]; // Setup kernel and buffers OCL_CREATE_KERNEL_FROM_FILE("compiler_bswap", "compiler_bswap"); @@ -108,6 +126,15 @@ void compiler_bswap(void) OCL_CREATE_BUFFER(buf[5], 0, sizeof(dst3), NULL); OCL_SET_ARG(7, sizeof(cl_mem), &buf[5]); + OCL_CREATE_BUFFER(buf[6], 0, sizeof(src4), NULL); + OCL_SET_ARG(8, sizeof(cl_mem), &buf[6]); + OCL_CREATE_BUFFER(buf[7], 0, sizeof(dst4), NULL); + OCL_SET_ARG(9, sizeof(cl_mem), &buf[7]); + + OCL_SET_ARG(10, sizeof(int64_t), &src5); + OCL_CREATE_BUFFER(buf[8], 0, sizeof(dst5), NULL); + OCL_SET_ARG(11, sizeof(cl_mem), &buf[8]); + OCL_MAP_BUFFER(0); for (int32_t i = 0; i < (int32_t) n; ++i) { gen_rand_val(src0[i]); @@ -142,6 +169,16 @@ void compiler_bswap(void) memset(buf_data[5], 0, sizeof(dst3)); OCL_UNMAP_BUFFER(5); + OCL_MAP_BUFFER(6); + for (int32_t i = 0; i < (int32_t) n; ++i) { + uint64_t x, y; + gen_rand_val(x); + gen_rand_val(y); + src4[i] = (x << 32)| y; + } + memcpy(buf_data[6], src4, sizeof(src4)); + OCL_UNMAP_BUFFER(6); + globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); @@ -173,6 +210,14 @@ void compiler_bswap(void) for (int32_t i = 0; i < (int32_t) n; ++i) cpu(i, src3, dst3); + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) + cpu(i, src4, dst4); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) + cpu(i, src5, dst5); + OCL_MAP_BUFFER(1); //dump_data(src0, dst0, (uint32_t *)buf_data[1], n); OCL_ASSERT(!memcmp(buf_data[1], dst0, sizeof(dst0))); @@ -192,6 +237,16 @@ void compiler_bswap(void) //dump_data(src3, dst3, (int16_t *)buf_data[5], n); OCL_ASSERT(!memcmp(buf_data[5], dst3, sizeof(dst3))); OCL_UNMAP_BUFFER(5); + + OCL_MAP_BUFFER(7); + //dump_data(src4, dst4, (uint64_t *)buf_data[7], n); + OCL_ASSERT(!memcmp(buf_data[7], dst4, sizeof(dst4))); + OCL_UNMAP_BUFFER(7); + + OCL_MAP_BUFFER(8); + //dump_data(src5, dst5, (int64_t *)buf_data[8], n); + OCL_ASSERT(!memcmp(buf_data[8], dst5, sizeof(dst5))); + OCL_UNMAP_BUFFER(8); } MAKE_UTEST_FROM_FUNCTION(compiler_bswap); -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet