From: Junyan He <junyan...@linux.intel.com> Signed-off-by: Junyan He <junyan...@linux.intel.com> --- kernels/compiler_double_2.cl | 9 - kernels/compiler_double_convert.cl | 102 ++++++ kernels/compiler_half_convert.cl | 11 +- utests/CMakeLists.txt | 1 + utests/compiler_double_2.cpp | 47 --- utests/compiler_double_convert.cpp | 622 +++++++++++++++++++++++++++++++++++++ utests/compiler_half.cpp | 102 ++++++ 7 files changed, 837 insertions(+), 57 deletions(-) delete mode 100644 kernels/compiler_double_2.cl create mode 100644 kernels/compiler_double_convert.cl delete mode 100644 utests/compiler_double_2.cpp create mode 100644 utests/compiler_double_convert.cpp
diff --git a/kernels/compiler_double_2.cl b/kernels/compiler_double_2.cl deleted file mode 100644 index 20ee614..0000000 --- a/kernels/compiler_double_2.cl +++ /dev/null @@ -1,9 +0,0 @@ -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -kernel void compiler_double_2(global float *src, global double *dst) { - int i = get_global_id(0); - float d = 1.234567890123456789f; - if (i < 14) - dst[i] = d * (d + src[i]); - else - dst[i] = 14; -} diff --git a/kernels/compiler_double_convert.cl b/kernels/compiler_double_convert.cl new file mode 100644 index 0000000..344f24e --- /dev/null +++ b/kernels/compiler_double_convert.cl @@ -0,0 +1,102 @@ +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +kernel void compiler_double_convert_int(global double *src, global int *dst0, global uint* dst1) { + int i = get_global_id(0); + + if (i%3) { + int i32 = src[i]; + dst0[i] = i32; + + uint u32 = src[i]; + dst1[i] = u32; + } +} + +kernel void compiler_double_convert_float(global double *src, global float *dst) { + int i = get_global_id(0); + + float f = src[i]; + dst[i] = f; +} + +kernel void compiler_double_convert_short(global double *src, global short *dst0, global ushort * dst1) { + int i = get_global_id(0); + + if (i%3) { + short i16 = src[i]; + dst0[i] = i16; + + ushort u16 = src[i]; + dst1[i] = u16; + } +} + +kernel void compiler_double_convert_long(global double *src, global long *dst0, global ulong * dst1) { + int i = get_global_id(0); + + if (i%3) { + long i64 = src[i]; + dst0[i] = i64; + + ulong u64 = src[i]; + dst1[i] = u64; + } +} + +kernel void compiler_double_convert_char(global double *src, global char *dst0, global uchar * dst1) { + int i = get_global_id(0); + + if (i%3) { + char i8 = src[i]; + dst0[i] = i8; + + uchar u8 = src[i]; + dst1[i] = u8; + } +} + +kernel void compiler_long_convert_double(global long *src0, global ulong *src1, global double * dst0, global double *dst1) { + int i = get_global_id(0); + + double d = src0[i]; + dst0[i] = d; + + d = src1[i]; + dst1[i] = d; 
+} + +kernel void compiler_int_convert_double(global int *src0, global uint *src1, global double * dst0, global double *dst1) { + int i = get_global_id(0); + + double d = src0[i]; + dst0[i] = d; + + d = src1[i]; + dst1[i] = d; +} + +kernel void compiler_short_convert_double(global short *src0, global ushort *src1, global double * dst0, global double *dst1) { + int i = get_global_id(0); + + double d = src0[i]; + dst0[i] = d; + + d = src1[i]; + dst1[i] = d; +} + +kernel void compiler_char_convert_double(global char *src0, global uchar *src1, global double * dst0, global double *dst1) { + int i = get_global_id(0); + + double d = src0[i]; + dst0[i] = d; + + d = src1[i]; + dst1[i] = d; +} + +kernel void compiler_float_convert_double(global float *src, global double *dst) { + int i = get_global_id(0); + + double d = src[i]; + dst[i] = d; +} diff --git a/kernels/compiler_half_convert.cl b/kernels/compiler_half_convert.cl index c28921e..3587e19 100644 --- a/kernels/compiler_half_convert.cl +++ b/kernels/compiler_half_convert.cl @@ -1,5 +1,4 @@ #pragma OPENCL EXTENSION cl_khr_fp16 : enable - kernel void compiler_half_to_long_sat(global half *src, global long *dst) { int i = get_global_id(0); dst[i] = convert_long_sat(src[i]); @@ -54,3 +53,13 @@ kernel void compiler_half_to_float(global half4 *src, global float4 *dst) { int i = get_global_id(0); dst[i] = convert_float4(src[i]); } + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +kernel void compiler_half_to_double(global half *src, global double *dst) { + int i = get_global_id(0); + dst[i] = src[i]; +} +kernel void compiler_double_to_half(global double *src, global half *dst) { + int i = get_global_id(0); + dst[i] = src[i]; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 18337fa..f44fe19 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -196,6 +196,7 @@ set (utests_sources compiler_double_precision.cpp compiler_double.cpp compiler_double_div.cpp + compiler_double_convert.cpp 
load_program_from_gen_bin.cpp load_program_from_spir.cpp get_arg_info.cpp diff --git a/utests/compiler_double_2.cpp b/utests/compiler_double_2.cpp deleted file mode 100644 index 7e3ae4b..0000000 --- a/utests/compiler_double_2.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include <cmath> -#include "utest_helper.hpp" - -static void cpu(int global_id, float *src, double *dst) { - float f = src[global_id]; - float d = 1.234567890123456789; - dst[global_id] = global_id < 14 ? d * (d + f) : 14; -} - -void compiler_double_2(void) -{ - const size_t n = 16; - float cpu_src[n]; - double cpu_dst[n]; - - // Setup kernel and buffers - OCL_CREATE_KERNEL("compiler_double_2"); - OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); - OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); - OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); - OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); - globals[0] = n; - locals[0] = 16; - - // Run random tests - for (uint32_t pass = 0; pass < 1; ++pass) { - OCL_MAP_BUFFER(0); - for (int32_t i = 0; i < (int32_t) n; ++i) - cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f; - OCL_UNMAP_BUFFER(0); - - // Run the kernel on GPU - OCL_NDRANGE(1); - - // Run on CPU - for (int32_t i = 0; i < (int32_t) n; ++i) - cpu(i, cpu_src, cpu_dst); - - // Compare - OCL_MAP_BUFFER(1); - for (int32_t i = 0; i < (int32_t) n; ++i) - OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4); - OCL_UNMAP_BUFFER(1); - } -} - -MAKE_UTEST_FROM_FUNCTION(compiler_double_2); diff --git a/utests/compiler_double_convert.cpp b/utests/compiler_double_convert.cpp new file mode 100644 index 0000000..9c5c97b --- /dev/null +++ b/utests/compiler_double_convert.cpp @@ -0,0 +1,622 @@ +#include <cmath> +#include <string.h> +#include "utest_helper.hpp" + +void compiler_double_convert_int(void) +{ + const size_t n = 16; + double src[n]; + int32_t cpu_dst0[n]; + uint32_t cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, 
sizeof(cpu_dst1)); + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_int"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int32_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src[i] = ((double*)buf_data[0])[i] = 32.1d * (rand() & 1324135) + 1434342.73209855531d; + ((int32_t*)buf_data[1])[i] = 0; + ((uint32_t*)buf_data[2])[i] = 0; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + if (i%3 == 0) continue; + cpu_dst0[i] = (int32_t)src[i]; + cpu_dst1[i] = (uint32_t)src[i]; + } + + // Compare + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("Return Int is %d, ref is %d,\t Uint is %u, ref is %u,\t double is %f\n", + // ((int*)buf_data[1])[i], cpu_dst0[i], ((uint32_t*)buf_data[2])[i], cpu_dst1[i], src[i]); + OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst0[i]); + OCL_ASSERT(((uint32_t*)buf_data[2])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_int); + +void compiler_double_convert_float(void) +{ + const size_t n = 16; + double src[n]; + float cpu_dst[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst, 0, sizeof(cpu_dst)); + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_float"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); + OCL_SET_ARG(0, 
sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src[i] = ((double*)buf_data[0])[i] = 1332.1d * (rand() & 1324135) - 1434342.73209855531d * (rand() & 135); + ((float*)buf_data[1])[i] = 0; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + // Only buf[0] and buf[1] are created and mapped by this test. + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + cpu_dst[i] = (float)src[i]; + } + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("Return float is %f,\t ref is %f,\t double is %f\n", ((float*)buf_data[1])[i], cpu_dst[i], src[i]); + OCL_ASSERT(((float*)buf_data[1])[i] == cpu_dst[i]); + } + OCL_UNMAP_BUFFER(1); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_float); + +void compiler_double_convert_short(void) +{ + const size_t n = 16; + double src[n]; + int16_t cpu_dst0[n]; + uint16_t cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, sizeof(cpu_dst1)); + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_short"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int16_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint16_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src[i] = ((double*)buf_data[0])[i] = 10.3443d * (rand() & 15) + 14.8924323d; + ((int16_t*)buf_data[1])[i] = 0; + ((uint16_t*)buf_data[2])[i] = 0; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + + // Run 
the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + if (i%3 == 0) continue; + cpu_dst0[i] = (int16_t)src[i]; + cpu_dst1[i] = (uint16_t)src[i]; + } + + // Compare + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("Return Int is %d, ref is %d,\t Uint is %u, ref is %u,\t double is %f\n", + // ((int16_t*)buf_data[1])[i], cpu_dst0[i], ((uint16_t*)buf_data[2])[i], cpu_dst1[i], src[i]); + OCL_ASSERT(((int16_t*)buf_data[1])[i] == cpu_dst0[i]); + OCL_ASSERT(((uint16_t*)buf_data[2])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_short); + +void compiler_double_convert_char(void) +{ + const size_t n = 16; + double src[n]; + int8_t cpu_dst0[n]; + uint8_t cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, sizeof(cpu_dst1)); + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_char"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int8_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint8_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src[i] = ((double*)buf_data[0])[i] = 10.3443d * (rand() & 7) + 2.8924323d; + ((int8_t*)buf_data[1])[i] = 0; + ((uint8_t*)buf_data[2])[i] = 0; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + if (i%3 == 0) continue; + cpu_dst0[i] = (int8_t)src[i]; + cpu_dst1[i] = (uint8_t)src[i]; + } + + // Compare + 
OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { +// printf("Return Int is %d, ref is %d,\t Uint is %u, ref is %u,\t double is %f\n", +// ((int8_t*)buf_data[1])[i], cpu_dst0[i], ((uint8_t*)buf_data[2])[i], cpu_dst1[i], src[i]); + OCL_ASSERT(((int8_t*)buf_data[1])[i] == cpu_dst0[i]); + OCL_ASSERT(((uint8_t*)buf_data[2])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_char); + +void compiler_double_convert_long(void) +{ + const size_t n = 16; + double src[n]; + int64_t cpu_dst0[n]; + uint64_t cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, sizeof(cpu_dst1)); + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_long"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint64_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src[i] = ((double*)buf_data[0])[i] = 10.3443d * (rand() & 7) + 2.8924323d; + ((int64_t*)buf_data[1])[i] = 0; + ((uint64_t*)buf_data[2])[i] = 0; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + if (i%3 == 0) continue; + cpu_dst0[i] = (int64_t)src[i]; + cpu_dst1[i] = (uint64_t)src[i]; + } + + // Compare + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + for (int32_t i = 0; i < (int32_t) n; ++i) { +// printf("Return Int is %d, ref is %d,\t Uint is %u, ref is %u,\t double is %f\n", +// ((int8_t*)buf_data[1])[i], cpu_dst0[i], 
((uint8_t*)buf_data[2])[i], cpu_dst1[i], src[i]); + OCL_ASSERT(((int64_t*)buf_data[1])[i] == cpu_dst0[i]); + OCL_ASSERT(((uint64_t*)buf_data[2])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_long); + +void compiler_long_convert_double(void) +{ + const size_t n = 16; + int64_t src0[n]; + uint64_t src1[n]; + double cpu_dst0[n]; + double cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, sizeof(cpu_dst1)); + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_long_convert_double"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src0[i] = ((int64_t*)buf_data[0])[i] = 0xABC8ABACDA00C * (rand() & 7); + src1[i] = ((uint64_t*)buf_data[1])[i] = 0xCABC8ABACDA00C * (rand() & 15); + ((double*)buf_data[2])[i] = 0.0d; + ((double*)buf_data[3])[i] = 0.0d; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + cpu_dst0[i] = (double)src0[i]; + cpu_dst1[i] = (double)src1[i]; + } + + // Compare + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { +// printf("long is %ld, ref is %f, double is %f \t" +// "ulong is %lu, ref is %f, double is %f\n", +// src0[i], cpu_dst0[i], 
((double*)buf_data[2])[i], +// src1[i], cpu_dst1[i], ((double*)buf_data[3])[i]); + OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]); + OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_double); + +void compiler_int_convert_double(void) +{ + const size_t n = 16; + int32_t src0[n]; + uint32_t src1[n]; + double cpu_dst0[n]; + double cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, sizeof(cpu_dst1)); + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_int_convert_double"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int32_t), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src0[i] = ((int32_t*)buf_data[0])[i] = 0xCABC8A0C * (rand() & 7); + src1[i] = ((uint32_t*)buf_data[1])[i] = 0xCACDA00C * (rand() & 15); + ((double*)buf_data[2])[i] = 0.0d; + ((double*)buf_data[3])[i] = 0.0d; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + cpu_dst0[i] = (double)src0[i]; + cpu_dst1[i] = (double)src1[i]; + } + + // Compare + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { +// printf("int is %d, ref is %f, double is %f \t" +// "uint is %u, ref is %f, double is %f\n", +// src0[i], 
cpu_dst0[i], ((double*)buf_data[2])[i], +// src1[i], cpu_dst1[i], ((double*)buf_data[3])[i]); + OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]); + OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_int_convert_double); + +void compiler_short_convert_double(void) +{ + const size_t n = 16; + int16_t src0[n]; + uint16_t src1[n]; + double cpu_dst0[n]; + double cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, sizeof(cpu_dst1)); + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_short_convert_double"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int16_t), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src0[i] = ((int16_t*)buf_data[0])[i] = 0x8A0C * (rand() & 7); + src1[i] = ((uint16_t*)buf_data[1])[i] = 0xC00C * (rand() & 15); + ((double*)buf_data[2])[i] = 0.0d; + ((double*)buf_data[3])[i] = 0.0d; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + cpu_dst0[i] = (double)src0[i]; + cpu_dst1[i] = (double)src1[i]; + } + + // Compare + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { +// printf("short is %d, ref is %f, double is %f \t" +// "ushort is %u, ref is %f, double is %f\n", +// 
src0[i], cpu_dst0[i], ((double*)buf_data[2])[i], +// src1[i], cpu_dst1[i], ((double*)buf_data[3])[i]); + OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]); + OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_short_convert_double); + +void compiler_char_convert_double(void) +{ + const size_t n = 16; + int8_t src0[n]; + uint8_t src1[n]; + double cpu_dst0[n]; + double cpu_dst1[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst0, 0, sizeof(cpu_dst0)); + memset(cpu_dst1, 0, sizeof(cpu_dst1)); + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_char_convert_double"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int8_t), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint8_t), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src0[i] = ((int8_t*)buf_data[0])[i] = 0x8C * (rand() & 7); + src1[i] = ((uint8_t*)buf_data[1])[i] = 0xC0 * (rand() & 15); + ((double*)buf_data[2])[i] = 0.0d; + ((double*)buf_data[3])[i] = 0.0d; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + cpu_dst0[i] = (double)src0[i]; + cpu_dst1[i] = (double)src1[i]; + } + + // Compare + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (int32_t i = 0; i < (int32_t) n; ++i) { +// printf("char is %d, ref is %f, double is %f \t" +// "uchar is %u, ref is %f, double is %f\n", +// 
src0[i], cpu_dst0[i], ((double*)buf_data[2])[i], +// src1[i], cpu_dst1[i], ((double*)buf_data[3])[i]); + OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]); + OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]); + } + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_char_convert_double); + +void compiler_float_convert_double(void) +{ + const size_t n = 16; + float src[n]; + double cpu_dst[n]; + + if (!cl_check_double()) + return; + + memset(cpu_dst, 0, sizeof(cpu_dst)); + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_float_convert_double"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i) { + src[i] = ((float*)buf_data[0])[i] = (float)(0x8C * (rand() & 7)) * 1342.42f; + ((double*)buf_data[1])[i] = 0.0d; + } + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + for (int32_t i = 0; i < (int32_t) n; ++i) { + cpu_dst[i] = (double)src[i]; + } + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("%f, \t%f\n", ((double*)buf_data[1])[i], cpu_dst[i]); + // The single output buffer buf[1] is checked against the single reference array cpu_dst. + OCL_ASSERT(((double*)buf_data[1])[i] == cpu_dst[i]); + } + OCL_UNMAP_BUFFER(1); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_float_convert_double); diff --git a/utests/compiler_half.cpp b/utests/compiler_half.cpp index e8ed286..163573f 100644 --- a/utests/compiler_half.cpp +++ b/utests/compiler_half.cpp @@ -922,3 +922,105 @@ void compiler_half_to_long_sat(void) OCL_UNMAP_BUFFER(1); } MAKE_UTEST_FROM_FUNCTION(compiler_half_to_long_sat); + +void compiler_half_to_double(void) +{ + const size_t n = 
16; + uint16_t hsrc[n]; + double ddst[n]; + uint32_t tmp_f; + float f; + +// if (!check_half_device()) +// return; + if (!cl_check_double()) + return; + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_double"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + for (int32_t i = 0; i < (int32_t) n; ++i) { + f = -100.1f + 10.3f * i; + memcpy(&tmp_f, &f, sizeof(float)); + hsrc[i] = __float_to_half(tmp_f); + ddst[i] = (double)f; + } + + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + memcpy(buf_data[0], hsrc, sizeof(hsrc)); + memset(buf_data[1], 0, n*sizeof(double)); + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i) { + double dd = ((double *)(buf_data[1]))[i]; +// printf("%f %f, diff is %%%f\n", dd, ddst[i], fabs(dd - ddst[i])/fabs(ddst[i])); + OCL_ASSERT(fabs(dd - ddst[i]) < 0.001f * fabs(ddst[i])); + } + OCL_UNMAP_BUFFER(1); +} +MAKE_UTEST_FROM_FUNCTION(compiler_half_to_double); + +void compiler_double_to_half(void) +{ + const size_t n = 16; + uint16_t hdst[n]; + double src[n]; + uint32_t tmp_f; + float f; + +// if (!check_half_device()) +// return; + if (!cl_check_double()) + return; + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_double_to_half"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + globals[0] = n; + locals[0] = 16; + + for (int32_t i = 0; i < (int32_t) n; ++i) { + f = -100.1f + 10.3f * i; + src[i] = (double)f; + memcpy(&tmp_f, &f, sizeof(float)); + hdst[i] = __float_to_half(tmp_f); + } + + 
OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + memcpy(buf_data[0], src, sizeof(src)); + memset(buf_data[1], 0, n*sizeof(uint16_t)); + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i) { + uint16_t hf = ((uint16_t *)(buf_data[1]))[i]; + //tmp_f = __half_to_float(hf); + //memcpy(&f, &tmp_f, sizeof(float)); + //printf("%f, %x, %x\n", f, hf, hdst[i]); + OCL_ASSERT(hf == hdst[i]); + } + OCL_UNMAP_BUFFER(1); +} +MAKE_UTEST_FROM_FUNCTION(compiler_double_to_half); -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet