Signed-off-by: rander <[email protected]>
---
kernels/builtin_convert_int16toDouble.cl | 36 ++++++++++++++
utests/CMakeLists.txt | 3 +-
utests/builtin_convert_int16toDouble.cpp | 85 ++++++++++++++++++++++++++++++++
3 files changed, 123 insertions(+), 1 deletion(-)
create mode 100644 kernels/builtin_convert_int16toDouble.cl
create mode 100644 utests/builtin_convert_int16toDouble.cpp
diff --git a/kernels/builtin_convert_int16toDouble.cl b/kernels/builtin_convert_int16toDouble.cl
new file mode 100644
index 0000000..558b1c7
--- /dev/null
+++ b/kernels/builtin_convert_int16toDouble.cl
@@ -0,0 +1,36 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+__kernel void builtin_convert_int16toDouble(__global short *X,
+                                            __global ushort *uX,
+                                            __global double *Z,
+                                            int max_input)
+{
+  /* Z receives eight back-to-back runs of max_input results, starting at this
+   * work item's global id: X under rtz, rtn, rte, rtp, then uX likewise. */
+  int out = get_global_id(0);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rtz(X[n]);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rtn(X[n]);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rte(X[n]);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rtp(X[n]);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rtz(uX[n]);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rtn(uX[n]);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rte(uX[n]);
+
+  for (int n = 0; n < max_input; ++n)
+    Z[out++] = convert_double_rtp(uX[n]);
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 8f006c7..6e41eeb 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@ set (utests_sources
builtin_convert_double2int16.cpp
builtin_convert_double2int32.cpp
builtin_convert_double2int64.cpp
- builtin_convert_int8toDouble.cpp)
+ builtin_convert_int8toDouble.cpp
+ builtin_convert_int16toDouble.cpp)
if (LLVM_VERSION_NODOT VERSION_GREATER 34)
SET(utests_sources
diff --git a/utests/builtin_convert_int16toDouble.cpp b/utests/builtin_convert_int16toDouble.cpp
new file mode 100644
index 0000000..222dace
--- /dev/null
+++ b/utests/builtin_convert_int16toDouble.cpp
@@ -0,0 +1,85 @@
+#include "utest_helper.hpp"
+#include <cmath>
+#include <algorithm>
+
+namespace{
+
+// Signature behind each count_input-sized slice of the output, in kernel write order.
+const char* testFunc[] =
+{
+  " double convert_double_rtz(short x)",
+  " double convert_double_rtn(short x)",
+  " double convert_double_rte(short x)",
+  " double convert_double_rtp(short x)",
+
+  " double convert_double_rtz(ushort x)",
+  " double convert_double_rtn(ushort x)",
+  " double convert_double_rte(ushort x)",
+  " double convert_double_rtp(ushort x)",
+};
+
+const int count_input = 4096;  // values fed to each conversion
+const int max_function = 8;    // 4 rounding modes x {short, ushort}
+const int input_step = 16;     // distance between consecutive inputs
+
+// Feeds every 16th short and ushort value to the kernel and expects each double to equal its input.
+static void builtin_convert_int16toDouble(void)
+{
+  // Host copies live on the stack, so nothing leaks when an assertion fires.
+  short input_data[count_input];
+  unsigned short uinput_data[count_input];
+  double gpu_data[max_function * count_input] = {0};
+  const int half = max_function / 2 * count_input;
+
+  OCL_CREATE_KERNEL("builtin_convert_int16toDouble");
+
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(input_data), NULL);
+  OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, sizeof(uinput_data), NULL);
+  OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, sizeof(gpu_data), NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(int), &count_input);
+
+  globals[0] = 1;  // a single work item fills the whole output buffer
+  locals[0] = 1;
+
+  // Signed inputs cover -32768..32752, unsigned inputs cover 0..65520.
+  for (int i = 0; i < count_input; i++)
+  {
+    input_data[i] = (short)(-32768 + i * input_step);
+    uinput_data[i] = (unsigned short)(i * input_step);
+  }
+  cl_int status = clEnqueueWriteBuffer(queue, buf[0], CL_TRUE, 0, sizeof(input_data), input_data, 0, NULL, NULL);
+  OCL_ASSERT(status == CL_SUCCESS);
+  status = clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, sizeof(uinput_data), uinput_data, 0, NULL, NULL);
+  OCL_ASSERT(status == CL_SUCCESS);
+
+  // Run the kernel
+  OCL_NDRANGE( 1 );
+
+  status = clEnqueueReadBuffer(queue, buf[2], CL_TRUE, 0, sizeof(gpu_data), gpu_data, 0, NULL, NULL);
+  OCL_ASSERT(status == CL_SUCCESS);
+
+  // Print the mismatch before asserting: the message is lost if OCL_ASSERT does not return on failure.
+  for (int k = 0; k < half; k++)
+  {
+    const int expect = -32768 + (k % count_input) * input_step;
+    if (gpu_data[k] != (double)expect)
+      printf("failed at function:%s, index:%d expect value: %d, but get :%lf \n", testFunc[k / count_input], k % count_input, expect, gpu_data[k]);
+    OCL_ASSERT(gpu_data[k] == (double)expect);
+  }
+
+  const double *ugpu_data = gpu_data + half;
+  for (int k = 0; k < half; k++)
+  {
+    const int expect = (k % count_input) * input_step;
+    if (ugpu_data[k] != (double)expect)
+      printf("failed at function:%s, index:%d expect value: %d, but get :%lf \n", testFunc[k / count_input + max_function / 2], k % count_input, expect, ugpu_data[k]);
+    OCL_ASSERT(ugpu_data[k] == (double)expect);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_convert_int16toDouble)
+}
--
2.7.4
_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet