This is a test case showing a possible bug in which the same code produces different results on the CPU and on the GPU.
the code in opencl file is same as in c++ file,
after running on both CPU and GPU, the result is different.

Signed-off-by: Homer Hsing <[email protected]>
---
 kernels/bug1.cl       | 47 +++++++++++++++++++++++++++
 utests/CMakeLists.txt |  1 +
 utests/bug1.cpp       | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+)
 create mode 100644 kernels/bug1.cl
 create mode 100644 utests/bug1.cpp

diff --git a/kernels/bug1.cl b/kernels/bug1.cl
new file mode 100644
index 0000000..7e07f23
--- /dev/null
+++ b/kernels/bug1.cl
@@ -0,0 +1,47 @@
+float gpu(float x, int n) { // adds n to the exponent of x (i.e. x * 2^n) by editing raw bits; masks assume sign bit 31, exponent bits 30..23, fraction bits 22..0
+  union { float f; unsigned u; } u; // used to read and write the float's bit pattern as an unsigned integer
+  u.f = x;
+  unsigned s = u.u & 0x80000000u, v = u.u & 0x7fffffff, d = 0; // s: sign bit, v: every bit below the sign, d: bits shifted out later (used for rounding)
+  if(v >= 0x7f800000) // exponent field is all ones (infinity/NaN pattern): x is returned unchanged
+    return x;
+  if(v == 0) // +0 or -0: x is returned unchanged
+    return x;
+  int e = v >> 23; // e: the 8-bit exponent field
+  v &= 0x7fffff; // v: the 23-bit fraction field
+  if(e >= 1) // nonzero exponent field: set bit 23 explicitly
+    v |= 0x800000;
+  else { // zero exponent field: shift left until bit 23 is set, decrementing e for every shift after the first
+    v <<= 1;
+    while(v < 0x800000) {
+      v <<= 1;
+      e --;
+    }
+  }
+  e = add_sat(e, n); // saturating add; the host copy cpu() in utests/bug1.cpp uses plain e + n here
+  if(e >= 255) { // exponent too large for the field: return the infinity bit pattern with the sign of x
+    u.u = s | 0x7f800000;
+    return u.f;
+  }
+  if(e > 0) { // exponent fits the field: reassemble sign | exponent | fraction (bit 23 masked off again)
+    u.u = s | (e << 23) | (v & 0x7fffff);
+    return u.f;
+  }
+  if(e <= -23) { // exponent too small: return zero with the sign of x
+    u.u = s;
+    return u.f;
+  }
+  while(e <= 0) { // shift v right until e reaches 1, collecting the shifted-out bits in d
+    d = (d >> 1) | (v << 31); // the lowest bit of v becomes the top bit of d
+    v >>= 1;
+    e ++;
+  }
+  if(d > 0x80000000u) // shifted-out bits are strictly more than half: round v up (an exact half is not rounded up)
+    v ++;
+  u.u = s | v; // sign plus shifted fraction; no exponent bits are OR-ed in on this path
+  return u.f;
+}
+
+kernel void bug1(global float *src1, global int *src2, global float *dst) { // each work-item computes dst[i] = gpu(src1[i], src2[i]) for i = get_global_id(0)
+  int i = get_global_id(0);
+  dst[i] = gpu(src1[i], src2[i]);
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index ecb6735..c8d4f02 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -132,6 +132,7 @@ set (utests_sources
   builtin_pow.cpp
   builtin_exp.cpp
   builtin_convert_sat.cpp
+  bug1.cpp
   sub_buffer.cpp
   runtime_createcontext.cpp
   runtime_null_kernel_arg.cpp
diff --git a/utests/bug1.cpp b/utests/bug1.cpp
new file mode 100644
index 0000000..26a36a5
--- /dev/null
+++ b/utests/bug1.cpp
@@ -0,0 +1,89 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <cmath>
+#include "utest_helper.hpp"
+
+float cpu(float x, int n) { // host-side copy of gpu() from kernels/bug1.cl; the only difference is e + n instead of add_sat(e, n)
+  union { float f; unsigned u; } u; // used to read and write the float's bit pattern as an unsigned integer
+  u.f = x;
+  unsigned s = u.u & 0x80000000u, v = u.u & 0x7fffffff, d = 0; // s: sign bit, v: every bit below the sign, d: bits shifted out later (used for rounding)
+  if(v >= 0x7f800000) // exponent field is all ones (infinity/NaN pattern): x is returned unchanged
+    return x;
+  if(v == 0) // +0 or -0: x is returned unchanged
+    return x;
+  int e = v >> 23; // e: the 8-bit exponent field
+  v &= 0x7fffff; // v: the 23-bit fraction field
+  if(e >= 1) // nonzero exponent field: set bit 23 explicitly
+    v |= 0x800000;
+  else { // zero exponent field: shift left until bit 23 is set, decrementing e for every shift after the first
+    v <<= 1;
+    while(v < 0x800000) {
+      v <<= 1;
+      e --;
+    }
+  }
+  e = e + n; // plain (non-saturating) add, unlike the kernel's add_sat(e, n)
+  if(e >= 255) { // exponent too large for the field: return the infinity bit pattern with the sign of x
+    u.u = s | 0x7f800000;
+    return u.f;
+  }
+  if(e > 0) { // exponent fits the field: reassemble sign | exponent | fraction (bit 23 masked off again)
+    u.u = s | (e << 23) | (v & 0x7fffff);
+    return u.f;
+  }
+  if(e <= -23) { // exponent too small: return zero with the sign of x
+    u.u = s;
+    return u.f;
+  }
+  while(e <= 0) { // shift v right until e reaches 1, collecting the shifted-out bits in d
+    d = (d >> 1) | (v << 31); // the lowest bit of v becomes the top bit of d
+    v >>= 1;
+    e ++;
+  }
+  if(d > 0x80000000u) // shifted-out bits are strictly more than half: round v up (an exact half is not rounded up)
+    v ++;
+  u.u = s | v; // sign plus shifted fraction; no exponent bits are OR-ed in on this path
+  return u.f;
+}
+
+void bug1(void) // runs the bug1 kernel on 16 identical inputs and asserts each result equals cpu() on the same input
+{
+  const int n = 16; // element count; also used as globals[0] below
+  float src[n];
+  int src2[n];
+
+  OCL_CREATE_KERNEL("bug1"); // NOTE(review): presumably builds the kernel from kernels/bug1.cl - confirm in utest_helper.hpp
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); // buf[0]: n floats, bound to kernel argument 0 (src1)
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); // buf[1]: n ints, bound to kernel argument 1 (src2)
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); // buf[2]: n floats, bound to kernel argument 2 (dst)
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  for (int i = 0; i < n; ++i) { // every element gets the same input pair
+    src[i] = -3.59809e+22f;
+    src2[i] = -210;
+  }
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[0], src, sizeof(src)); // copy the host inputs into the mapped buffers
+  memcpy(buf_data[1], src2, sizeof(src2));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  OCL_NDRANGE(1); // NOTE(review): presumably enqueues the kernel over one dimension using globals/locals - confirm in utest_helper.hpp
+
+  OCL_MAP_BUFFER(2);
+  float *dest = ((float *)buf_data[2]);
+  for (int i = 0; i < n; ++i) { // compare each device result with the host reference
+    float wish = cpu(src[i], src2[i]);
+    printf("%g %g\n", dest[i], wish); // prints device result, then host result
+    OCL_ASSERT(dest[i] == wish); // exact float equality, no tolerance
+  }
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(bug1); // NOTE(review): name suggests registration as a test with a known issue - confirm in utest_helper.hpp
-- 
1.8.3.2

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet
