Fix bug at https://bugs.freedesktop.org/show_bug.cgi?id=93469
The function is mapped to OP_SIMD_SIZE, which returns the constant SIMD width; the correct function name is get_max_sub_group_size. contributor: Georg Kolling <[email protected]> Signed-off-by: Guo Yejun <[email protected]> --- backend/src/libocl/tmpl/ocl_simd.tmpl.h | 2 +- backend/src/llvm/llvm_gen_ocl_function.hxx | 2 +- kernels/compiler_get_max_sub_group_size.cl | 5 ++++ kernels/compiler_get_sub_group_id.cl | 2 +- kernels/compiler_get_sub_group_size.cl | 5 ---- kernels/compiler_sub_group_shuffle.cl | 4 +-- .../cl_internal_block_motion_estimate_intel.cl | 2 +- utests/CMakeLists.txt | 2 +- utests/compiler_get_max_sub_group_size.cpp | 32 ++++++++++++++++++++++ utests/compiler_get_sub_group_size.cpp | 32 ---------------------- 10 files changed, 44 insertions(+), 44 deletions(-) create mode 100644 kernels/compiler_get_max_sub_group_size.cl delete mode 100644 kernels/compiler_get_sub_group_size.cl create mode 100644 utests/compiler_get_max_sub_group_size.cpp delete mode 100644 utests/compiler_get_sub_group_size.cpp diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h index 67a1cee..4055070 100644 --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h @@ -26,7 +26,7 @@ int sub_group_any(int); int sub_group_all(int); -uint get_sub_group_size(void); +uint get_max_sub_group_size(void); uint get_sub_group_id(void); OVERLOADABLE float intel_sub_group_shuffle(float x, uint c); diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 8023744..046e1ae 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -161,7 +161,7 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F16_TO_U32, _Z16convert_uint_satDh) // SIMD level function for internal usage DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all) -DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_sub_group_size) 
+DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_max_sub_group_size) DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_id) DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle) diff --git a/kernels/compiler_get_max_sub_group_size.cl b/kernels/compiler_get_max_sub_group_size.cl new file mode 100644 index 0000000..8fb263b --- /dev/null +++ b/kernels/compiler_get_max_sub_group_size.cl @@ -0,0 +1,5 @@ +__kernel void compiler_get_max_sub_group_size(global int *dst) +{ + int i = get_global_id(0); + dst[i] = get_max_sub_group_size(); +} diff --git a/kernels/compiler_get_sub_group_id.cl b/kernels/compiler_get_sub_group_id.cl index 10033ff..afaa2a6 100644 --- a/kernels/compiler_get_sub_group_id.cl +++ b/kernels/compiler_get_sub_group_id.cl @@ -2,7 +2,7 @@ __kernel void compiler_get_sub_group_id(global int *dst) { int i = get_global_id(0); if (i == 0) - dst[0] = get_sub_group_size(); + dst[0] = get_max_sub_group_size(); dst[i+1] = get_sub_group_id(); } diff --git a/kernels/compiler_get_sub_group_size.cl b/kernels/compiler_get_sub_group_size.cl deleted file mode 100644 index 4d5e3eb..0000000 --- a/kernels/compiler_get_sub_group_size.cl +++ /dev/null @@ -1,5 +0,0 @@ -__kernel void compiler_get_sub_group_size(global int *dst) -{ - int i = get_global_id(0); - dst[i] = get_sub_group_size(); -} diff --git a/kernels/compiler_sub_group_shuffle.cl b/kernels/compiler_sub_group_shuffle.cl index 75adde3..a171faa 100644 --- a/kernels/compiler_sub_group_shuffle.cl +++ b/kernels/compiler_sub_group_shuffle.cl @@ -2,11 +2,11 @@ __kernel void compiler_sub_group_shuffle(global int *dst, int c) { int i = get_global_id(0); if (i == 0) - dst[0] = get_sub_group_size(); + dst[0] = get_max_sub_group_size(); dst++; int from = i; - int j = get_sub_group_size() - get_sub_group_id() - 1; + int j = get_max_sub_group_size() - get_sub_group_id() - 1; int o0 = get_sub_group_id(); int o1 = intel_sub_group_shuffle(from, c); int o2 = intel_sub_group_shuffle(from, 5); diff --git 
a/src/kernels/cl_internal_block_motion_estimate_intel.cl b/src/kernels/cl_internal_block_motion_estimate_intel.cl index 1f28f4e..23c5488 100644 --- a/src/kernels/cl_internal_block_motion_estimate_intel.cl +++ b/src/kernels/cl_internal_block_motion_estimate_intel.cl @@ -262,7 +262,7 @@ void block_motion_estimate_intel(accelerator_intel_t accel, ushort res[16]; uint write_back_dwx; - uint simd_width = get_sub_group_size(); + uint simd_width = get_max_sub_group_size(); /* In simd 8 mode, one kernel variable 'uint' map to 8 dword. * In simd 16 mode, one kernel variable 'uint' map to 16 dword. diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index d846b7b..2c6aea4 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -220,7 +220,7 @@ set (utests_sources runtime_use_host_ptr_buffer.cpp runtime_alloc_host_ptr_buffer.cpp runtime_use_host_ptr_image.cpp - compiler_get_sub_group_size.cpp + compiler_get_max_sub_group_size.cpp compiler_get_sub_group_id.cpp compiler_sub_group_shuffle.cpp builtin_global_linear_id.cpp diff --git a/utests/compiler_get_max_sub_group_size.cpp b/utests/compiler_get_max_sub_group_size.cpp new file mode 100644 index 0000000..debdf94 --- /dev/null +++ b/utests/compiler_get_max_sub_group_size.cpp @@ -0,0 +1,32 @@ +#include "utest_helper.hpp" + +void compiler_get_max_sub_group_size(void) +{ + const size_t n = 256; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_get_max_sub_group_size"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + + globals[0] = n; + locals[0] = 16; + + OCL_MAP_BUFFER(0); + for (int32_t i = 0; i < (int32_t) n; ++i) + ((int*)buf_data[0])[i] = -1; + OCL_UNMAP_BUFFER(0); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Compare + OCL_MAP_BUFFER(0); + int* dst = (int *)buf_data[0]; + for (int32_t i = 0; i < (int32_t) n; ++i){ + OCL_ASSERT(8 == dst[i] || 16 == dst[i]); + } + OCL_UNMAP_BUFFER(0); +} + 
+MAKE_UTEST_FROM_FUNCTION(compiler_get_max_sub_group_size); diff --git a/utests/compiler_get_sub_group_size.cpp b/utests/compiler_get_sub_group_size.cpp deleted file mode 100644 index 20339d7..0000000 --- a/utests/compiler_get_sub_group_size.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "utest_helper.hpp" - -void compiler_get_sub_group_size(void) -{ - const size_t n = 256; - - // Setup kernel and buffers - OCL_CREATE_KERNEL("compiler_get_sub_group_size"); - OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); - OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); - - globals[0] = n; - locals[0] = 16; - - OCL_MAP_BUFFER(0); - for (int32_t i = 0; i < (int32_t) n; ++i) - ((int*)buf_data[0])[i] = -1; - OCL_UNMAP_BUFFER(0); - - // Run the kernel on GPU - OCL_NDRANGE(1); - - // Compare - OCL_MAP_BUFFER(0); - int* dst = (int *)buf_data[0]; - for (int32_t i = 0; i < (int32_t) n; ++i){ - OCL_ASSERT(8 == dst[i] || 16 == dst[i]); - } - OCL_UNMAP_BUFFER(0); -} - -MAKE_UTEST_FROM_FUNCTION(compiler_get_sub_group_size); -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
