From: Pan Xiuli <xiuli....@intel.com> Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- kernels/builtin_max_sub_group_size.cl | 7 ++++ kernels/builtin_num_sub_groups.cl | 7 ++++ kernels/builtin_sub_group_id.cl | 7 ++++ kernels/builtin_sub_group_size.cl | 7 ++++ utests/CMakeLists.txt | 4 +++ utests/builtin_max_sub_group_size.cpp | 60 ++++++++++++++++++++++++++++++++++ utests/builtin_num_sub_groups.cpp | 60 ++++++++++++++++++++++++++++++++++ utests/builtin_sub_group_id.cpp | 61 +++++++++++++++++++++++++++++++++++ utests/builtin_sub_group_size.cpp | 61 +++++++++++++++++++++++++++++++++++ 9 files changed, 274 insertions(+) create mode 100644 kernels/builtin_max_sub_group_size.cl create mode 100644 kernels/builtin_num_sub_groups.cl create mode 100644 kernels/builtin_sub_group_id.cl create mode 100644 kernels/builtin_sub_group_size.cl create mode 100644 utests/builtin_max_sub_group_size.cpp create mode 100644 utests/builtin_num_sub_groups.cpp create mode 100644 utests/builtin_sub_group_id.cpp create mode 100644 utests/builtin_sub_group_size.cpp
diff --git a/kernels/builtin_max_sub_group_size.cl b/kernels/builtin_max_sub_group_size.cl new file mode 100644 index 0000000..c2f3b5e --- /dev/null +++ b/kernels/builtin_max_sub_group_size.cl @@ -0,0 +1,7 @@ +__kernel void builtin_max_sub_group_size(global int *dst) +{ + int lid = get_local_linear_id(); + int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2); + int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0)); + dst[gid] = get_max_sub_group_size(); +} diff --git a/kernels/builtin_num_sub_groups.cl b/kernels/builtin_num_sub_groups.cl new file mode 100644 index 0000000..08b5673 --- /dev/null +++ b/kernels/builtin_num_sub_groups.cl @@ -0,0 +1,7 @@ +__kernel void builtin_num_sub_groups(global int *dst) +{ + int lid = get_local_linear_id(); + int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2); + int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0)); + dst[gid] = get_num_sub_groups(); +} diff --git a/kernels/builtin_sub_group_id.cl b/kernels/builtin_sub_group_id.cl new file mode 100644 index 0000000..accf3ad --- /dev/null +++ b/kernels/builtin_sub_group_id.cl @@ -0,0 +1,7 @@ +__kernel void builtin_sub_group_id(global int *dst) +{ + int lid = get_local_linear_id(); + int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2); + int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0)); + dst[gid] = get_sub_group_id(); +} diff --git a/kernels/builtin_sub_group_size.cl b/kernels/builtin_sub_group_size.cl new file mode 100644 index 0000000..1e034bb --- /dev/null +++ b/kernels/builtin_sub_group_size.cl @@ -0,0 +1,7 @@ +__kernel void builtin_sub_group_size(global int *dst) +{ + int lid = get_local_linear_id(); + int lsz = get_local_size(0) * get_local_size(1) * get_local_size(2); + int gid = lid + lsz*(get_num_groups(1) * get_num_groups(0) * get_group_id(2) + get_num_groups(0) * get_group_id(1) + get_group_id(0)); + dst[gid] = get_sub_group_size(); +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 26a2264..76b50a5 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -206,6 +206,10 @@ set (utests_sources builtin_global_id.cpp builtin_num_groups.cpp builtin_local_id.cpp + builtin_sub_group_size.cpp + builtin_max_sub_group_size.cpp + builtin_num_sub_groups.cpp + builtin_sub_group_id.cpp builtin_acos_asin.cpp builtin_pow.cpp builtin_exp.cpp diff --git a/utests/builtin_max_sub_group_size.cpp b/utests/builtin_max_sub_group_size.cpp new file mode 100644 index 0000000..bb1423b --- /dev/null +++ b/utests/builtin_max_sub_group_size.cpp @@ -0,0 +1,60 @@ +/* +According to the OpenCL cl_intel_subgroups. +Now define local and global size as following: + globals[0] = 4; + globals[1] = 9; + globals[2] = 16; + locals[0] = 2; + locals[1] = 3; + locals[2] = 4; +*/ + +#define udebug 0 +#include "utest_helper.hpp" +static void builtin_max_sub_group_size(void) +{ + + // Setup kernel and buffers + size_t dim, i,local_sz = 1,buf_len = 1; + OCL_CREATE_KERNEL("builtin_max_sub_group_size"); + size_t sub_sz; + + + OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + + for( dim=1; dim <= 3; dim++ ) + { + buf_len = 1; + local_sz = 1; + for(i=1; i <= dim; i++) + { + locals[i - 1] = i + 1; + globals[i - 1] = (i + 1) * (i + 1); + buf_len *= ((i + 1) * (i + 1)); + local_sz *= i + 1; + } + for(i = dim+1; i <= 3; i++) + { + globals[i - 1] = 0; + locals[i - 1] = 0; + } + + OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&sub_sz,NULL); + // Run the kernel + OCL_NDRANGE( dim ); + clFinish(queue); + + OCL_MAP_BUFFER(0); + + for( i = 0; i < buf_len; i++) { +#if udebug + printf("got %d expect %d\n", ((uint32_t*)buf_data[0])[i], sub_sz); +#endif + OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == sub_sz); + } + OCL_UNMAP_BUFFER(0); + } +} + +MAKE_UTEST_FROM_FUNCTION(builtin_max_sub_group_size); diff --git a/utests/builtin_num_sub_groups.cpp b/utests/builtin_num_sub_groups.cpp new file mode 100644 index 0000000..78acb13 --- /dev/null +++ b/utests/builtin_num_sub_groups.cpp @@ -0,0 +1,60 @@ +/* +According to the OpenCL cl_intel_subgroups. +Now define local and global size as following: + globals[0] = 4; + globals[1] = 9; + globals[2] = 16; + locals[0] = 2; + locals[1] = 3; + locals[2] = 4; +*/ + +#define udebug 0 +#include "utest_helper.hpp" +static void builtin_num_sub_groups(void) +{ + + // Setup kernel and buffers + size_t dim, i,local_sz = 1,buf_len = 1; + OCL_CREATE_KERNEL("builtin_num_sub_groups"); + size_t num_sub; + + + OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + + for( dim=1; dim <= 3; dim++ ) + { + buf_len = 1; + local_sz = 1; + for(i=1; i <= dim; i++) + { + locals[i - 1] = i + 1; + globals[i - 1] = (i + 1) * (i + 1); + buf_len *= ((i + 1) * (i + 1)); + local_sz *= i + 1; + } + for(i = dim+1; i <= 3; i++) + { + globals[i - 1] = 0; + locals[i - 1] = 0; + } + + OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR ,sizeof(size_t)*dim,locals,sizeof(size_t),&num_sub,NULL); + // Run the kernel + OCL_NDRANGE( dim ); + clFinish(queue); + + OCL_MAP_BUFFER(0); + + for( i = 0; i < buf_len; i++) { +#if udebug + printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], num_sub); +#endif + OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == num_sub); + } + OCL_UNMAP_BUFFER(0); + } +} + +MAKE_UTEST_FROM_FUNCTION(builtin_num_sub_groups); diff --git a/utests/builtin_sub_group_id.cpp b/utests/builtin_sub_group_id.cpp new file mode 100644 index 0000000..e81d173 --- /dev/null +++ b/utests/builtin_sub_group_id.cpp @@ -0,0 +1,61 @@ +/* +According to the OpenCL cl_intel_subgroups. +Now define local and global size as following: + globals[0] = 4; + globals[1] = 9; + globals[2] = 16; + locals[0] = 2; + locals[1] = 3; + locals[2] = 4; +*/ + +#define udebug 0 +#include "utest_helper.hpp" +static void builtin_sub_group_id(void) +{ + + // Setup kernel and buffers + size_t dim, i,local_sz = 1,buf_len = 1; + OCL_CREATE_KERNEL("builtin_sub_group_id"); + size_t max_sub_sz; + + + OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + + for( dim=1; dim <= 3; dim++ ) + { + buf_len = 1; + local_sz = 1; + for(i=1; i <= dim; i++) + { + locals[i - 1] = i + 1; + globals[i - 1] = (i + 1) * (i + 1); + buf_len *= ((i + 1) * (i + 1)); + local_sz *= i + 1; + } + for(i = dim+1; i <= 3; i++) + { + globals[i - 1] = 0; + locals[i - 1] = 0; + } + + OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&max_sub_sz,NULL); + // Run the kernel + OCL_NDRANGE( dim ); + clFinish(queue); + + OCL_MAP_BUFFER(0); + + for( i = 0; i < buf_len; i++) { + size_t expect_id = (i % local_sz) / max_sub_sz; +#if udebug + printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], expect_id); +#endif + OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == expect_id); + } + OCL_UNMAP_BUFFER(0); + } +} + +MAKE_UTEST_FROM_FUNCTION(builtin_sub_group_id); diff --git a/utests/builtin_sub_group_size.cpp b/utests/builtin_sub_group_size.cpp new file mode 100644 index 0000000..1dc24ed --- /dev/null +++ b/utests/builtin_sub_group_size.cpp @@ -0,0 +1,61 @@ +/* +According to the OpenCL cl_intel_subgroups. +Now define local and global size as following: + globals[0] = 4; + globals[1] = 9; + globals[2] = 16; + locals[0] = 2; + locals[1] = 3; + locals[2] = 4; +*/ + +#define udebug 0 +#include "utest_helper.hpp" +static void builtin_sub_group_size(void) +{ + + // Setup kernel and buffers + size_t dim, i,local_sz = 1,buf_len = 1; + OCL_CREATE_KERNEL("builtin_sub_group_size"); + size_t max_sub_sz; + + + OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, sizeof(int)*576, NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + + for( dim=1; dim <= 3; dim++ ) + { + buf_len = 1; + local_sz = 1; + for(i=1; i <= dim; i++) + { + locals[i - 1] = i + 1; + globals[i - 1] = (i + 1) * (i + 1); + buf_len *= ((i + 1) * (i + 1)); + local_sz *= i + 1; + } + for(i = dim+1; i <= 3; i++) + { + globals[i - 1] = 0; + locals[i - 1] = 0; + } + + OCL_CALL(clGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*dim,locals,sizeof(size_t),&max_sub_sz,NULL); + // Run the kernel + OCL_NDRANGE( dim ); + clFinish(queue); + + OCL_MAP_BUFFER(0); + + for( i = 0; i < buf_len; i++) { + size_t expect_sz = (i % local_sz) < (local_sz / max_sub_sz * max_sub_sz) ? max_sub_sz : (local_sz % max_sub_sz); +#if udebug + printf("%zu get %d, expect %zu\n",i, ((uint32_t*)buf_data[0])[i], expect_sz); +#endif + OCL_ASSERT( ((uint32_t*)buf_data[0])[i] == expect_sz); + } + OCL_UNMAP_BUFFER(0); + } +} + +MAKE_UTEST_FROM_FUNCTION(builtin_sub_group_size); -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet