function cl_check_builtin_kernel_dimension() should be a static function which only is used within cl_device_id.c. The other part LGTM.
I will change it before push, no need for a new version. On Wed, Sep 10, 2014 at 11:31:32AM +0800, [email protected] wrote: > From: Luo Xionghu <[email protected]> > > add CL_KERNEL_GLOBAL_WORK_SIZE option for clGetKernelWorkGroupInfo. > > v2: should return the max global work size instead of current work size. > This funtion need return CL_INVALID_VALUE if the device is not a custom > device or kernel is not a built-in kernel. > we have 3 kind of built-in kernels for 1d/2d/3d memories, the max global > work size are decided by the dimension and memory type. > the piglit fail is caused by calling NON built-in kernels, so need send > patch to piglit later. > > Signed-off-by: Luo Xionghu <[email protected]> > --- > src/cl_device_id.c | 34 > +++++++++++++++++++++++++++++ > src/cl_device_id.h | 3 +++ > src/cl_gt_device.h | 3 +++ > src/cl_kernel.h | 2 ++ > utests/CMakeLists.txt | 1 + > utests/builtin_kernel_max_global_size.cpp | 30 +++++++++++++++++++++++++ > 6 files changed, 73 insertions(+) > create mode 100644 utests/builtin_kernel_max_global_size.cpp > > diff --git a/src/cl_device_id.c b/src/cl_device_id.c > index a0f0c99..5b24fcb 100644 > --- a/src/cl_device_id.c > +++ b/src/cl_device_id.c > @@ -515,6 +515,22 @@ cl_device_get_version(cl_device_id device, cl_int *ver) > > #include "cl_kernel.h" > #include "cl_program.h" > +LOCAL int > +cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device) > +{ > + const char * n = cl_kernel_get_name(kernel); > + const char * builtin_kernels_2d = > "__cl_copy_image_2d_to_2d;__cl_copy_image_2d_to_buffer;__cl_copy_buffer_to_image_2d;__cl_fill_image_2d;__cl_fill_image_2d_array;"; > + const char * builtin_kernels_3d = > "__cl_copy_image_3d_to_2d;__cl_copy_image_2d_to_3d;__cl_copy_image_3d_to_3d;__cl_copy_image_3d_to_buffer;__cl_copy_buffer_to_image_3d;__cl_fill_image_3d"; > + if (!strstr(device->built_in_kernels, n)){ > + return 0; > + }else if(strstr(builtin_kernels_2d, n)){ > + return 2; > + }else if(strstr(builtin_kernels_3d, n)){ > + return 3; > + }else > + return 1; > + > +} > > LOCAL size_t > cl_get_kernel_max_wg_sz(cl_kernel kernel) > @@ -543,6 +559,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, > size_t* param_value_size_ret) > { > int err = CL_SUCCESS; > + int dimension = 0; > if (UNLIKELY(device != &intel_ivb_gt1_device && > device != &intel_ivb_gt2_device && > device != &intel_baytrail_t_device && > @@ -573,6 +590,23 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, > } > DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz) > DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size) > + case CL_KERNEL_GLOBAL_WORK_SIZE: > + dimension = cl_check_builtin_kernel_dimension(kernel, device); > + if ( !dimension ) return CL_INVALID_VALUE; > + if (param_value_size_ret != NULL) > + *param_value_size_ret = sizeof(device->max_1d_global_work_sizes); > + if (param_value) { > + if (dimension == 1) { > + memcpy(param_value, device->max_1d_global_work_sizes, > sizeof(device->max_1d_global_work_sizes)); > + }else if(dimension == 2){ > + memcpy(param_value, device->max_2d_global_work_sizes, > sizeof(device->max_2d_global_work_sizes)); > + }else if(dimension == 3){ > + memcpy(param_value, device->max_3d_global_work_sizes, > sizeof(device->max_3d_global_work_sizes)); > + }else > + return CL_INVALID_VALUE; > + > + return CL_SUCCESS; > + } > default: > return CL_INVALID_VALUE; > }; > diff --git a/src/cl_device_id.h b/src/cl_device_id.h > index c4f8227..31bce47 100644 > --- a/src/cl_device_id.h > +++ b/src/cl_device_id.h > @@ -30,6 +30,9 @@ struct _cl_device_id { > cl_uint max_work_item_dimensions; // should be 3. > size_t max_work_item_sizes[3]; // equal to maximum work group > size. > size_t max_work_group_size; // maximum work group size under > simd16 mode. > + size_t max_1d_global_work_sizes[3]; // maximum 1d global work size > for builtin kernels. > + size_t max_2d_global_work_sizes[3]; // maximum 2d global work size > for builtin kernels. > + size_t max_3d_global_work_sizes[3]; // maximum 3d global work size > for builtin kernels. > cl_uint preferred_vector_width_char; > cl_uint preferred_vector_width_short; > cl_uint preferred_vector_width_int; > diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h > index 33ef1f0..3cd54eb 100644 > --- a/src/cl_gt_device.h > +++ b/src/cl_gt_device.h > @@ -21,6 +21,9 @@ > .device_type = CL_DEVICE_TYPE_GPU, > .vendor_id = 0, /* == device_id (set when requested) */ > .max_work_item_dimensions = 3, > +.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1}, > +.max_2d_global_work_sizes = {8192, 8192, 1}, > +.max_3d_global_work_sizes = {8192, 8192, 2048}, > .preferred_vector_width_char = 8, > .preferred_vector_width_short = 8, > .preferred_vector_width_int = 4, > diff --git a/src/cl_kernel.h b/src/cl_kernel.h > index f4ed8d3..85a997d 100644 > --- a/src/cl_kernel.h > +++ b/src/cl_kernel.h > @@ -59,6 +59,8 @@ struct _cl_kernel { > cl_ulong local_mem_sz; /* local memory size specified in kernel args. > */ > size_t compile_wg_sz[3]; /* Required workgroup size by > __attribute__((reqd_work_gro > up_size(X, Y, Z))) qualifier.*/ > + size_t global_work_sz[3]; /* maximum global size that can be used to > execute a kernel > + (i.e. global_work_size argument to > clEnqueueNDRangeKernel.)*/ > size_t stack_size; /* stack size per work item. */ > cl_argument *args; /* To track argument setting */ > uint32_t arg_n:31; /* Number of arguments */ > diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt > index b30e6f9..034f112 100644 > --- a/utests/CMakeLists.txt > +++ b/utests/CMakeLists.txt > @@ -181,6 +181,7 @@ set (utests_sources > test_printf.cpp > enqueue_fill_buf.cpp > enqueue_built_in_kernels.cpp > + builtin_kernel_max_global_size.cpp > image_1D_buffer.cpp > compare_image_2d_and_1d_array.cpp > compiler_constant_expr.cpp > diff --git a/utests/builtin_kernel_max_global_size.cpp > b/utests/builtin_kernel_max_global_size.cpp > new file mode 100644 > index 0000000..c777564 > --- /dev/null > +++ b/utests/builtin_kernel_max_global_size.cpp > @@ -0,0 +1,30 @@ > +#include "utest_helper.hpp" > + > +void builtin_kernel_max_global_size(void) > +{ > + char* built_in_kernel_names; > + size_t built_in_kernels_size; > + cl_int err = CL_SUCCESS; > + size_t ret_sz; > + > + > + OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, > &built_in_kernels_size); > + built_in_kernel_names = (char* )malloc(built_in_kernels_size * > sizeof(char) ); > + OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, > built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz); > + OCL_ASSERT(ret_sz == built_in_kernels_size); > + cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, > &device, built_in_kernel_names, &err); > + OCL_ASSERT(built_in_prog != NULL); > + cl_kernel builtin_kernel_1d = clCreateKernel(built_in_prog, > "__cl_copy_region_unalign_src_offset", &err); > + OCL_ASSERT(builtin_kernel_1d != NULL); > + size_t param_value_size; > + void* param_value; > + clGetKernelWorkGroupInfo(builtin_kernel_1d, device, > CL_KERNEL_GLOBAL_WORK_SIZE, 0, NULL, ¶m_value_size); > + param_value = malloc(param_value_size); > + clGetKernelWorkGroupInfo(builtin_kernel_1d, device, > CL_KERNEL_GLOBAL_WORK_SIZE, param_value_size, param_value, 0); > + OCL_ASSERT(*(size_t*)param_value == 256 * 1024 *1024); > + clReleaseKernel(builtin_kernel_1d); > + clReleaseProgram(built_in_prog); > + free(param_value); > +} > + > +MAKE_UTEST_FROM_FUNCTION(builtin_kernel_max_global_size); > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
