In function cl_get_kernel_max_wg_sz, hsw's thread count may be larger than 64, so add a maximum limit of 64 threads.
Signed-off-by: Yang Rong <rong.r.y...@intel.com> --- src/cl_device_id.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cl_device_id.c b/src/cl_device_id.c index a0d0db6..7944ca4 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -633,7 +633,7 @@ cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device) LOCAL size_t cl_get_kernel_max_wg_sz(cl_kernel kernel) { - size_t work_group_size; + size_t work_group_size, thread_cnt; int simd_width = interp_kernel_get_simd_width(kernel->opaque); int vendor_id = kernel->program->ctx->device->vendor_id; if (!interp_kernel_use_slm(kernel->opaque)) { @@ -642,9 +642,13 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel) else work_group_size = kernel->program->ctx->device->max_compute_unit * kernel->program->ctx->device->max_thread_per_unit * simd_width; - } else - work_group_size = kernel->program->ctx->device->max_compute_unit * simd_width * + } else { + thread_cnt = kernel->program->ctx->device->max_compute_unit * kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count; + if(thread_cnt > 64) + thread_cnt = 64; + work_group_size = thread_cnt * simd_width; + } return work_group_size; } -- 1.8.3.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet