Pushed.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Zhigang Gong > Sent: Friday, November 13, 2015 7:26 > To: Gong, Zhigang > Cc: [email protected] > Subject: Re: [Beignet] [PATCH 4/5] runtime: set > CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to kernel's > SIMD_WIDTH. > > On Thu, Nov 12, 2015 at 04:47:04PM +0800, Zhigang Gong wrote: > > It makes sense to set > CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to > > the corresponding SIMD size. Then it provides a way for Intel's OCL > > application to get SIMD width at runtime and make some SIMD width > > dependent optimizations possible. > > > > Signed-off-by: Zhigang Gong <[email protected]> > > --- > > src/cl_api.c | 3 ++- > > src/cl_command_queue_gen7.c | 2 +- > > src/cl_device_id.c | 11 ++++++++++- > > src/cl_device_id.h | 2 -- > > src/cl_gt_device.h | 1 - > > 5 files changed, 13 insertions(+), 6 deletions(-) > > > > diff --git a/src/cl_api.c b/src/cl_api.c index a18bc99..64206eb 100644 > > --- a/src/cl_api.c > > +++ b/src/cl_api.c > > @@ -3001,6 +3001,7 @@ clEnqueueNDRangeKernel(cl_command_queue > command_queue, > > err = cl_command_queue_flush(command_queue); > > } > > > > +error: > > if(b_output_kernel_perf) > > { > > if(kernel->program->build_opts != NULL) @@ -3008,7 +3009,7 @@ > > clEnqueueNDRangeKernel(cl_command_queue command_queue, > > else > > time_end(command_queue->ctx, cl_kernel_get_name(kernel), "", > command_queue); > > } > > -error: > > + > > The above change fixes a deadlock that occurs when kernel performance > measurement is enabled and an error is hit in cl_command_queue_ND_range(). I forgot > to mention it in the commit log. > > Thanks, > Zhigang Gong. 
> > > return err; > > } > > > > diff --git a/src/cl_command_queue_gen7.c > b/src/cl_command_queue_gen7.c > > index 2edc3be..f0ee20a 100644 > > --- a/src/cl_command_queue_gen7.c > > +++ b/src/cl_command_queue_gen7.c > > @@ -329,7 +329,7 @@ > cl_command_queue_ND_range_gen7(cl_command_queue > > queue, > > > > /* Compute the number of HW threads we need */ > > if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3, > &local_sz) != CL_SUCCESS)) { > > - fprintf(stderr, "Beignet: Work group size exceed Kerne's work group > size.\n"); > > + fprintf(stderr, "Beignet: Work group size exceed Kernel's work > > + group size.\n"); > > return err; > > } > > kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz; > > diff --git a/src/cl_device_id.c b/src/cl_device_id.c index > > 4551aa8..8186ac8 100644 > > --- a/src/cl_device_id.c > > +++ b/src/cl_device_id.c > > @@ -966,7 +966,16 @@ cl_get_kernel_workgroup_info(cl_kernel kernel, > > return CL_SUCCESS; > > } > > } > > - DECL_FIELD(PREFERRED_WORK_GROUP_SIZE_MULTIPLE, device- > >preferred_wg_sz_mul) > > + case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: > > + { > > + if (param_value && param_value_size < sizeof(size_t)) > > + return CL_INVALID_VALUE; > > + if (param_value_size_ret != NULL) > > + *param_value_size_ret = sizeof(size_t); > > + if (param_value) > > + *(size_t*)param_value = interp_kernel_get_simd_width(kernel- > >opaque); > > + return CL_SUCCESS; > > + } > > case CL_KERNEL_LOCAL_MEM_SIZE: > > { > > size_t local_mem_sz = > > interp_kernel_get_slm_size(kernel->opaque) + kernel->local_mem_sz; > > diff --git a/src/cl_device_id.h b/src/cl_device_id.h index > > 4a923ef..c5f9e57 100644 > > --- a/src/cl_device_id.h > > +++ b/src/cl_device_id.h > > @@ -108,8 +108,6 @@ struct _cl_device_id { > > size_t driver_version_sz; > > size_t spir_versions_sz; > > size_t built_in_kernels_sz; > > - /* Kernel specific info that we're assigning statically */ > > - size_t preferred_wg_sz_mul; > > /* SubDevice specific info */ 
> > cl_device_id parent_device; > > cl_uint partition_max_sub_device; > > diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index > > de7a636..12987b7 100644 > > --- a/src/cl_gt_device.h > > +++ b/src/cl_gt_device.h > > @@ -39,7 +39,6 @@ > > .native_vector_width_float = 4, > > .native_vector_width_double = 2, > > .native_vector_width_half = 8, > > -.preferred_wg_sz_mul = 16, > > .address_bits = 32, > > .max_mem_alloc_size = 512 * 1024 * 1024, .image_support = CL_TRUE, > > -- > > 1.9.1 > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
