The first version has already been pushed. Please split the error-code fix into a separate patch and send it again. Thanks.
On Wed, Oct 15, 2014 at 04:44:16PM +0800, Yang Rong wrote: > Opencv only query and use device max work group size, when SLM/Barrier > enable, BDW > can't fill 1024 work group in one subslice, even in SIMD16. Change device's > max work > group size temp. > > V2: Return error code when work group error instead of exit. > Signed-off-by: Yang Rong <[email protected]> > --- > src/cl_command_queue_gen7.c | 5 ++++- > src/cl_device_id.c | 8 +++++--- > 2 files changed, 9 insertions(+), 4 deletions(-) > > diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c > index b020540..c11e25d 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -317,7 +317,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, > kernel.use_slm = interp_kernel_use_slm(ker->opaque); > > /* Compute the number of HW threads we need */ > - TRY (cl_kernel_work_group_sz, ker, local_wk_sz, 3, &local_sz); > + if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3, &local_sz) > != CL_SUCCESS)) { > + fprintf(stderr, "Beignet: Work group size exceed Kerne's work group > size.\n"); > + return err; > + } > kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz; > kernel.curbe_sz = cst_sz; > > diff --git a/src/cl_device_id.c b/src/cl_device_id.c > index 7944ca4..c911c23 100644 > --- a/src/cl_device_id.c > +++ b/src/cl_device_id.c > @@ -110,7 +110,7 @@ static struct _cl_device_id intel_brw_gt1_device = { > .max_thread_per_unit = 7, > .sub_slice_count = 2, > .max_work_item_sizes = {1024, 1024, 1024}, > - .max_work_group_size = 1024, > + .max_work_group_size = 512, > .max_clock_frequency = 1000, > #include "cl_gen75_device.h" > }; > @@ -121,7 +121,7 @@ static struct _cl_device_id intel_brw_gt2_device = { > .max_thread_per_unit = 7, > .sub_slice_count = 3, > .max_work_item_sizes = {1024, 1024, 1024}, > - .max_work_group_size = 1024, > + .max_work_group_size = 512, > .max_clock_frequency = 1000, > #include "cl_gen75_device.h" > }; > @@ -132,7 +132,7 @@ 
static struct _cl_device_id intel_brw_gt3_device = { > .max_thread_per_unit = 7, > .sub_slice_count = 6, > .max_work_item_sizes = {1024, 1024, 1024}, > - .max_work_group_size = 1024, > + .max_work_group_size = 512, > .max_clock_frequency = 1000, > #include "cl_gen75_device.h" > }; > @@ -648,6 +648,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel) > if(thread_cnt > 64) > thread_cnt = 64; > work_group_size = thread_cnt * simd_width; > + if(work_group_size > kernel->program->ctx->device->max_work_group_size) > + work_group_size = kernel->program->ctx->device->max_work_group_size; > } > return work_group_size; > } > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
