Now that all known failures with a local_work_size other than 1 have been fixed, re-enable the automatic local work size selection (used when local_work_size is NULL) to improve performance.
Signed-off-by: Yang Rong <[email protected]> --- src/cl_api.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 405a41a..c0ae1d3 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2422,7 +2422,8 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, size_t fixed_global_sz[] = {1,1,1}; size_t fixed_local_sz[] = {1,1,1}; cl_int err = CL_SUCCESS; - cl_uint i; + cl_uint i, j; + size_t t; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); @@ -2472,13 +2473,20 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, if (local_work_size != NULL) { for (i = 0; i < work_dim; ++i) fixed_local_sz[i] = local_work_size[i]; - } /*else { - for (i = 0; i< work_dim; i++) - for (j = 64; j > 1; j--) { //check from 64? - if (global_work_size[i] % j == 0) //global_work_size always non null + } else { + for (i = 0; i< work_dim; i++) { + for (j = 64; j > 1; j--) { //check from 64? + if (global_work_size[i] % j == 0) { //global_work_size always non null + t = fixed_local_sz[i]; fixed_local_sz[i] = j; + if(fixed_local_sz[0] * fixed_local_sz[1] * fixed_local_sz[2] > 256) + fixed_local_sz[i] = t; + else + break; + } } - } */ + } + } if (global_work_size != NULL) for (i = 0; i < work_dim; ++i) fixed_global_sz[i] = global_work_size[i]; -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
