LGTM, will push it latter. Thanks.
On Mon, Sep 09, 2013 at 04:10:21PM +0800, Yang Rong wrote: > Also refine the whole memcpy's condition in function > cl_enqueue_read_buffer_rect and cl_enqueue_write_buffer_rect. > > Signed-off-by: Yang Rong <rong.r.y...@intel.com> > --- > src/cl_api.c | 83 > +++++++++++++++++++++++++++++++++++++++++++----------- > src/cl_enqueue.c | 65 ++++++++++++++++++++++++++++++------------ > src/cl_enqueue.h | 7 +++-- > src/cl_gt_device.h | 2 +- > 4 files changed, 119 insertions(+), 38 deletions(-) > > diff --git a/src/cl_api.c b/src/cl_api.c > index 168bcfb..f014b41 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -1272,7 +1272,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue, > > data = &defer_enqueue_data; > data->type = EnqueueReadBuffer; > - data->mem_obj = buffer; > + data->mem_obj = &buffer; > data->ptr = ptr; > data->offset = offset; > data->size = size; > @@ -1353,7 +1353,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue, > > data = &no_wait_data; > data->type = EnqueueReadBufferRect; > - data->mem_obj = buffer; > + data->mem_obj = &buffer; > data->ptr = ptr; > data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; > data->origin[2] = buffer_origin[2]; > data->host_origin[0] = host_origin[0]; data->host_origin[1] = > host_origin[1]; data->host_origin[2] = host_origin[2]; > @@ -1411,7 +1411,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue, > > data = &no_wait_data; > data->type = EnqueueWriteBuffer; > - data->mem_obj = buffer; > + data->mem_obj = &buffer; > data->const_ptr = ptr; > data->offset = offset; > data->size = size; > @@ -1493,7 +1493,7 @@ clEnqueueWriteBufferRect(cl_command_queue > command_queue, > > data = &no_wait_data; > data->type = EnqueueWriteBufferRect; > - data->mem_obj = buffer; > + data->mem_obj = &buffer; > data->const_ptr = ptr; > data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; > data->origin[2] = buffer_origin[2]; > data->host_origin[0] = host_origin[0]; data->host_origin[1] = > host_origin[1]; data->host_origin[2] = host_origin[2]; > @@ -1683,7 +1683,7 @@ clEnqueueReadImage(cl_command_queue command_queue, > > data = &no_wait_data; > data->type = EnqueueReadImage; > - data->mem_obj = mem; > + data->mem_obj = &mem; > data->ptr = ptr; > data->origin[0] = origin[0]; data->origin[1] = origin[1]; > data->origin[2] = origin[2]; > data->region[0] = region[0]; data->region[1] = region[1]; > data->region[2] = region[2]; > @@ -1765,7 +1765,7 @@ clEnqueueWriteImage(cl_command_queue command_queue, > > data = &no_wait_data; > data->type = EnqueueWriteImage; > - data->mem_obj = mem; > + data->mem_obj = &mem; > data->const_ptr = ptr; > data->origin[0] = origin[0]; data->origin[1] = origin[1]; > data->origin[2] = origin[2]; > data->region[0] = region[0]; data->region[1] = region[1]; > data->region[2] = region[2]; > @@ -1860,7 +1860,7 @@ error: > > cl_int > clEnqueueCopyImageToBuffer(cl_command_queue command_queue, > - cl_mem src_image, > + cl_mem src_mem, > cl_mem dst_buffer, > const size_t * src_origin, > const size_t * region, > @@ -2001,10 +2001,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > > data = &no_wait_data; > data->type = EnqueueMapBuffer; > - data->mem_obj = buffer; > + data->mem_obj = &buffer; > data->offset = offset; > data->size = size; > - data->map_flags = map_flags; > data->ptr = ptr; > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > @@ -2088,12 +2087,11 @@ clEnqueueMapImage(cl_command_queue command_queue, > > data = &no_wait_data; > data->type = EnqueueMapImage; > - data->mem_obj = mem; > + data->mem_obj = &mem; > data->origin[0] = origin[0]; data->origin[1] = origin[1]; > data->origin[2] = origin[2]; > data->region[0] = region[0]; data->region[1] = region[1]; > data->region[2] = region[2]; > data->row_pitch = *image_row_pitch; > data->slice_pitch = *image_slice_pitch; > - data->map_flags = map_flags; > data->ptr = ptr; > data->offset = offset; > > @@ -2131,7 +2129,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue, > > data = &no_wait_data; > data->type = EnqueueUnmapMemObject; > - data->mem_obj = memobj; > + data->mem_obj = &memobj; > data->ptr = mapped_ptr; > > if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > @@ -2252,8 +2250,11 @@ clEnqueueTask(cl_command_queue command_queue, > const cl_event * event_wait_list, > cl_event * event) > { > - NOT_IMPLEMENTED; > - return 0; > + const size_t global_size[3] = {1, 0, 0}; > + const size_t local_size[3] = {1, 0, 0}; > + > + return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, > local_size, > + num_events_in_wait_list, event_wait_list, > event); > } > > cl_int > @@ -2268,8 +2269,58 @@ clEnqueueNativeKernel(cl_command_queue command_queue, > const cl_event * event_wait_list, > cl_event * event) > { > - NOT_IMPLEMENTED; > - return 0; > + cl_int err = CL_SUCCESS; > + void *new_args = NULL; > + enqueue_data *data, no_wait_data = { 0 }; > + cl_int i; > + > + if(user_func == NULL || > + (args == NULL && cb_args > 0) || > + (args == NULL && num_mem_objects ==0) || > + (args != NULL && cb_args == 0) || > + (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) || > + (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) { > + err = CL_INVALID_VALUE; > + goto error; > + } > + > + //Per spec, need copy args > + if (cb_args) > + { > + new_args = malloc(cb_args); > + if (!new_args) > + { > + err = CL_OUT_OF_HOST_MEMORY; > + goto error; > + } > + memcpy(new_args, args, cb_args); > + > + for (i=0; i<num_mem_objects; ++i) > + { > + CHECK_MEM(mem_list[i]); > + args_mem_loc[i] = new_args + (args_mem_loc[i] - args); //change to > new args > + } > + } > + > + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, > event, command_queue->ctx); > + > + data = &no_wait_data; > + data->type = EnqueueNativeKernel; > + data->mem_obj = mem_list; > + data->ptr = new_args; > + data->size = cb_args; > + data->offset = (size_t)num_mem_objects; > + data->const_ptr = args_mem_loc; > + data->user_func = user_func; > + > + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > + event, data, CL_COMMAND_NATIVE_KERNEL) == > CL_ENQUEUE_EXECUTE_IMM) { > + err = cl_enqueue_handle(data); > + if(event) cl_event_set_status(*event, CL_COMPLETE); > + } > + > +error: > + return err; > } > > cl_int > diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c > index 989b044..3c069fe 100644 > --- a/src/cl_enqueue.c > +++ b/src/cl_enqueue.c > @@ -32,14 +32,14 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data) > cl_int err = CL_SUCCESS; > void* src_ptr; > > - if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { > + if (!(src_ptr = cl_mem_map_auto(*data->mem_obj))) { > err = CL_MAP_FAILURE; > goto error; > } > > memcpy(data->ptr, (char*)src_ptr + data->offset, data->size); > > - err = cl_mem_unmap_auto(data->mem_obj); > + err = cl_mem_unmap_auto(*data->mem_obj); > > error: > return err; > @@ -55,7 +55,7 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data) > const size_t* host_origin = data->host_origin; > const size_t* region = data->region; > > - if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { > + if (!(src_ptr = cl_mem_map_auto(*data->mem_obj))) { > err = CL_MAP_FAILURE; > goto error; > } > @@ -66,8 +66,8 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data) > offset = host_origin[0] + data->host_row_pitch*host_origin[1] + > data->host_slice_pitch*host_origin[2]; > dst_ptr = (char *)data->ptr + offset; > > - if (!origin[0] && !host_origin[0] && data->row_pitch == > data->host_row_pitch && > - (region[2] == 1 || (!origin[1] && !host_origin[1] && > data->slice_pitch == data->host_slice_pitch))) > + if (data->row_pitch == region[0] && data->row_pitch == > data->host_row_pitch && > + (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && > data->slice_pitch == data->host_slice_pitch))) > { > memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : > data->slice_pitch*region[2]); > } > @@ -86,7 +86,7 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data) > } > } > > - err = cl_mem_unmap_auto(data->mem_obj); > + err = cl_mem_unmap_auto(*data->mem_obj); > > error: > return err; > @@ -97,14 +97,14 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data) > cl_int err = CL_SUCCESS; > void* dst_ptr; > > - if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { > + if (!(dst_ptr = cl_mem_map_auto(*data->mem_obj))) { > err = CL_MAP_FAILURE; > goto error; > } > > memcpy((char*)dst_ptr + data->offset, data->const_ptr, data->size); > > - err = cl_mem_unmap_auto(data->mem_obj); > + err = cl_mem_unmap_auto(*data->mem_obj); > > error: > return err; > @@ -120,7 +120,7 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data) > const size_t* host_origin = data->host_origin; > const size_t* region = data->region; > > - if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { > + if (!(dst_ptr = cl_mem_map_auto(*data->mem_obj))) { > err = CL_MAP_FAILURE; > goto error; > } > @@ -131,8 +131,8 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data) > offset = host_origin[0] + data->host_row_pitch*host_origin[1] + > data->host_slice_pitch*host_origin[2]; > src_ptr = (char*)data->const_ptr + offset; > > - if (!origin[0] && !host_origin[0] && data->row_pitch == > data->host_row_pitch && > - (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch > == data->host_slice_pitch))) > + if (data->row_pitch == region[0] && data->row_pitch == > data->host_row_pitch && > + (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && > data->slice_pitch == data->host_slice_pitch))) > { > memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : > data->slice_pitch*region[2]); > } > @@ -151,7 +151,7 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data) > } > } > > - err = cl_mem_unmap_auto(data->mem_obj); > + err = cl_mem_unmap_auto(*data->mem_obj); > > error: > return err; > @@ -163,7 +163,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data) > cl_int err = CL_SUCCESS; > void* src_ptr; > > - cl_mem mem = data->mem_obj; > + cl_mem mem = *data->mem_obj; > CHECK_IMAGE(mem, image); > const size_t* origin = data->origin; > const size_t* region = data->region; > @@ -208,7 +208,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data) > cl_int err = CL_SUCCESS; > void* dst_ptr; > > - cl_mem mem = data->mem_obj; > + cl_mem mem = *data->mem_obj; > CHECK_IMAGE(mem, image); > const size_t *origin = data->origin; > const size_t *region = data->region; > @@ -252,7 +252,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) > { > void *ptr = NULL; > cl_int err = CL_SUCCESS; > - cl_mem buffer = data->mem_obj; > + cl_mem buffer = *data->mem_obj; > //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here > if (!(ptr = cl_mem_map_gtt(buffer))) { > err = CL_MAP_FAILURE; > @@ -274,7 +274,7 @@ error: > cl_int cl_enqueue_map_image(enqueue_data *data) > { > cl_int err = CL_SUCCESS; > - cl_mem mem = data->mem_obj; > + cl_mem mem = *data->mem_obj; > void *ptr = NULL; > > if (!(ptr = cl_mem_map_gtt(mem))) { > @@ -295,7 +295,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) > size_t mapped_size = 0; > void * v_ptr = NULL; > void * mapped_ptr = data->ptr; > - cl_mem memobj = data->mem_obj; > + cl_mem memobj = *data->mem_obj; > > assert(memobj->mapped_ptr_sz >= memobj->map_ref); > INVALID_VALUE_IF(!mapped_ptr); > @@ -351,6 +351,32 @@ error: > return err; > } > > +cl_int cl_enqueue_native_kernel(enqueue_data *data) > +{ > + cl_int err = CL_SUCCESS; > + cl_uint num_mem_objects = (cl_uint)data->offset; > + const cl_mem *mem_list = data->mem_obj; > + const void **args_mem_loc = (const void **)data->const_ptr; > + cl_uint i; > + > + for (i=0; i<num_mem_objects; ++i) > + { > + const cl_mem buffer = mem_list[i]; > + CHECK_MEM(buffer); > + > + *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer); > + } > + data->user_func(data->ptr); > + > + for (i=0; i<num_mem_objects; ++i) > + { > + cl_mem_unmap_auto(mem_list[i]); > + } > + > + free(data->ptr); > +error: > + return err; > +} > cl_int cl_enqueue_handle(enqueue_data* data) > { > switch(data->type) { > @@ -375,7 +401,10 @@ cl_int cl_enqueue_handle(enqueue_data* data) > case EnqueueCopyBufferRect: > case EnqueueCopyImage: > case EnqueueNDRangeKernel: > - cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); //goto default > + cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); > + return CL_SUCCESS; > + case EnqueueNativeKernel: > + return cl_enqueue_native_kernel(data); > default: > return CL_SUCCESS; > } > diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h > index 848c7c4..236cc2d 100644 > --- a/src/cl_enqueue.h > +++ b/src/cl_enqueue.h > @@ -40,12 +40,13 @@ typedef enum { > EnqueueMapImage, > EnqueueUnmapMemObject, > EnqueueNDRangeKernel, > + EnqueueNativeKernel, > EnqueueInvalid > } enqueue_type; > > typedef struct _enqueue_data { > enqueue_type type; /* Command type */ > - cl_mem mem_obj; /* Enqueue's cl_mem */ > + const cl_mem *mem_obj; /* Enqueue's cl_mem */ > cl_command_queue queue; /* Command queue */ > size_t offset; /* Mem object's offset */ > size_t size; /* Size */ > @@ -56,9 +57,9 @@ typedef struct _enqueue_data { > size_t slice_pitch; /* Slice pitch */ > size_t host_row_pitch; /* Host row pitch, used in read/write > buffer rect */ > size_t host_slice_pitch; /* Host slice pitch, used in > read/write buffer rect */ > - cl_map_flags map_flags; /* Map flags */ > const void * const_ptr; /* Const ptr for memory read */ > - void * ptr; /* ptr for write and return value */ > + void * ptr; /* Ptr for write and return value */ > + void (*user_func)(void *); /* pointer to a host-callable user > function */ > } enqueue_data; > > /* Do real enqueue commands */ > diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h > index feb4ab3..1eb790f 100644 > --- a/src/cl_gt_device.h > +++ b/src/cl_gt_device.h > @@ -59,7 +59,7 @@ > .endian_little = CL_TRUE, > .available = CL_TRUE, > .compiler_available = CL_FALSE, /* XXX */ > -.execution_capabilities = CL_EXEC_KERNEL, > +.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL, > .queue_properties = CL_QUEUE_PROFILING_ENABLE, > .platform = NULL, /* == intel_platform (set when requested) */ > /* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */ > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet