Signed-off-by: Yang Rong <rong.r.y...@intel.com> --- src/cl_api.c | 47 +++++++++++++++++++++++++-- src/cl_enqueue.c | 1 + src/cl_mem.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/cl_mem.h | 9 ++++-- 4 files changed, 147 insertions(+), 6 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c index ecc2f43..c4c1bc8 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1917,7 +1917,7 @@ error: cl_int clEnqueueCopyBufferToImage(cl_command_queue command_queue, cl_mem src_buffer, - cl_mem dst_image, + cl_mem dst_mem, size_t src_offset, const size_t * dst_origin, const size_t * region, @@ -1925,8 +1925,49 @@ clEnqueueCopyBufferToImage(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - NOT_IMPLEMENTED; - return 0; + cl_int err = CL_SUCCESS; + enqueue_data *data, no_wait_data = { 0 }; + + CHECK_QUEUE(command_queue); + CHECK_MEM(src_buffer); + CHECK_IMAGE(dst_mem, dst_image); + if (command_queue->ctx != src_buffer->ctx || + command_queue->ctx != dst_mem->ctx) { + err = CL_INVALID_CONTEXT; + goto error; + } + + if (src_offset + region[0]*region[1]*region[2]*dst_image->bpp > src_buffer->size) { + err = CL_INVALID_VALUE; + goto error; + } + + if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || + dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { + err = CL_INVALID_VALUE; + goto error; + } + + if (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1)) { + err = CL_INVALID_VALUE; + goto error; + } + + cl_mem_copy_buffer_to_image(command_queue, src_buffer, dst_image, src_offset, dst_origin, region); + + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, dst_mem->ctx); + + data = &no_wait_data; + data->type = EnqueueCopyBufferToImage; + data->queue = command_queue; + + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, + event, data, CL_COMMAND_COPY_BUFFER_TO_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { + err = cl_command_queue_flush(command_queue); + } + +error: + return err; } static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, size_t size) diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index ef1a33a..7103357 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -401,6 +401,7 @@ cl_int cl_enqueue_handle(enqueue_data* data) case EnqueueCopyBufferRect: case EnqueueCopyImage: case EnqueueCopyBufferToImage: + case EnqueueCopyImageToBuffer: case EnqueueNDRangeKernel: cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); return CL_SUCCESS; diff --git a/src/cl_mem.c b/src/cl_mem.c index 7290370..21da858 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -820,6 +820,102 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image, return ret; } + + +LOCAL cl_int +cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_mem_image* image, + const size_t src_offset, const size_t *dst_origin, const size_t *region) { + cl_int ret; + cl_kernel ker; + size_t global_off[] = {0,0,0}; + size_t global_sz[] = {1,1,1}; + size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; + cl_int index = CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0; + char option[40] = ""; + uint32_t intel_fmt, bpp; + cl_image_format fmt; + size_t origin0, region0; + + if(region[1] == 1) local_sz[1] = 1; + if(region[2] == 1) local_sz[2] = 1; + global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; + global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; + global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; + + if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { + strcat(option, "-D IMAGE_3D"); + index += 1; + } + + static const char *str_kernel = + "#ifdef IMAGE_3D \n" + " #define IMAGE_TYPE image3d_t \n" + " #define COORD_TYPE int4 \n" + "#else \n" + " #define IMAGE_TYPE image2d_t \n" + " #define COORD_TYPE int2 \n" + "#endif \n" + "kernel void __cl_copy_image_to_buffer ( \n" + " __read_only IMAGE_TYPE image, global uchar* buffer, \n" + " unsigned int region0, unsigned int region1, unsigned int region2, \n" + " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2, \n" + " unsigned int src_offset) { \n" + " int i = get_global_id(0); \n" + " int j = get_global_id(1); \n" + " int k = get_global_id(2); \n" + " uint4 color = (uint4)(0); \n" + " COORD_TYPE dst_coord; \n" + " if((i >= region0) || (j>= region1) || (k>=region2)) \n" + " return; \n" + " dst_coord.x = dst_origin0 + i; \n" + " dst_coord.y = dst_origin1 + j; \n" + "#ifdef IMAGE_3D \n" + " dst_coord.z = dst_origin2 + k; \n" + "#endif \n" + " src_offset += (k * region1 + j) * region0 + i; \n" + " color.x = buffer[src_offset]; \n" + " write_imageui(image, dst_coord, color); \n" + "}"; + + /* We use one kernel to copy the data. The kernel is lazily created. */ + assert(image->base.ctx == buffer->ctx); + + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + intel_fmt = image->intel_fmt; + bpp = image->bpp; + image->intel_fmt = cl_image_get_intel_format(&fmt); + image->w = image->w * image->bpp; + image->bpp = 1; + region0 = region[0] * bpp; + origin0 = dst_origin[0] * bpp; + global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; + + /* setup the kernel and run. */ + ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); + if (!ker) + return CL_OUT_OF_RESOURCES; + + cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &image); + cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &buffer); + cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion0); + cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); + cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); + cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin0); + cl_kernel_set_arg(ker, 6, sizeof(cl_int), &dst_origin[1]); + cl_kernel_set_arg(ker, 7, sizeof(cl_int), &dst_origin[2]); + cl_kernel_set_arg(ker, 8, sizeof(cl_int), &src_offset); + + ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); + + image->intel_fmt = intel_fmt; + image->bpp = bpp; + image->w = image->w / bpp; + + return ret; +} + + LOCAL void* cl_mem_map(cl_mem mem) { diff --git a/src/cl_mem.h b/src/cl_mem.h index 0a8c723..2619385 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -193,13 +193,16 @@ extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem, /* api clEnqueueCopyImage help function */ extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*, - const size_t *, const size_t *, const size_t *); + const size_t *, const size_t *, const size_t *); - -/* api clEnqueueCopyImage help function */ +/* api clEnqueueCopyImageToBuffer help function */ extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, struct _cl_mem_image*, cl_mem, const size_t *, const size_t, const size_t *); +/* api clEnqueueCopyBufferToImage help function */ +extern cl_int cl_mem_copy_buffer_to_image(cl_command_queue, cl_mem, struct _cl_mem_image*, + const size_t, const size_t *, const size_t *); + /* Directly map a memory object */ extern void *cl_mem_map(cl_mem); -- 1.8.1.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet