In function cl_mem_copy, the local work-group size is only 1 (local_sz is {1,1,1}); setting it to 16 or larger may improve performance.
-----Original Message----- From: [email protected] [mailto:[email protected]] On Behalf Of [email protected] Sent: Monday, September 23, 2013 5:02 PM To: [email protected] Cc: Junyan He Subject: [Beignet] [PATCH 3/4] Implement the clEnqueueCopyBuffer API using internal binary kernel From: Junyan He <[email protected]> Signed-off-by: Junyan He <[email protected]> --- src/cl_api.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/cl_context.c | 26 ++++++++++++++++++++++++++ src/cl_context.h | 28 +++++++++++++++++----------- src/cl_mem.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/cl_mem.h | 4 ++++ 5 files changed, 150 insertions(+), 12 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index c81f730..e9303d8 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1521,8 +1521,57 @@ clEnqueueCopyBuffer(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - NOT_IMPLEMENTED; + cl_int err = CL_SUCCESS; + enqueue_data *data, no_wait_data = { 0 }; + + CHECK_QUEUE(command_queue); + CHECK_MEM(src_buffer); + CHECK_MEM(dst_buffer); + + if (command_queue->ctx != src_buffer->ctx) { + err = CL_INVALID_CONTEXT; + goto error; + } + + if (command_queue->ctx != dst_buffer->ctx) { + err = CL_INVALID_CONTEXT; + goto error; + } + + if (src_offset < 0 || src_offset + cb > src_buffer->size) { + err = CL_INVALID_VALUE; + goto error; + } + if (dst_offset < 0 || dst_offset + cb > src_buffer->size) { + err = CL_INVALID_VALUE; + goto error; + } + + /* Check overlap */ + if (src_buffer == dst_buffer + && (src_offset <= dst_offset && dst_offset <= src_offset + cb - 1) + && (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) { + err = CL_MEM_COPY_OVERLAP; + goto error; + } + + // TODO: Need to check the sub buffer cases. 
+ err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, + dst_offset, cb); + + TRY(cl_event_check_waitlist, num_events_in_wait_list, + event_wait_list, event, src_buffer->ctx); + + data = &no_wait_data; + data->type = EnqueueCopyBuffer; + data->queue = command_queue; + + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, + event, data, CL_COMMAND_COPY_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { + err = cl_command_queue_flush(command_queue); + } return 0; + +error: + return err; } cl_int diff --git a/src/cl_context.c b/src/cl_context.c index 4f1c611..b62e946 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -268,3 +268,29 @@ cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kern return ctx->internel_kernels[index]; } + +cl_kernel +cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, + const char * str_kernel, size_t size, const char * +str_option) { + cl_int ret; + cl_int binary_status = CL_SUCCESS; + if (!ctx->internal_prgs[index]) + { + ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device, + &size, (const unsigned char **)&str_kernel, &binary_status, +&ret); + + if (!ctx->internal_prgs[index]) + return NULL; + + ret = cl_program_build(ctx->internal_prgs[index], str_option); + if (ret != CL_SUCCESS) + return NULL; + + ctx->internal_prgs[index]->is_built = 1; + + ctx->internel_kernels[index] = + cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); + } + + return ctx->internel_kernels[index]; +} diff --git a/src/cl_context.h b/src/cl_context.h index 7016733..29bcb9f 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -40,17 +40,19 @@ enum _cl_gl_context_type { }; enum _cl_internal_ker_type { - CL_ENQUEUE_COPY_BUFFER = 0, - CL_ENQUEUE_COPY_BUFFER_RECT = 1, - CL_ENQUEUE_COPY_IMAGE_0 = 2, //copy image 2d to image 2d - CL_ENQUEUE_COPY_IMAGE_1 = 3, //copy image 3d to image 2d - CL_ENQUEUE_COPY_IMAGE_2 = 4, //copy image 2d to image 3d - CL_ENQUEUE_COPY_IMAGE_3 = 5, 
//copy image 3d to image 3d - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0 = 6, //copy image 2d to buffer - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1 = 7, //copy image 3d tobuffer - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0 = 8, //copy buffer to image 2d - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1 = 9, //copy buffer to image 3d - CL_INTERNAL_KERNEL_MAX = 10 + CL_ENQUEUE_COPY_BUFFER_ALIGN1 = 0, + CL_ENQUEUE_COPY_BUFFER_ALIGN4, + CL_ENQUEUE_COPY_BUFFER_ALIGN16, + CL_ENQUEUE_COPY_BUFFER_RECT, + CL_ENQUEUE_COPY_IMAGE_0, //copy image 2d to image 2d + CL_ENQUEUE_COPY_IMAGE_1, //copy image 3d to image 2d + CL_ENQUEUE_COPY_IMAGE_2, //copy image 2d to image 3d + CL_ENQUEUE_COPY_IMAGE_3, //copy image 3d to image 3d + CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0, //copy image 2d to buffer + CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1, //copy image 3d tobuffer + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0, //copy buffer to image 2d + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1, //copy buffer to image 3d + CL_INTERNAL_KERNEL_MAX }; struct _cl_context_prop { @@ -137,5 +139,9 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx); /* Get the internal used kernel */ extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option); +/* Get the internal used kernel from binary*/ extern cl_kernel +cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, + const char * str_kernel, size_t size, const char * +str_option); + #endif /* __CL_CONTEXT_H__ */ diff --git a/src/cl_mem.c b/src/cl_mem.c index 769e1cb..56bc6b1 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -596,6 +596,59 @@ cl_mem_add_ref(cl_mem mem) atomic_inc(&mem->ref_n); } +LOCAL cl_int +cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, + size_t src_offset, size_t dst_offset, size_t cb) { + cl_int ret; + cl_kernel ker; + size_t global_off[] = {0,0,0}; + size_t global_sz[] = {1,1,1}; + size_t local_sz[] = {1,1,1}; + + /* We use one kernel to copy the data. The kernel is lazily created. 
+ */ assert(src_buf->ctx == dst_buf->ctx); + + if ((cb % 4) || (src_offset % 4) || (dst_offset % 4)) { + extern char cl_internal_copy_buf_align1_str[]; + extern int cl_internal_copy_buf_align1_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN1, + cl_internal_copy_buf_align1_str, (size_t)cl_internal_copy_buf_align1_str_size, NULL); + global_sz[0] = cb; + } else if ((cb % 16) || (src_offset % 16) || (dst_offset % 16)) { + extern char cl_internal_copy_buf_align4_str[]; + extern int cl_internal_copy_buf_align4_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4, + cl_internal_copy_buf_align4_str, (size_t)cl_internal_copy_buf_align4_str_size, NULL); + global_sz[0] = cb/4; + src_offset = src_offset/4; + dst_offset = dst_offset/4; + } else { + extern char cl_internal_copy_buf_align16_str[]; + extern int cl_internal_copy_buf_align16_str_size; + + ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16, + cl_internal_copy_buf_align16_str, (size_t)cl_internal_copy_buf_align16_str_size, NULL); + global_sz[0] = cb/16; + src_offset = src_offset/4; + dst_offset = dst_offset/4; + } + + if (!ker) + return CL_OUT_OF_RESOURCES; + + cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_buf); + cl_kernel_set_arg(ker, 1, sizeof(int), &src_offset); + cl_kernel_set_arg(ker, 2, sizeof(cl_mem), &dst_buf); + cl_kernel_set_arg(ker, 3, sizeof(int), &dst_offset); + + ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, + local_sz); + + return ret; +} + #define LOCAL_SZ_0 16 #define LOCAL_SZ_1 4 #define LOCAL_SZ_2 4 diff --git a/src/cl_mem.h b/src/cl_mem.h index ac09c3b..3313224 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -187,6 +187,10 @@ extern void cl_mem_gl_delete(struct _cl_mem_gl_image *); /* Add one more reference to this object */ extern void cl_mem_add_ref(cl_mem); +/* api clEnqueueCopyBuffer help function */ extern cl_int 
+cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, + size_t src_offset, size_t dst_offset, size_t cb); + /* api clEnqueueCopyBufferRect help function */ extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem, const size_t *, const size_t *, const size_t *, -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
