LGTM, except some format. I have run git clang-format by manual and pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > yan.w...@linux.intel.com > Sent: Tuesday, June 13, 2017 16:32 > To: beignet@lists.freedesktop.org > Cc: Yan Wang <yan.w...@linux.intel.com> > Subject: [Beignet] [PATCH 2/2] Use aligned16 and aligne4 kernel to copy for > large 3D image with TILE_Y. > > From: Yan Wang <yan.w...@linux.intel.com> > > It is similar with 2D image for avoiding extended image width truncated. > > Signed-off-by: Yan Wang <yan.w...@linux.intel.com> > --- > src/CMakeLists.txt | 2 + > src/cl_context.h | 4 ++ > src/cl_mem.c | 46 > +++++++++++++++++++--- > .../cl_internal_copy_buffer_to_image_3d_align16.cl | 19 > +++++++++ .../cl_internal_copy_buffer_to_image_3d_align4.cl | 19 > +++++++++ .../cl_internal_copy_image_3d_to_buffer_align16.cl | 20 > ++++++++++ .../cl_internal_copy_image_3d_to_buffer_align4.cl | 20 > ++++++++++ > 7 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 > src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl > create mode 100644 > src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl > create mode 100644 > src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl > create mode 100644 > src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl > > diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87ad48b..ecb98b9 > 100644 > --- a/src/CMakeLists.txt > +++ b/src/CMakeLists.txt > @@ -54,6 +54,8 @@ cl_internal_copy_image_2d_array_to_3d > cl_internal_copy_image_3d_to_2d_array > cl_internal_copy_image_2d_to_buffer > cl_internal_copy_image_2d_to_buffer_align16 > cl_internal_copy_image_3d_to_buffer > cl_internal_copy_buffer_to_image_2d > cl_internal_copy_buffer_to_image_2d_align16 > cl_internal_copy_buffer_to_image_3d > cl_internal_copy_buffer_to_image_2d_align4 > cl_internal_copy_image_2d_to_buffer_align4 > +cl_internal_copy_buffer_to_image_3d_align4 > +cl_internal_copy_image_3d_to_buffer_align4 > +cl_internal_copy_buffer_to_image_3d_align16 > +cl_internal_copy_image_3d_to_buffer_align16 > cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 > cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign > cl_internal_fill_buf_align128 cl_internal_fill_image_1d diff --git > a/src/cl_context.h b/src/cl_context.h index 75bf895..b3a79bc 100644 > --- a/src/cl_context.h > +++ b/src/cl_context.h > @@ -64,10 +64,14 @@ enum _cl_internal_ker_type { > CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16, > CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4, > CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer > + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16, > + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d > + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16, > + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4, > CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligne pattern, > pattern size=1 > CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligne pattern, > pattern size=2 > CL_ENQUEUE_FILL_BUFFER_ALIGN4, //fill buffer with 4 aligne pattern, > pattern size=4 > diff --git a/src/cl_mem.c b/src/cl_mem.c index b6dce3f..307db50 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -2162,13 +2162,13 @@ get_align_size_for_copy_kernel(struct > _cl_mem_image* image, const size_t origin0 > const size_t offset, cl_image_format *fmt) { > size_t align_size = 0; > > - if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * > image->bpp) % ALIGN16 == 0) && > + if(((image->w * image->bpp) % ALIGN16 == 0) && > ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) > && (offset % ALIGN16 == 0)){ > fmt->image_channel_order = CL_RGBA; > fmt->image_channel_data_type = CL_UNSIGNED_INT32; > align_size = ALIGN16; > } > - else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image- > >w * image->bpp) % ALIGN4 == 0) && > + else if(((image->w * image->bpp) % ALIGN4 == 0) && > ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) && > (offset % ALIGN4 == 0)){ > fmt->image_channel_order = CL_R; > fmt->image_channel_data_type = CL_UNSIGNED_INT32; @@ -2247,11 > +2247,29 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, > cl_event event, struct _cl_m > cl_internal_copy_image_2d_to_buffer_str, > (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL); > } > }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { > - extern char cl_internal_copy_image_3d_to_buffer_str[]; > - extern size_t cl_internal_copy_image_3d_to_buffer_str_size; > + if(align_size == ALIGN16){ > + extern char cl_internal_copy_image_3d_to_buffer_align16_str[]; > + extern size_t > + cl_internal_copy_image_3d_to_buffer_align16_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN16, > + cl_internal_copy_image_3d_to_buffer_align16_str, > + (size_t)cl_internal_copy_image_3d_to_buffer_align16_str_size, > NULL); > + } > + else if(align_size == ALIGN4){ > + extern char cl_internal_copy_image_3d_to_buffer_align4_str[]; > + extern size_t > + cl_internal_copy_image_3d_to_buffer_align4_str_size; > > - ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER_ALIGN4, > + cl_internal_copy_image_3d_to_buffer_align4_str, > + (size_t)cl_internal_copy_image_3d_to_buffer_align4_str_size, > NULL); > + } > + else{ > + extern char cl_internal_copy_image_3d_to_buffer_str[]; > + extern size_t cl_internal_copy_image_3d_to_buffer_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > + CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, > cl_internal_copy_image_3d_to_buffer_str, > (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL); > + } > } > > if (!ker) { > @@ -2347,11 +2365,29 @@ > cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event > event, cl_mem buffe > cl_internal_copy_buffer_to_image_2d_str, > (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL); > } > }else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { > + if(align_size == ALIGN16){ > + extern char cl_internal_copy_buffer_to_image_3d_align16_str[]; > + extern size_t > + cl_internal_copy_buffer_to_image_3d_align16_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN16, > + cl_internal_copy_buffer_to_image_3d_align16_str, > + (size_t)cl_internal_copy_buffer_to_image_3d_align16_str_size, > NULL); > + } > + else if(align_size == ALIGN4){ > + extern char cl_internal_copy_buffer_to_image_3d_align4_str[]; > + extern size_t > + cl_internal_copy_buffer_to_image_3d_align4_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D_ALIGN4, > + cl_internal_copy_buffer_to_image_3d_align4_str, > + (size_t)cl_internal_copy_buffer_to_image_3d_align4_str_size, > NULL); > + } > + else{ > extern char cl_internal_copy_buffer_to_image_3d_str[]; > extern size_t cl_internal_copy_buffer_to_image_3d_str_size; > > ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, > cl_internal_copy_buffer_to_image_3d_str, > (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL); > + } > } > if (!ker) > return CL_OUT_OF_RESOURCES; > diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl > b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl > new file mode 100644 > index 0000000..32f1f63 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align16.cl > @@ -0,0 +1,19 @@ > +kernel void __cl_copy_buffer_to_image_3d_align16(__write_only > image3d_t image, global uint4* buffer, > + unsigned int region0, unsigned int > region1, unsigned int > region2, > + unsigned int dst_origin0, unsigned > int dst_origin1, > unsigned int dst_origin2, > + unsigned int src_offset) { > + int i = get_global_id(0); > + int j = get_global_id(1); > + int k = get_global_id(2); > + uint4 color = (uint4)(0); > + int4 dst_coord; > + if((i >= region0) || (j>= region1) || (k>=region2)) > + return; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + dst_coord.z = dst_origin2 + k; > + src_offset += (k * region1 + j) * region0 + i; > + color = buffer[src_offset]; > + write_imageui(image, dst_coord, color); } > diff --git a/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl > b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl > new file mode 100644 > index 0000000..2ccbcf1 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_buffer_to_image_3d_align4.cl > @@ -0,0 +1,19 @@ > +kernel void __cl_copy_buffer_to_image_3d_align4(__write_only > image3d_t image, global uint* buffer, > + unsigned int region0, unsigned int > region1, unsigned int > region2, > + unsigned int dst_origin0, unsigned > int dst_origin1, > unsigned int dst_origin2, > + unsigned int src_offset) { > + int i = get_global_id(0); > + int j = get_global_id(1); > + int k = get_global_id(2); > + uint4 color = (uint4)(0); > + int4 dst_coord; > + if((i >= region0) || (j>= region1) || (k>=region2)) > + return; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + dst_coord.z = dst_origin2 + k; > + src_offset += (k * region1 + j) * region0 + i; > + color.x = buffer[src_offset]; > + write_imageui(image, dst_coord, color); } > diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl > b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl > new file mode 100644 > index 0000000..e116d47 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align16.cl > @@ -0,0 +1,20 @@ > +kernel void __cl_copy_image_3d_to_buffer_align16 ( __read_only > image3d_t image, global uint4* buffer, > + unsigned int region0, unsigned int > region1, unsigned int > region2, > + unsigned int src_origin0, unsigned > int src_origin1, > unsigned int src_origin2, > + unsigned int dst_offset) { > + int i = get_global_id(0); > + int j = get_global_id(1); > + int k = get_global_id(2); > + uint4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | > +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; > + int4 src_coord; > + if((i >= region0) || (j>= region1) || (k>=region2)) > + return; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + src_coord.z = src_origin2 + k; > + color = read_imageui(image, sampler, src_coord); > + dst_offset += (k * region1 + j) * region0 + i; > + *(buffer + dst_offset) = color; > +} > diff --git a/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl > b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl > new file mode 100644 > index 0000000..d5374c4 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_3d_to_buffer_align4.cl > @@ -0,0 +1,20 @@ > +kernel void __cl_copy_image_3d_to_buffer_align4 ( __read_only > image3d_t image, global uint* buffer, > + unsigned int region0, unsigned int > region1, unsigned int > region2, > + unsigned int src_origin0, unsigned > int src_origin1, > unsigned int src_origin2, > + unsigned int dst_offset) { > + int i = get_global_id(0); > + int j = get_global_id(1); > + int k = get_global_id(2); > + uint4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | > +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; > + int4 src_coord; > + if((i >= region0) || (j>= region1) || (k>=region2)) > + return; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + src_coord.z = src_origin2 + k; > + color = read_imageui(image, sampler, src_coord); > + dst_offset += (k * region1 + j) * region0 + i; > + buffer[dst_offset] = color.x; > +} > -- > 2.7.4 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet