LGTM, pushed, thanks.
On Wed, Mar 26, 2014 at 06:27:48PM +0800, [email protected] wrote: > From: Junyan He <[email protected]> > > Add these three cl files, > one for src and dst are not aligned but have same offset to 4. > second for src's %4 offset is bigger than the dst's > third for src's %4 offset is small than the dst's > > Signed-off-by: Junyan He <[email protected]> > --- > src/CMakeLists.txt | 4 ++- > .../cl_internal_copy_buf_unalign_dst_offset.cl | 28 +++++++++++++++++++++ > .../cl_internal_copy_buf_unalign_same_offset.cl | 19 ++++++++++++++ > .../cl_internal_copy_buf_unalign_src_offset.cl | 29 > ++++++++++++++++++++++ > 4 files changed, 79 insertions(+), 1 deletion(-) > create mode 100644 src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl > create mode 100644 src/kernels/cl_internal_copy_buf_unalign_same_offset.cl > create mode 100644 src/kernels/cl_internal_copy_buf_unalign_src_offset.cl > > diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt > index 95ff56f..9db53ad 100644 > --- a/src/CMakeLists.txt > +++ b/src/CMakeLists.txt > @@ -18,7 +18,9 @@ endforeach (KF) > endmacro (MakeKernelBinStr) > > set (KERNEL_STR_FILES) > -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 > cl_internal_copy_buf_align16) > +set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 > +cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset > +cl_internal_copy_buf_unalign_dst_offset > cl_internal_copy_buf_unalign_src_offset) > MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") > > set(OPENCL_SRC > diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl > b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl > new file mode 100644 > index 0000000..13f4162 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl > @@ -0,0 +1,28 @@ > +kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned > int src_offset, > + global int* dst, unsigned int > dst_offset, > + unsigned int size, > + unsigned int first_mask, unsigned int > last_mask, > + unsigned int shift, unsigned int dw_mask) > +{ > + int i = get_global_id(0); > + unsigned int tmp = 0; > + > + if (i > size -1) > + return; > + > + /* last dw, need to be careful, not to overflow the source. */ > + if ((i == size - 1) && ((last_mask & (~(~dw_mask >> shift))) == 0)) { > + tmp = ((src[src_offset + i] & ~dw_mask) >> shift); > + } else { > + tmp = ((src[src_offset + i] & ~dw_mask) >> shift) > + | ((src[src_offset + i + 1] & dw_mask) << (32 - shift)); > + } > + > + if (i == 0) { > + dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & > (~first_mask)); > + } else if (i == size - 1) { > + dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & > (~last_mask)); > + } else { > + dst[i+dst_offset] = tmp; > + } > +} > diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl > b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl > new file mode 100644 > index 0000000..8510246 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl > @@ -0,0 +1,19 @@ > +kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned > int src_offset, > + global int* dst, unsigned int > dst_offset, > + unsigned int size, > + unsigned int first_mask, unsigned int > last_mask) > +{ > + int i = get_global_id(0); > + if (i > size -1) > + return; > + > + if (i == 0) { > + dst[dst_offset] = (dst[dst_offset] & first_mask) > + | (src[src_offset] & (~first_mask)); > + } else if (i == size - 1) { > + dst[i+dst_offset] = (src[i+src_offset] & last_mask) > + | (dst[i+dst_offset] & (~last_mask)); > + } else { > + dst[i+dst_offset] = src[i+src_offset]; > + } > +} > diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl > b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl > new file mode 100644 > index 0000000..f98368a > --- /dev/null > +++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl > @@ -0,0 +1,29 @@ > +kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned > int src_offset, > + global int* dst, unsigned int > dst_offset, > + unsigned int size, > + unsigned int first_mask, unsigned int > last_mask, > + unsigned int shift, unsigned int dw_mask, > int src_less) > +{ > + int i = get_global_id(0); > + unsigned int tmp = 0; > + > + if (i > size -1) > + return; > + > + if (i == 0) { > + tmp = ((src[src_offset + i] & dw_mask) << shift); > + } else if (src_less && i == size - 1) { // not exceed the bound of source > + tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)); > + } else { > + tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)) > + | ((src[src_offset + i] & dw_mask) << shift); > + } > + > + if (i == 0) { > + dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & > (~first_mask)); > + } else if (i == size - 1) { > + dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & > (~last_mask)); > + } else { > + dst[i+dst_offset] = tmp; > + } > +} > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
