From: Junyan He <[email protected]> Add internally used kernels for buffer copy. The copy is separated into three kernels, for 1-, 4- and 16-byte alignment, to improve performance. The CMakeLists.txt is also updated.
Signed-off-by: Junyan He <[email protected]>
---
 src/CMakeLists.txt                          |   26 ++++++++++++++++++++++++++
 src/kernels/cl_internal_copy_buf_align1.cl  |   10 ++++++++++
 src/kernels/cl_internal_copy_buf_align16.cl |   17 +++++++++++++++++
 src/kernels/cl_internal_copy_buf_align4.cl  |   10 ++++++++++
 4 files changed, 63 insertions(+)
 create mode 100644 src/kernels/cl_internal_copy_buf_align1.cl
 create mode 100644 src/kernels/cl_internal_copy_buf_align16.cl
 create mode 100644 src/kernels/cl_internal_copy_buf_align4.cl

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3fc8689..e295845 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,7 +4,33 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}
                     ${CMAKE_CURRENT_SOURCE_DIR}/../include
                     ${MESA_SOURCE_INCLUDES})
 
+# MakeKernelBinStr(KERNEL_PATH KERNEL_FILES)
+# For each kernel name in the list KERNEL_FILES, add a rule that runs
+# gbe_bin_generater on ${KERNEL_PATH}/<name>.cl to produce
+# ${KERNEL_PATH}/<name>_str.c, and append that output to KERNEL_STR_FILES.
+# This is a macro so that the append lands in the caller's scope.
+macro (MakeKernelBinStr KERNEL_PATH KERNEL_FILES)
+foreach (KF ${KERNEL_FILES})
+  set (input_file ${KERNEL_PATH}/${KF}.cl)
+  set (output_file ${KERNEL_PATH}/${KF}_str.c)
+  list (APPEND KERNEL_STR_FILES ${output_file})
+  # Remove any stale output with CMake's portable file command, then regenerate.
+  add_custom_command(
+    OUTPUT ${output_file}
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${output_file}
+    COMMAND ${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater -s ${input_file} -o${output_file}
+    DEPENDS ${input_file} ${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater
+    VERBATIM)
+endforeach (KF)
+endmacro (MakeKernelBinStr)
+
+# Generate the *_str.c files for the internal buffer-copy kernels.
+set (KERNEL_STR_FILES)
+set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 cl_internal_copy_buf_align16)
+MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels" "${KERNEL_NAMES}")
+
 set(OPENCL_SRC
+    ${KERNEL_STR_FILES}
     cl_api.c
     cl_alloc.c
     cl_kernel.c
diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl
new file mode 100644
index 0000000..cd3ec7b
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align1.cl
@@ -0,0 +1,10 @@
+/* Copy `size` bytes from src + src_offset to dst + dst_offset, one byte per
+ * work-item. Work-items whose global id is >= size do nothing. */
+kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset,
+                                     global char* dst, unsigned int dst_offset,
+                                     unsigned int size)
+{
+  unsigned int i = get_global_id(0);
+  if (i < size)
+    dst[i+dst_offset] = src[i+src_offset];
+}
diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl
new file mode 100644
index 0000000..75b1a4a
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align16.cl
@@ -0,0 +1,17 @@
+/* Copy `size` groups of four consecutive floats from src + src_offset to
+ * dst + dst_offset; both offsets are counted in float elements. Each work-item
+ * copies one group. Work-items whose global id is >= size do nothing. */
+kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset,
+                                      global float* dst, unsigned int dst_offset,
+                                      unsigned int size)
+{
+  unsigned int group = get_global_id(0);
+  /* Compare the group index itself: size*4 would wrap for size >= 2^30. */
+  if (group < size) {
+    unsigned int i = group * 4;
+    dst[i+dst_offset] = src[i+src_offset];
+    dst[i+dst_offset + 1] = src[i+src_offset + 1];
+    dst[i+dst_offset + 2] = src[i+src_offset + 2];
+    dst[i+dst_offset + 3] = src[i+src_offset + 3];
+  }
+}
diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl
new file mode 100644
index 0000000..44a0f81
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_align4.cl
@@ -0,0 +1,10 @@
+/* Copy `size` floats from src + src_offset to dst + dst_offset, one float per
+ * work-item; both offsets are counted in float elements. Work-items whose
+ * global id is >= size do nothing. */
+kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset,
+                                     global float* dst, unsigned int dst_offset,
+                                     unsigned int size)
+{
+  unsigned int i = get_global_id(0);
+  if (i < size)
+    dst[i+dst_offset] = src[i+src_offset];
+}
-- 
1.7.9.5

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet
