Add three helper function calls for device enqueue. Store the ndrange info on the stack, and write the device enqueue info to the auxiliary global buffer.
Signed-off-by: Yang Rong <[email protected]> --- backend/src/backend/program.cpp | 1 + backend/src/libocl/CMakeLists.txt | 4 +- backend/src/libocl/include/ocl.h | 1 + backend/src/libocl/include/ocl_enqueue.h | 67 +++++++++++++ backend/src/libocl/src/ocl_enqueue.cl | 156 +++++++++++++++++++++++++++++++ 5 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 backend/src/libocl/include/ocl_enqueue.h create mode 100644 backend/src/libocl/src/ocl_enqueue.cl diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 0119670..1580fe8 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -546,6 +546,7 @@ namespace gbe { // FIXME we haven't implement those builtin functions, // so disable it currently. args.push_back("-fno-builtin"); + args.push_back("-fblocks"); args.push_back("-disable-llvm-optzns"); if(bFastMath) args.push_back("-D __FAST_RELAXED_MATH__=1"); diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt index d7ed841..3b0d5f8 100644 --- a/backend/src/libocl/CMakeLists.txt +++ b/backend/src/libocl/CMakeLists.txt @@ -53,7 +53,7 @@ FOREACH(M ${OCL_COPY_HEADERS}) ENDFOREACH(M) SET (OCL_COPY_MODULES ocl_workitem ocl_atom ocl_async ocl_sync ocl_memcpy - ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group ocl_pipe) + ocl_memset ocl_misc ocl_vload ocl_geometric ocl_image ocl_work_group ocl_pipe ocl_enqueue) FOREACH(M ${OCL_COPY_MODULES}) COPY_THE_HEADER(${M}) COPY_THE_SOURCE(${M}) @@ -130,7 +130,7 @@ FOREACH(M ${OCL_BASH_GENERATED_MODULES}) ENDFOREACH(M) -SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND "-cl-std=CL2.0") +SET (CLANG_OCL_FLAGS -fno-builtin -ffp-contract=off -cl-kernel-arg-info -fblocks -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND "-cl-std=CL2.0") MACRO(ADD_CL_TO_BC_TARGET _file) # CMake seems can not add pattern rule, use MACRO to replace. 
STRING(REGEX REPLACE "${LIBOCL_BINARY_DIR}/src/\(o.*\)\\.cl" "${OCL_OBJECT_DIR}/\\1.bc" output_name ${_file}) diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h index e2918c6..852a523 100644 --- a/backend/src/libocl/include/ocl.h +++ b/backend/src/libocl/include/ocl.h @@ -41,6 +41,7 @@ #include "ocl_simd.h" #include "ocl_work_group.h" #include "ocl_pipe.h" +#include "ocl_enqueue.h" #pragma OPENCL EXTENSION cl_khr_fp64 : disable #pragma OPENCL EXTENSION cl_khr_fp16 : disable #endif diff --git a/backend/src/libocl/include/ocl_enqueue.h b/backend/src/libocl/include/ocl_enqueue.h new file mode 100644 index 0000000..a578846 --- /dev/null +++ b/backend/src/libocl/include/ocl_enqueue.h @@ -0,0 +1,67 @@ +/* + * Copyright © 2012 - 2014 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#ifndef __OCL_ENQUEUE_H__ +#define __OCL_ENQUEUE_H__ + +#include "ocl_types.h" +#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0 +#define CLK_SUCCESS 0 + +struct ndrange_info_t { + int type; + int global_work_size[3]; + int local_work_size[3]; + int global_work_offset[3]; +}; + +struct Block_literal { + void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock + int flags; + int reserved; + __global void (*invoke)(void *, ...); + struct Block_descriptor_1 { + unsigned long int reserved; // NULL + unsigned long int size; // sizeof(struct Block_literal_1) + // optional helper functions + void (*copy_helper)(void *dst, void *src); // IFF (1<<25) + void (*dispose_helper)(void *src); // IFF (1<<25) + // required ABI.2010.3.16 + const char *signature; // IFF (1<<30) + } *descriptor; + // imported variables +}; + +int enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void)); + +queue_t get_default_queue(void); +int __gen_enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void), int size); + +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size); +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size); +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset, size_t global_work_size, size_t local_work_size); + +OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2]); +OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2], size_t local_work_size[2]); +OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_offset[2], size_t global_work_size[2], size_t local_work_size[2]); + +OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3]); +OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3], size_t local_work_size[3]); +OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_offset[3], size_t global_work_size[3], size_t local_work_size[3]); + +#endif diff --git a/backend/src/libocl/src/ocl_enqueue.cl b/backend/src/libocl/src/ocl_enqueue.cl new file mode 100644 
index 0000000..78c39c4 --- /dev/null +++ b/backend/src/libocl/src/ocl_enqueue.cl @@ -0,0 +1,156 @@ +/* + * Copyright © 2012 - 2014 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + */ +#include "ocl_types.h" +#include "ocl_enqueue.h" +#include "ocl_workitem.h" +#include "ocl_atom.h" + +queue_t get_default_queue(void) +{ + queue_t queue; + return queue; //return NULL queue +} + +ndrange_t __gen_ocl_set_ndrange_info(__private struct ndrange_info_t *info); +__private struct ndrange_info_t* __gen_ocl_get_ndrange_info(ndrange_t info); +__global int* __gen_ocl_get_enqueue_info_addr(void); + +int enqueue_kernel(queue_t q, int flag, ndrange_t ndrange, void (^block)(void)) +{ + int i; + struct Block_literal *literal = (struct Block_literal *)block; + uchar *data = (uchar *)block; + int size = literal->descriptor->size; + __global int* start_addr = __gen_ocl_get_enqueue_info_addr(); + int offset = atomic_add(start_addr, size + sizeof(struct ndrange_info_t)); + __global uchar* addr = (__global uchar*)start_addr + offset + sizeof(int); + __private struct ndrange_info_t *info = __gen_ocl_get_ndrange_info(ndrange); + + *((__global struct ndrange_info_t *)addr) = *info; + addr += sizeof(*info); + + for(i=0; i< size; i++) { + addr[i] = data[i]; + } + return 0; +} + +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size) +{ + struct 
ndrange_info_t info; + info.type = 0x1; + info.global_work_size[0] = global_work_size; + return __gen_ocl_set_ndrange_info(&info); + //return ndrange; +} + +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_size, size_t local_work_size) +{ + struct ndrange_info_t info; + info.type = 0x2; + info.global_work_size[0] = global_work_size; + info.local_work_size[0] = local_work_size; + return __gen_ocl_set_ndrange_info(&info); + // return ndrange; +} + + +OVERLOADABLE ndrange_t ndrange_1D(size_t global_work_offset, size_t global_work_size, size_t local_work_size) +{ + struct ndrange_info_t info; + info.type = 0x3; + info.global_work_size[0] = global_work_size; + info.local_work_size[0] = local_work_size; + info.global_work_offset[0] = global_work_offset; + return __gen_ocl_set_ndrange_info(&info); + //return ndrange; +} + +OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2]) +{ + struct ndrange_info_t info; + info.type = 0x11; + info.global_work_size[0] = global_work_size[0]; + info.global_work_size[1] = global_work_size[1]; + return __gen_ocl_set_ndrange_info(&info); + //return ndrange; +} + +OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_size[2], size_t local_work_size[2]) +{ + struct ndrange_info_t info; + info.type = 0x12; + info.global_work_size[0] = global_work_size[0]; + info.global_work_size[1] = global_work_size[1]; + info.local_work_size[0] = local_work_size[0]; + info.local_work_size[1] = local_work_size[1]; + return __gen_ocl_set_ndrange_info(&info); +} + + +OVERLOADABLE ndrange_t ndrange_2D(size_t global_work_offset[2], size_t global_work_size[2], size_t local_work_size[2]) +{ + struct ndrange_info_t info; + info.type = 0x13; + info.global_work_size[0] = global_work_size[0]; + info.global_work_size[1] = global_work_size[1]; + info.local_work_size[0] = local_work_size[0]; + info.local_work_size[1] = local_work_size[1]; + info.global_work_offset[0] = global_work_offset[0]; + info.global_work_offset[1] = global_work_offset[1]; + return 
__gen_ocl_set_ndrange_info(&info); +} + +OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3]) +{ + struct ndrange_info_t info; + info.type = 0x21; + info.global_work_size[0] = global_work_size[0]; + info.global_work_size[1] = global_work_size[1]; + info.global_work_size[2] = global_work_size[2]; + return __gen_ocl_set_ndrange_info(&info); +} + +OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_size[3], size_t local_work_size[3]) +{ + struct ndrange_info_t info; + info.type = 0x22; + info.global_work_size[0] = global_work_size[0]; + info.global_work_size[1] = global_work_size[1]; + info.global_work_size[2] = global_work_size[2]; + info.local_work_size[0] = local_work_size[0]; + info.local_work_size[1] = local_work_size[1]; + info.local_work_size[2] = local_work_size[2]; + return __gen_ocl_set_ndrange_info(&info); +} + +OVERLOADABLE ndrange_t ndrange_3D(size_t global_work_offset[3], size_t global_work_size[3], size_t local_work_size[3]) +{ + struct ndrange_info_t info; + info.type = 0x23; + info.global_work_size[0] = global_work_size[0]; + info.global_work_size[1] = global_work_size[1]; + info.global_work_size[2] = global_work_size[2]; + info.local_work_size[0] = local_work_size[0]; + info.local_work_size[1] = local_work_size[1]; + info.local_work_size[2] = local_work_size[2]; + info.global_work_offset[0] = global_work_offset[0]; + info.global_work_offset[1] = global_work_offset[1]; + info.global_work_offset[2] = global_work_offset[2]; + return __gen_ocl_set_ndrange_info(&info); +} -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
