From: Luo Xionghu <[email protected]> the SPIR header file requirs these functions to be overlable. (https://github.com/KhronosGroup/SPIR-Tools/blob/master/headers/opencl_spir.h)
Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/libocl/include/ocl_async.h | 2 +- backend/src/libocl/include/ocl_sync.h | 2 +- backend/src/libocl/include/ocl_types.h | 2 -- backend/src/libocl/include/ocl_workitem.h | 16 ++++++++-------- backend/src/libocl/src/ocl_async.cl | 2 +- backend/src/libocl/src/ocl_barrier.ll | 2 +- backend/src/libocl/src/ocl_workitem.cl | 6 +++--- kernels/compiler_async_copy.cl | 4 ++-- 8 files changed, 17 insertions(+), 19 deletions(-) diff --git a/backend/src/libocl/include/ocl_async.h b/backend/src/libocl/include/ocl_async.h index dd89942..9d5cc06 100644 --- a/backend/src/libocl/include/ocl_async.h +++ b/backend/src/libocl/include/ocl_async.h @@ -45,7 +45,7 @@ DEF(double) #undef DEFN #undef DEF -void wait_group_events (int num_events, event_t *event_list); +OVERLOADABLE void wait_group_events (int num_events, event_t *event_list); #define DEFN(TYPE) \ OVERLOADABLE void prefetch(const global TYPE *p, size_t num); diff --git a/backend/src/libocl/include/ocl_sync.h b/backend/src/libocl/include/ocl_sync.h index ed7c6e4..18090d5 100644 --- a/backend/src/libocl/include/ocl_sync.h +++ b/backend/src/libocl/include/ocl_sync.h @@ -27,7 +27,7 @@ #define CLK_GLOBAL_MEM_FENCE (1 << 1) typedef uint cl_mem_fence_flags; -void barrier(cl_mem_fence_flags flags); +OVERLOADABLE void barrier(cl_mem_fence_flags flags); void mem_fence(cl_mem_fence_flags flags); void read_mem_fence(cl_mem_fence_flags flags); void write_mem_fence(cl_mem_fence_flags flags); diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h index 487fe68..ae0236b 100644 --- a/backend/src/libocl/include/ocl_types.h +++ b/backend/src/libocl/include/ocl_types.h @@ -87,7 +87,5 @@ DEF(double); // FIXME: // This is a transitional hack to bypass the LLVM 3.3 built-in types. // See the Khronos SPIR specification for handling of these types. -typedef size_t __event_t; -#define event_t __event_t #endif /* __OCL_TYPES_H__ */ diff --git a/backend/src/libocl/include/ocl_workitem.h b/backend/src/libocl/include/ocl_workitem.h index 7534ee8..84bb1fb 100644 --- a/backend/src/libocl/include/ocl_workitem.h +++ b/backend/src/libocl/include/ocl_workitem.h @@ -20,13 +20,13 @@ #include "ocl_types.h" -uint get_work_dim(void); -uint get_global_size(uint dimindx); -uint get_global_id(uint dimindx); -uint get_local_size(uint dimindx); -uint get_local_id(uint dimindx); -uint get_num_groups(uint dimindx); -uint get_group_id(uint dimindx); -uint get_global_offset(uint dimindx); +OVERLOADABLE uint get_work_dim(void); +OVERLOADABLE uint get_global_size(uint dimindx); +OVERLOADABLE uint get_global_id(uint dimindx); +OVERLOADABLE uint get_local_size(uint dimindx); +OVERLOADABLE uint get_local_id(uint dimindx); +OVERLOADABLE uint get_num_groups(uint dimindx); +OVERLOADABLE uint get_group_id(uint dimindx); +OVERLOADABLE uint get_global_offset(uint dimindx); #endif /* __OCL_WORKITEM_H__ */ diff --git a/backend/src/libocl/src/ocl_async.cl b/backend/src/libocl/src/ocl_async.cl index 041aaf2..10d0aa4 100644 --- a/backend/src/libocl/src/ocl_async.cl +++ b/backend/src/libocl/src/ocl_async.cl @@ -66,7 +66,7 @@ DEF(double) #undef DEFN #undef DEF -void wait_group_events (int num_events, event_t *event_list) { +OVERLOADABLE void wait_group_events (int num_events, event_t *event_list) { barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); } diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll index 4e55fcb..dc3579c 100644 --- a/backend/src/libocl/src/ocl_barrier.ll +++ b/backend/src/libocl/src/ocl_barrier.ll @@ -10,7 +10,7 @@ declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate -define void @barrier(i32 %flags) nounwind noduplicate alwaysinline { +define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline { %1 = icmp eq i32 %flags, 3 br i1 %1, label %barrier_local_global, label %barrier_local_check diff --git a/backend/src/libocl/src/ocl_workitem.cl b/backend/src/libocl/src/ocl_workitem.cl index f4629f8..6ddc406 100644 --- a/backend/src/libocl/src/ocl_workitem.cl +++ b/backend/src/libocl/src/ocl_workitem.cl @@ -18,7 +18,7 @@ #include "ocl_workitem.h" PURE CONST uint __gen_ocl_get_work_dim(void); -uint get_work_dim(void) +OVERLOADABLE uint get_work_dim(void) { return __gen_ocl_get_work_dim(); } @@ -37,7 +37,7 @@ DECL_INTERNAL_WORK_ITEM_FN(get_num_groups) #undef DECL_INTERNAL_WORK_ITEM_FN #define DECL_PUBLIC_WORK_ITEM_FN(NAME, OTHER_RET) \ -unsigned NAME(unsigned int dim) { \ +OVERLOADABLE unsigned NAME(unsigned int dim) { \ if (dim == 0) return __gen_ocl_##NAME##0(); \ else if (dim == 1) return __gen_ocl_##NAME##1(); \ else if (dim == 2) return __gen_ocl_##NAME##2(); \ @@ -52,6 +52,6 @@ DECL_PUBLIC_WORK_ITEM_FN(get_global_offset, 0) DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1) #undef DECL_PUBLIC_WORK_ITEM_FN -uint get_global_id(uint dim) { +OVERLOADABLE uint get_global_id(uint dim) { return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) + get_global_offset(dim); } diff --git a/kernels/compiler_async_copy.cl b/kernels/compiler_async_copy.cl index dddde44..4beb436 100644 --- a/kernels/compiler_async_copy.cl +++ b/kernels/compiler_async_copy.cl @@ -5,10 +5,10 @@ compiler_async_copy_##TYPE(__global TYPE *dst, __global TYPE *src, __local TYPE event_t event; \ int copiesPerWorkgroup = copiesPerWorkItem * get_local_size(0); \ int i; \ - event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 ); \ + event = async_work_group_copy((__local TYPE*)localBuffer, (__global const TYPE*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 ); \ wait_group_events( 1, &event ); \ \ - event = async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 ); \ + event = async_work_group_copy((__global TYPE*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const TYPE*)localBuffer, (size_t)copiesPerWorkgroup, 0 ); \ wait_group_events( 1, &event ); \ } -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
