it is defined in https://www.khronos.org/registry/cl/extensions/intel/cl_intel_subgroups.txt
Signed-off-by: Guo Yejun <[email protected]> --- backend/src/backend/gen_insn_selection.cpp | 2 ++ backend/src/libocl/include/ocl_misc.h | 8 ------ backend/src/libocl/tmpl/ocl_simd.tmpl.h | 2 ++ backend/src/llvm/llvm_gen_backend.cpp | 4 +-- backend/src/llvm/llvm_gen_ocl_function.hxx | 4 +-- kernels/compiler_simd_all.cl | 12 --------- kernels/compiler_simd_any.cl | 15 ----------- kernels/compiler_sub_group_all.cl | 12 +++++++++ kernels/compiler_sub_group_any.cl | 15 +++++++++++ utests/CMakeLists.txt | 4 +-- utests/compiler_simd_all.cpp | 43 ------------------------------ utests/compiler_simd_any.cpp | 43 ------------------------------ utests/compiler_sub_group_all.cpp | 43 ++++++++++++++++++++++++++++++ utests/compiler_sub_group_any.cpp | 43 ++++++++++++++++++++++++++++++ 14 files changed, 123 insertions(+), 127 deletions(-) delete mode 100644 kernels/compiler_simd_all.cl delete mode 100644 kernels/compiler_simd_any.cl create mode 100644 kernels/compiler_sub_group_all.cl create mode 100644 kernels/compiler_sub_group_any.cl delete mode 100644 utests/compiler_simd_all.cpp delete mode 100644 utests/compiler_simd_any.cpp create mode 100644 utests/compiler_sub_group_all.cpp create mode 100644 utests/compiler_sub_group_any.cpp diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 98d8780..105983c 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2170,6 +2170,8 @@ namespace gbe return insnType; if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_LZD) return ir::TYPE_U32; + if (opcode == ir::OP_SIMD_ANY || opcode == ir::OP_SIMD_ALL) + return ir::TYPE_S32; if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16) return insnType; if (insnType == ir::TYPE_BOOL) diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h index aa3f504..359025b 100644 --- a/backend/src/libocl/include/ocl_misc.h +++ b/backend/src/libocl/include/ocl_misc.h @@ -128,14 +128,6 @@ DEF(ulong) #undef DEC16 #undef DEC16X - -/* Temp to add the SIMD functions here. */ -///////////////////////////////////////////////////////////////////////////// -// SIMD level function -///////////////////////////////////////////////////////////////////////////// -short __gen_ocl_simd_any(short); -short __gen_ocl_simd_all(short); - struct time_stamp { // time tick ulong tick; diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h index 14e5750..67a1cee 100644 --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h @@ -23,6 +23,8 @@ ///////////////////////////////////////////////////////////////////////////// // SIMD level function ///////////////////////////////////////////////////////////////////////////// +int sub_group_any(int); +int sub_group_all(int); uint get_sub_group_size(void); uint get_sub_group_id(void); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index f5743ba..fadc97b 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -3063,14 +3063,14 @@ namespace gbe { const ir::Register src = this->getRegister(*AI); const ir::Register dst = this->getRegister(&I); - ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S16, dst, src); + ctx.ALU1(ir::OP_SIMD_ALL, ir::TYPE_S32, dst, src); break; } case GEN_OCL_SIMD_ANY: { const ir::Register src = this->getRegister(*AI); const ir::Register dst = this->getRegister(&I); - ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src); + ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S32, dst, src); break; } case GEN_OCL_READ_TM: diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index a0e0b94..671e785 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -152,8 +152,8 @@ DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32) DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) // SIMD level function for internal usage -DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) -DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) +DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any) +DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all) DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_sub_group_size) DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_id) DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle) diff --git a/kernels/compiler_simd_all.cl b/kernels/compiler_simd_all.cl deleted file mode 100644 index 504710b..0000000 --- a/kernels/compiler_simd_all.cl +++ /dev/null @@ -1,12 +0,0 @@ -__kernel void compiler_simd_all(global int *src, global int *dst) -{ - int i = get_global_id(0); - if (i % 2 == 1) { - if (__gen_ocl_simd_all((src[i] < 12) && (src[i] > 0))) - dst[i] = 1; - else - dst[i] = 2; - } - else - dst[i] = 3; -} diff --git a/kernels/compiler_simd_any.cl b/kernels/compiler_simd_any.cl deleted file mode 100644 index 3b04f82..0000000 --- a/kernels/compiler_simd_any.cl +++ /dev/null @@ -1,15 +0,0 @@ -__kernel void compiler_simd_any(global int *src, global int *dst) -{ - int i = get_global_id(0); - - if (i % 2 == 1) { - if (__gen_ocl_simd_any(src[i] == 5) || __gen_ocl_simd_any(src[i] == 9)) - dst[i] = 1; - else if (__gen_ocl_simd_any(src[i] == 6)) - dst[i] = 0; - else - dst[i] = 2; - } - else - dst[i] = 3; -} diff --git a/kernels/compiler_sub_group_all.cl b/kernels/compiler_sub_group_all.cl new file mode 100644 index 0000000..30db5bc --- /dev/null +++ b/kernels/compiler_sub_group_all.cl @@ -0,0 +1,12 @@ +__kernel void compiler_sub_group_all(global int *src, global int *dst) +{ + int i = get_global_id(0); + if (i % 2 == 1) { + if (sub_group_all((src[i] < 12) && (src[i] > 0))) + dst[i] = 1; + else + dst[i] = 2; + } + else + dst[i] = 3; +} diff --git a/kernels/compiler_sub_group_any.cl b/kernels/compiler_sub_group_any.cl new file mode 100644 index 0000000..15702db --- /dev/null +++ b/kernels/compiler_sub_group_any.cl @@ -0,0 +1,15 @@ +__kernel void compiler_sub_group_any(global int *src, global int *dst) +{ + int i = get_global_id(0); + + if (i % 2 == 1) { + if (sub_group_any(src[i] == 5) || sub_group_any(src[i] == 9)) + dst[i] = 1; + else if (sub_group_any(src[i] == 6)) + dst[i] = 0; + else + dst[i] = 2; + } + else + dst[i] = 3; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 977e459..899b52c 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -187,8 +187,8 @@ set (utests_sources compiler_private_const.cpp compiler_private_data_overflow.cpp compiler_getelementptr_bitcast.cpp - compiler_simd_any.cpp - compiler_simd_all.cpp + compiler_sub_group_any.cpp + compiler_sub_group_all.cpp compiler_time_stamp.cpp compiler_double_precision.cpp load_program_from_gen_bin.cpp diff --git a/utests/compiler_simd_all.cpp b/utests/compiler_simd_all.cpp deleted file mode 100644 index 086c54f..0000000 --- a/utests/compiler_simd_all.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "utest_helper.hpp" - -void compiler_simd_all(void) -{ - const size_t n = 40; - - // Setup kernel and buffers - OCL_CREATE_KERNEL("compiler_simd_all"); - OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); - OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); - OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); - OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); - - globals[0] = n; - locals[0] = 10; - - OCL_MAP_BUFFER(0); - for (int32_t i = 0; i < (int32_t) n; ++i) - ((int*)buf_data[0])[i] = i; - OCL_UNMAP_BUFFER(0); - - // Run the kernel on GPU - OCL_NDRANGE(1); - - // Run on CPU - - // Compare - OCL_MAP_BUFFER(1); - for (int32_t i = 0; i < (int32_t) n; ++i) { - //printf("%d %d\n", i, ((int *)buf_data[1])[i]); - if (i % 2 == 1) { - if (i < (int32_t)locals[0]) - OCL_ASSERT(((int *)buf_data[1])[i] == 1); - else - OCL_ASSERT(((int *)buf_data[1])[i] == 2); - } - else - OCL_ASSERT(((int *)buf_data[1])[i] == 3); - } - OCL_UNMAP_BUFFER(1); -} - -MAKE_UTEST_FROM_FUNCTION(compiler_simd_all); diff --git a/utests/compiler_simd_any.cpp b/utests/compiler_simd_any.cpp deleted file mode 100644 index dcc5ef1..0000000 --- a/utests/compiler_simd_any.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "utest_helper.hpp" - -void compiler_simd_any(void) -{ - const size_t n = 40; - - // Setup kernel and buffers - OCL_CREATE_KERNEL("compiler_simd_any"); - OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); - OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); - OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); - OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); - - globals[0] = n; - locals[0] = 10; - - OCL_MAP_BUFFER(0); - for (int32_t i = 0; i < (int32_t) n; ++i) - ((int*)buf_data[0])[i] = i; - OCL_UNMAP_BUFFER(0); - - // Run the kernel on GPU - OCL_NDRANGE(1); - - // Run on CPU - - // Compare - OCL_MAP_BUFFER(1); - for (int32_t i = 0; i < (int32_t) n; ++i){ - //printf("%d %d\n", i, ((int *)buf_data[1])[i]); - if (i % 2 == 1) { - if (i < (int32_t)locals[0]) - OCL_ASSERT(((int *)buf_data[1])[i] == 1); - else - OCL_ASSERT(((int *)buf_data[1])[i] == 2); - } - else - OCL_ASSERT(((int *)buf_data[1])[i] == 3); - } - OCL_UNMAP_BUFFER(1); -} - -MAKE_UTEST_FROM_FUNCTION(compiler_simd_any); diff --git a/utests/compiler_sub_group_all.cpp b/utests/compiler_sub_group_all.cpp new file mode 100644 index 0000000..d8e4130 --- /dev/null +++ b/utests/compiler_sub_group_all.cpp @@ -0,0 +1,43 @@ +#include "utest_helper.hpp" + +void compiler_sub_group_all(void) +{ + const size_t n = 40; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_sub_group_all"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + + globals[0] = n; + locals[0] = 10; + + OCL_MAP_BUFFER(0); + for (int32_t i = 0; i < (int32_t) n; ++i) + ((int*)buf_data[0])[i] = i; + OCL_UNMAP_BUFFER(0); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("%d %d\n", i, ((int *)buf_data[1])[i]); + if (i % 2 == 1) { + if (i < (int32_t)locals[0]) + OCL_ASSERT(((int *)buf_data[1])[i] == 1); + else + OCL_ASSERT(((int *)buf_data[1])[i] == 2); + } + else + OCL_ASSERT(((int *)buf_data[1])[i] == 3); + } + OCL_UNMAP_BUFFER(1); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_all); diff --git a/utests/compiler_sub_group_any.cpp b/utests/compiler_sub_group_any.cpp new file mode 100644 index 0000000..98b1bdd --- /dev/null +++ b/utests/compiler_sub_group_any.cpp @@ -0,0 +1,43 @@ +#include "utest_helper.hpp" + +void compiler_sub_group_any(void) +{ + const size_t n = 40; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_sub_group_any"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + + globals[0] = n; + locals[0] = 10; + + OCL_MAP_BUFFER(0); + for (int32_t i = 0; i < (int32_t) n; ++i) + ((int*)buf_data[0])[i] = i; + OCL_UNMAP_BUFFER(0); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Run on CPU + + // Compare + OCL_MAP_BUFFER(1); + for (int32_t i = 0; i < (int32_t) n; ++i){ + //printf("%d %d\n", i, ((int *)buf_data[1])[i]); + if (i % 2 == 1) { + if (i < (int32_t)locals[0]) + OCL_ASSERT(((int *)buf_data[1])[i] == 1); + else + OCL_ASSERT(((int *)buf_data[1])[i] == 2); + } + else + OCL_ASSERT(((int *)buf_data[1])[i] == 3); + } + OCL_UNMAP_BUFFER(1); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_sub_group_any); -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
