From: Pan Xiuli <xiuli....@intel.com> Add intel sub group short type builtins. V2: Add gen8 part code.
Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- backend/src/backend/gen8_context.cpp | 12 +++++++++ backend/src/backend/gen_context.cpp | 12 +++++++++ backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 44 ++++++++++++++++++++++++++++++++ backend/src/libocl/tmpl/ocl_simd.tmpl.h | 36 ++++++++++++++++++++++++++ 4 files changed, 104 insertions(+) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 09b38b2..42736e1 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -1351,6 +1351,10 @@ namespace gbe p->MOV(dataReg, GenRegister::immint64(0x0)); else if (dataReg.type == GEN_TYPE_UL) p->MOV(dataReg, GenRegister::immuint64(0x0)); + else if (dataReg.type == GEN_TYPE_W) + p->MOV(dataReg, GenRegister::immw(0x0)); + else if (dataReg.type == GEN_TYPE_UW) + p->MOV(dataReg, GenRegister::immuw(0x0)); else GBE_ASSERT(0); /* unsupported data-type */ } @@ -1371,6 +1375,10 @@ namespace gbe p->MOV(dataReg, GenRegister::immint64(0x7FFFFFFFFFFFFFFFL)); else if (dataReg.type == GEN_TYPE_UL) p->MOV(dataReg, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFL)); + else if (dataReg.type == GEN_TYPE_W) + p->MOV(dataReg, GenRegister::immw(0x7FFF)); + else if (dataReg.type == GEN_TYPE_UW) + p->MOV(dataReg, GenRegister::immuw(0xFFFF)); else GBE_ASSERT(0); /* unsupported data-type */ } @@ -1391,6 +1399,10 @@ namespace gbe p->MOV(dataReg, GenRegister::immint64(0x8000000000000000L)); else if (dataReg.type == GEN_TYPE_UL) p->MOV(dataReg, GenRegister::immuint64(0x0)); + else if (dataReg.type == GEN_TYPE_W) + p->MOV(dataReg, GenRegister::immw(0x8000)); + else if (dataReg.type == GEN_TYPE_UW) + p->MOV(dataReg, GenRegister::immuw(0x0)); else GBE_ASSERT(0); /* unsupported data-type */ } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index e907931..a1ae5ea 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2878,6 +2878,10 @@ namespace gbe 
p->MOV(dataReg, GenRegister::immint64(0x0)); else if (dataReg.type == GEN_TYPE_UL) p->MOV(dataReg, GenRegister::immuint64(0x0)); + else if (dataReg.type == GEN_TYPE_W) + p->MOV(dataReg, GenRegister::immw(0x0)); + else if (dataReg.type == GEN_TYPE_UW) + p->MOV(dataReg, GenRegister::immuw(0x0)); else GBE_ASSERT(0); /* unsupported data-type */ } @@ -2896,6 +2900,10 @@ namespace gbe p->MOV(dataReg, GenRegister::immint64(0x7FFFFFFFFFFFFFFFL)); else if (dataReg.type == GEN_TYPE_UL) p->MOV(dataReg, GenRegister::immuint64(0xFFFFFFFFFFFFFFFFL)); + else if (dataReg.type == GEN_TYPE_W) + p->MOV(dataReg, GenRegister::immw(0x7FFF)); + else if (dataReg.type == GEN_TYPE_UW) + p->MOV(dataReg, GenRegister::immuw(0xFFFF)); else GBE_ASSERT(0); /* unsupported data-type */ } @@ -2914,6 +2922,10 @@ namespace gbe p->MOV(dataReg, GenRegister::immint64(0x8000000000000000L)); else if (dataReg.type == GEN_TYPE_UL) p->MOV(dataReg, GenRegister::immuint64(0x0)); + else if (dataReg.type == GEN_TYPE_W) + p->MOV(dataReg, GenRegister::immw(0x8000)); + else if (dataReg.type == GEN_TYPE_UW) + p->MOV(dataReg, GenRegister::immuw(0x0)); else GBE_ASSERT(0); /* unsupported data-type */ } diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl index d1bcfa3..90c7cc2 100644 --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl @@ -76,6 +76,8 @@ RANGE_OP(reduce, add, ulong, false) RANGE_OP(reduce, add, half, true) RANGE_OP(reduce, add, float, true) RANGE_OP(reduce, add, double, true) +RANGE_OP(reduce, add, short, true) +RANGE_OP(reduce, add, ushort, false) /* reduce min */ RANGE_OP(reduce, min, int, true) RANGE_OP(reduce, min, uint, false) @@ -84,6 +86,8 @@ RANGE_OP(reduce, min, ulong, false) RANGE_OP(reduce, min, half, true) RANGE_OP(reduce, min, float, true) RANGE_OP(reduce, min, double, true) +RANGE_OP(reduce, min, short, true) +RANGE_OP(reduce, min, ushort, false) /* reduce max */ RANGE_OP(reduce, max, int, true) 
RANGE_OP(reduce, max, uint, false) @@ -92,6 +96,8 @@ RANGE_OP(reduce, max, ulong, false) RANGE_OP(reduce, max, half, true) RANGE_OP(reduce, max, float, true) RANGE_OP(reduce, max, double, true) +RANGE_OP(reduce, max, short, true) +RANGE_OP(reduce, max, ushort, false) /* scan_inclusive add */ RANGE_OP(scan_inclusive, add, int, true) @@ -101,6 +107,8 @@ RANGE_OP(scan_inclusive, add, ulong, false) RANGE_OP(scan_inclusive, add, half, true) RANGE_OP(scan_inclusive, add, float, true) RANGE_OP(scan_inclusive, add, double, true) +RANGE_OP(scan_inclusive, add, short, true) +RANGE_OP(scan_inclusive, add, ushort, false) /* scan_inclusive min */ RANGE_OP(scan_inclusive, min, int, true) RANGE_OP(scan_inclusive, min, uint, false) @@ -109,6 +117,8 @@ RANGE_OP(scan_inclusive, min, ulong, false) RANGE_OP(scan_inclusive, min, half, true) RANGE_OP(scan_inclusive, min, float, true) RANGE_OP(scan_inclusive, min, double, true) +RANGE_OP(scan_inclusive, min, short, true) +RANGE_OP(scan_inclusive, min, ushort, false) /* scan_inclusive max */ RANGE_OP(scan_inclusive, max, int, true) RANGE_OP(scan_inclusive, max, uint, false) @@ -117,6 +127,8 @@ RANGE_OP(scan_inclusive, max, ulong, false) RANGE_OP(scan_inclusive, max, half, true) RANGE_OP(scan_inclusive, max, float, true) RANGE_OP(scan_inclusive, max, double, true) +RANGE_OP(scan_inclusive, max, short, true) +RANGE_OP(scan_inclusive, max, ushort, false) /* scan_exclusive add */ RANGE_OP(scan_exclusive, add, int, true) @@ -126,6 +138,8 @@ RANGE_OP(scan_exclusive, add, ulong, false) RANGE_OP(scan_exclusive, add, half, true) RANGE_OP(scan_exclusive, add, float, true) RANGE_OP(scan_exclusive, add, double, true) +RANGE_OP(scan_exclusive, add, short, true) +RANGE_OP(scan_exclusive, add, ushort, false) /* scan_exclusive min */ RANGE_OP(scan_exclusive, min, int, true) RANGE_OP(scan_exclusive, min, uint, false) @@ -134,6 +148,8 @@ RANGE_OP(scan_exclusive, min, ulong, false) RANGE_OP(scan_exclusive, min, half, true) RANGE_OP(scan_exclusive, min, 
float, true) RANGE_OP(scan_exclusive, min, double, true) +RANGE_OP(scan_exclusive, min, short, true) +RANGE_OP(scan_exclusive, min, ushort, false) /* scan_exclusive max */ RANGE_OP(scan_exclusive, max, int, true) RANGE_OP(scan_exclusive, max, uint, false) @@ -142,8 +158,36 @@ RANGE_OP(scan_exclusive, max, ulong, false) RANGE_OP(scan_exclusive, max, half, true) RANGE_OP(scan_exclusive, max, float, true) RANGE_OP(scan_exclusive, max, double, true) +RANGE_OP(scan_exclusive, max, short, true) +RANGE_OP(scan_exclusive, max, ushort, false) #undef RANGE_OP + +#define INTEL_RANGE_OP(RANGE, OP, GEN_TYPE, SIGN) \ + OVERLOADABLE GEN_TYPE intel_sub_group_##RANGE##_##OP(GEN_TYPE x) { \ + return __gen_ocl_sub_group_##RANGE##_##OP(SIGN, x); \ + } + +INTEL_RANGE_OP(reduce, add, short, true) +INTEL_RANGE_OP(reduce, add, ushort, false) +INTEL_RANGE_OP(reduce, min, short, true) +INTEL_RANGE_OP(reduce, min, ushort, false) +INTEL_RANGE_OP(reduce, max, short, true) +INTEL_RANGE_OP(reduce, max, ushort, false) +INTEL_RANGE_OP(scan_inclusive, add, short, true) +INTEL_RANGE_OP(scan_inclusive, add, ushort, false) +INTEL_RANGE_OP(scan_inclusive, min, short, true) +INTEL_RANGE_OP(scan_inclusive, min, ushort, false) +INTEL_RANGE_OP(scan_inclusive, max, short, true) +INTEL_RANGE_OP(scan_inclusive, max, ushort, false) +INTEL_RANGE_OP(scan_exclusive, add, short, true) +INTEL_RANGE_OP(scan_exclusive, add, ushort, false) +INTEL_RANGE_OP(scan_exclusive, min, short, true) +INTEL_RANGE_OP(scan_exclusive, min, ushort, false) +INTEL_RANGE_OP(scan_exclusive, max, short, true) +INTEL_RANGE_OP(scan_exclusive, max, ushort, false) + +#undef INTEL_RANGE_OP PURE CONST uint __gen_ocl_sub_group_block_read_mem(const global uint* p); PURE CONST uint2 __gen_ocl_sub_group_block_read_mem2(const global uint* p); PURE CONST uint4 __gen_ocl_sub_group_block_read_mem4(const global uint* p); diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h index c609c2e..d0f06d1 100644 --- 
a/backend/src/libocl/tmpl/ocl_simd.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h @@ -55,6 +55,10 @@ OVERLOADABLE ulong sub_group_reduce_add(ulong x); OVERLOADABLE half sub_group_reduce_add(half x); OVERLOADABLE float sub_group_reduce_add(float x); OVERLOADABLE double sub_group_reduce_add(double x); +OVERLOADABLE short sub_group_reduce_add(short x); +OVERLOADABLE ushort sub_group_reduce_add(ushort x); +OVERLOADABLE short intel_sub_group_reduce_add(short x); +OVERLOADABLE ushort intel_sub_group_reduce_add(ushort x); /* reduce min */ OVERLOADABLE int sub_group_reduce_min(int x); @@ -64,6 +68,10 @@ OVERLOADABLE ulong sub_group_reduce_min(ulong x); OVERLOADABLE half sub_group_reduce_min(half x); OVERLOADABLE float sub_group_reduce_min(float x); OVERLOADABLE double sub_group_reduce_min(double x); +OVERLOADABLE short sub_group_reduce_min(short x); +OVERLOADABLE ushort sub_group_reduce_min(ushort x); +OVERLOADABLE short intel_sub_group_reduce_min(short x); +OVERLOADABLE ushort intel_sub_group_reduce_min(ushort x); /* reduce max */ OVERLOADABLE int sub_group_reduce_max(int x); @@ -73,6 +81,10 @@ OVERLOADABLE ulong sub_group_reduce_max(ulong x); OVERLOADABLE half sub_group_reduce_max(half x); OVERLOADABLE float sub_group_reduce_max(float x); OVERLOADABLE double sub_group_reduce_max(double x); +OVERLOADABLE short sub_group_reduce_max(short x); +OVERLOADABLE ushort sub_group_reduce_max(ushort x); +OVERLOADABLE short intel_sub_group_reduce_max(short x); +OVERLOADABLE ushort intel_sub_group_reduce_max(ushort x); /* scan_inclusive add */ OVERLOADABLE int sub_group_scan_inclusive_add(int x); @@ -82,6 +94,10 @@ OVERLOADABLE ulong sub_group_scan_inclusive_add(ulong x); OVERLOADABLE half sub_group_scan_inclusive_add(half x); OVERLOADABLE float sub_group_scan_inclusive_add(float x); OVERLOADABLE double sub_group_scan_inclusive_add(double x); +OVERLOADABLE short sub_group_scan_inclusive_add(short x); +OVERLOADABLE ushort sub_group_scan_inclusive_add(ushort x); +OVERLOADABLE 
short intel_sub_group_scan_inclusive_add(short x); +OVERLOADABLE ushort intel_sub_group_scan_inclusive_add(ushort x); /* scan_inclusive min */ OVERLOADABLE int sub_group_scan_inclusive_min(int x); @@ -91,6 +107,10 @@ OVERLOADABLE ulong sub_group_scan_inclusive_min(ulong x); OVERLOADABLE half sub_group_scan_inclusive_min(half x); OVERLOADABLE float sub_group_scan_inclusive_min(float x); OVERLOADABLE double sub_group_scan_inclusive_min(double x); +OVERLOADABLE short sub_group_scan_inclusive_min(short x); +OVERLOADABLE ushort sub_group_scan_inclusive_min(ushort x); +OVERLOADABLE short intel_sub_group_scan_inclusive_min(short x); +OVERLOADABLE ushort intel_sub_group_scan_inclusive_min(ushort x); /* scan_inclusive max */ OVERLOADABLE int sub_group_scan_inclusive_max(int x); @@ -100,6 +120,10 @@ OVERLOADABLE ulong sub_group_scan_inclusive_max(ulong x); OVERLOADABLE half sub_group_scan_inclusive_max(half x); OVERLOADABLE float sub_group_scan_inclusive_max(float x); OVERLOADABLE double sub_group_scan_inclusive_max(double x); +OVERLOADABLE short sub_group_scan_inclusive_max(short x); +OVERLOADABLE ushort sub_group_scan_inclusive_max(ushort x); +OVERLOADABLE short intel_sub_group_scan_inclusive_max(short x); +OVERLOADABLE ushort intel_sub_group_scan_inclusive_max(ushort x); /* scan_exclusive add */ OVERLOADABLE int sub_group_scan_exclusive_add(int x); @@ -109,6 +133,10 @@ OVERLOADABLE ulong sub_group_scan_exclusive_add(ulong x); OVERLOADABLE half sub_group_scan_exclusive_add(half x); OVERLOADABLE float sub_group_scan_exclusive_add(float x); OVERLOADABLE double sub_group_scan_exclusive_add(double x); +OVERLOADABLE short sub_group_scan_exclusive_add(short x); +OVERLOADABLE ushort sub_group_scan_exclusive_add(ushort x); +OVERLOADABLE short intel_sub_group_scan_exclusive_add(short x); +OVERLOADABLE ushort intel_sub_group_scan_exclusive_add(ushort x); /* scan_exclusive min */ OVERLOADABLE int sub_group_scan_exclusive_min(int x); @@ -118,6 +146,10 @@ OVERLOADABLE ulong 
sub_group_scan_exclusive_min(ulong x); OVERLOADABLE half sub_group_scan_exclusive_min(half x); OVERLOADABLE float sub_group_scan_exclusive_min(float x); OVERLOADABLE double sub_group_scan_exclusive_min(double x); +OVERLOADABLE short sub_group_scan_exclusive_min(short x); +OVERLOADABLE ushort sub_group_scan_exclusive_min(ushort x); +OVERLOADABLE short intel_sub_group_scan_exclusive_min(short x); +OVERLOADABLE ushort intel_sub_group_scan_exclusive_min(ushort x); /* scan_exclusive max */ OVERLOADABLE int sub_group_scan_exclusive_max(int x); @@ -127,6 +159,10 @@ OVERLOADABLE ulong sub_group_scan_exclusive_max(ulong x); OVERLOADABLE half sub_group_scan_exclusive_max(half x); OVERLOADABLE float sub_group_scan_exclusive_max(float x); OVERLOADABLE double sub_group_scan_exclusive_max(double x); +OVERLOADABLE short sub_group_scan_exclusive_max(short x); +OVERLOADABLE ushort sub_group_scan_exclusive_max(ushort x); +OVERLOADABLE short intel_sub_group_scan_exclusive_max(short x); +OVERLOADABLE ushort intel_sub_group_scan_exclusive_max(ushort x); /* shuffle */ OVERLOADABLE half intel_sub_group_shuffle(half x, uint c); -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet