[Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.
From: Junyan He Because only platforms after BDW will support double, the special instruction for double MOV is no longer needed. Signed-off-by: Junyan He --- backend/src/backend/gen75_encoder.cpp | 36 - backend/src/backend/gen75_encoder.hpp | 1 - backend/src/backend/gen8_encoder.cpp | 36 - backend/src/backend/gen8_encoder.hpp | 1 - backend/src/backend/gen_context.cpp| 3 --- backend/src/backend/gen_encoder.cpp| 43 -- backend/src/backend/gen_encoder.hpp| 2 -- backend/src/backend/gen_insn_selection.cpp | 23 +--- backend/src/backend/gen_insn_selection.hxx | 1 - 9 files changed, 1 insertion(+), 145 deletions(-) diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp index 135be02..5d1a964 100644 --- a/backend/src/backend/gen75_encoder.cpp +++ b/backend/src/backend/gen75_encoder.cpp @@ -251,42 +251,6 @@ namespace gbe pop(); } - void Gen75Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp) { -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && dest.type == GEN_TYPE_F)); -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F); -int w = curr.execWidth; -GenRegister r0; -r0 = GenRegister::h2(r); -push(); -curr.execWidth = 4; -curr.predicate = GEN_PREDICATE_NONE; -curr.noMask = 1; -MOV(r0, src0); -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4)); -curr.noMask = 0; -curr.quarterControl = 0; -curr.nibControl = 0; -MOV(dest, r0); -curr.nibControl = 1; -MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4)); -pop(); -if (w == 16) { - push(); - curr.execWidth = 4; - curr.predicate = GEN_PREDICATE_NONE; - curr.noMask = 1; - MOV(r0, GenRegister::suboffset(src0, 8)); - MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12)); - curr.noMask = 0; - curr.quarterControl = 1; - curr.nibControl = 0; - MOV(GenRegister::suboffset(dest, 8), r0); - curr.nibControl = 1; - MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4)); - pop(); -} - } - void 
Gen75Encoder::JMPI(GenRegister src, bool longjmp) { alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src); } diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp index e494f29..f5044c0 100644 --- a/backend/src/backend/gen75_encoder.hpp +++ b/backend/src/backend/gen75_encoder.hpp @@ -42,7 +42,6 @@ namespace gbe virtual void JMPI(GenRegister src, bool longjmp = false); /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump distance */ virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip); -virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp = GenRegister::null()); virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value); virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum); diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 55fc3fb..98c3917 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -260,42 +260,6 @@ namespace gbe MOV(dest, value); } - void Gen8Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp) { -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && dest.type == GEN_TYPE_F)); -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F); -int w = curr.execWidth; -GenRegister r0; -r0 = GenRegister::h2(r); -push(); -curr.execWidth = 4; -curr.predicate = GEN_PREDICATE_NONE; -curr.noMask = 1; -MOV(r0, src0); -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4)); -curr.noMask = 0; -curr.quarterControl = 0; -curr.nibControl = 0; -MOV(dest, r0); -curr.nibControl = 1; -MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4)); -pop(); -if (w == 16) { - push(); - curr.execWidth = 4; - curr.predicate = GEN_PREDICATE_NONE; - curr.noMask = 1; - MOV(r0, 
GenRegister::suboffset(src0, 8)); - MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12)); - curr.noMask = 0; - curr.quarterControl = 1; - curr.nibControl = 0; - MOV(GenRegister::suboffset(dest, 8), r0); - curr.nibControl = 1; - MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4)); - pop(); -} - } - void Gen8Encoder::JMPI(GenRegister src, bool longjmp) { alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(),
[Beignet] [PATCH 6/7 V2] Backend: Fix a potential bug for uniform conversion.
From: Junyan He When we do a conversion, the src may be uniform while the dst is not. In this case, we need to set the execution width to 1 (SIMD1) for the MOV from src. Signed-off-by: Junyan He --- backend/src/backend/gen_insn_selection.cpp | 33 ++ 1 file changed, 33 insertions(+) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 49ba499..f4f9d03 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -4651,7 +4651,15 @@ namespace gbe unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0; unpacked = GenRegister::retype(unpacked, GEN_TYPE_F); +sel.push(); +if (sel.isScalarReg(insn.getSrc(0))) { + sel.curr.execWidth = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; +} sel.MOV(unpacked, src); +sel.pop(); + sel.MOV(dst, unpacked); } else { // float to double, just mov @@ -4685,13 +4693,29 @@ namespace gbe // half to double. There is no direct double to half MOV, need tmp float. GBE_ASSERT(srcType == ir::TYPE_HALF); GenRegister tmpFloat = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_F); + +sel.push(); +if (sel.isScalarReg(insn.getSrc(0))) { + sel.curr.execWidth = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; +} sel.MOV(tmpFloat, src); +sel.pop(); + sel.MOV(dst, tmpFloat); } else { // double to half. 
No direct MOV from double to half, so double->float->half GBE_ASSERT(srcType == ir::TYPE_DOUBLE); GBE_ASSERT(dstType == ir::TYPE_HALF); +sel.push(); +if (sel.isScalarReg(insn.getSrc(0))) { + sel.curr.execWidth = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; +} + // double to float GenRegister unpackedFloat = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0; unpackedFloat = GenRegister::retype(unpackedFloat, GEN_TYPE_F); @@ -4701,6 +4725,7 @@ namespace gbe GenRegister unpackedHalf = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0; unpackedHalf = GenRegister::retype(unpackedHalf, GEN_TYPE_HF); sel.MOV(unpackedHalf, unpackedFloat); +sel.pop(); sel.MOV(dst, unpackedHalf); } @@ -4795,7 +4820,15 @@ namespace gbe unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? GEN_TYPE_UW : GEN_TYPE_W); } +sel.push(); +if (sel.isScalarReg(insn.getSrc(0))) { + sel.curr.execWidth = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; +} sel.MOV(unpacked, src); +sel.pop(); + sel.MOV(dst, unpacked); } } -- 1.9.1 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 2/7 V2] Backend: Delete LOAD_DF_IMM instruction.
From: Junyan HeDouble is supported on BDW later platforms, just normal MOV can handle the loading of double. So no need for LOAD_DF_IMM anymore. Signed-off-by: Junyan He --- backend/src/backend/gen75_encoder.cpp | 30 -- backend/src/backend/gen75_encoder.hpp | 1 - backend/src/backend/gen8_encoder.cpp | 28 backend/src/backend/gen8_encoder.hpp | 1 - backend/src/backend/gen_context.cpp| 3 --- backend/src/backend/gen_encoder.cpp| 29 - backend/src/backend/gen_encoder.hpp| 1 - backend/src/backend/gen_insn_selection.cpp | 5 ++--- backend/src/backend/gen_insn_selection.hxx | 1 - 9 files changed, 2 insertions(+), 97 deletions(-) diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp index 5d1a964..fc37991 100644 --- a/backend/src/backend/gen75_encoder.cpp +++ b/backend/src/backend/gen75_encoder.cpp @@ -221,36 +221,6 @@ namespace gbe } } - - void Gen75Encoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value) { -union { double d; unsigned u[2]; } u; -u.d = value; -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_UD); -push(); -curr.predicate = GEN_PREDICATE_NONE; -curr.noMask = 1; -curr.execWidth = 1; -MOV(r, GenRegister::immud(u.u[0])); -MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[1])); -pop(); -r.type = GEN_TYPE_DF; -r.vstride = GEN_VERTICAL_STRIDE_0; -r.width = GEN_WIDTH_1; -r.hstride = GEN_HORIZONTAL_STRIDE_0; -push(); -uint32_t width = curr.execWidth; -curr.execWidth = 8; -curr.predicate = GEN_PREDICATE_NONE; -curr.noMask = 1; -curr.quarterControl = GEN_COMPRESSION_Q1; -MOV(dest, r); -if (width == 16) { - curr.quarterControl = GEN_COMPRESSION_Q2; - MOV(GenRegister::offset(dest, 2), r); -} -pop(); - } - void Gen75Encoder::JMPI(GenRegister src, bool longjmp) { alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src); } diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp index f5044c0..d06f393 100644 --- a/backend/src/backend/gen75_encoder.hpp +++ 
b/backend/src/backend/gen75_encoder.hpp @@ -42,7 +42,6 @@ namespace gbe virtual void JMPI(GenRegister src, bool longjmp = false); /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump distance */ virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip); -virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value); virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum); virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t elemNum); diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 98c3917..16b3fc6 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -227,34 +227,6 @@ namespace gbe this->setSrc1(insn, bti); } } - void Gen8Encoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value) { -union { double d; unsigned u[2]; } u; -u.d = value; -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_UD); -push(); -curr.predicate = GEN_PREDICATE_NONE; -curr.noMask = 1; -curr.execWidth = 1; -MOV(r, GenRegister::immud(u.u[0])); -MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[1])); -pop(); -r.type = GEN_TYPE_DF; -r.vstride = GEN_VERTICAL_STRIDE_0; -r.width = GEN_WIDTH_1; -r.hstride = GEN_HORIZONTAL_STRIDE_0; -push(); -uint32_t width = curr.execWidth; -curr.execWidth = 8; -curr.predicate = GEN_PREDICATE_NONE; -curr.noMask = 1; -curr.quarterControl = GEN_COMPRESSION_Q1; -MOV(dest, r); -if (width == 16) { - curr.quarterControl = GEN_COMPRESSION_Q2; - MOV(GenRegister::offset(dest, 2), r); -} -pop(); - } void Gen8Encoder::LOAD_INT64_IMM(GenRegister dest, GenRegister value) { MOV(dest, value); diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp index 2aa074f..8c447ea 100644 --- a/backend/src/backend/gen8_encoder.hpp +++ 
b/backend/src/backend/gen8_encoder.hpp @@ -42,7 +42,6 @@ namespace gbe virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip); virtual void F16TO32(GenRegister dest, GenRegister src0); virtual void F32TO16(GenRegister dest, GenRegister src0); -virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value); virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value); virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t
[Beignet] [PATCH 5/7 V2] Utest: Fix a bug for double div.
From: Junyan He Signed-off-by: Junyan He --- utests/compiler_double_convert.cpp | 3 +-- utests/compiler_double_div.cpp | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/utests/compiler_double_convert.cpp b/utests/compiler_double_convert.cpp index 9c5c97b..30787d2 100644 --- a/utests/compiler_double_convert.cpp +++ b/utests/compiler_double_convert.cpp @@ -613,8 +613,7 @@ void compiler_float_convert_double(void) OCL_MAP_BUFFER(1); for (int32_t i = 0; i < (int32_t) n; ++i) { //printf("%f, \t%f\n", ((double*)buf_data[1])[i], cpu_dst[i]); -OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]); -OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]); +OCL_ASSERT(((double*)buf_data[1])[i] == cpu_dst[i]); } OCL_UNMAP_BUFFER(1); } diff --git a/utests/compiler_double_div.cpp b/utests/compiler_double_div.cpp index db763e3..11578cf 100644 --- a/utests/compiler_double_div.cpp +++ b/utests/compiler_double_div.cpp @@ -23,12 +23,15 @@ void compiler_double_div(void) // Run random tests OCL_MAP_BUFFER(0); OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); for (int32_t i = 0; i < (int32_t) n; ++i) { cpu_src0[i] = ((double*)buf_data[0])[i] = ((double)(((i - 5)*1334) * 11105)); cpu_src1[i] = ((double*)buf_data[1])[i] = 499.13542123d*(i + 132.43d + 142.32*i); +((double*)buf_data[2])[i] = 0.0d; } OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); // Run the kernel on GPU OCL_NDRANGE(1); -- 1.9.1 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 3/7 V2] Backend: Add double conversion to insn selection.
From: Junyan HeSigned-off-by: Junyan He --- backend/src/backend/gen_insn_selection.cpp | 197 +++-- 1 file changed, 189 insertions(+), 8 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index b66cf71..49ba499 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -4619,8 +4619,11 @@ namespace gbe sel.pop(); sel.MOV(dst, tmp); } else if (src.type == GEN_TYPE_DF) { -//TODO: -GBE_ASSERT(0); +GBE_ASSERT(sel.hasDoubleType()); +GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we support native long. + +// Just Mov +sel.MOV(dst, src); } else { /* Invalid case. */ GBE_ASSERT(0); @@ -4630,11 +4633,77 @@ namespace gbe INLINE void convertBetweenFloatDouble(Selection::Opaque , const ir::ConvertInstruction , bool ) const { using namespace ir; + const Type dstType = insn.getDstType(); + const Type srcType = insn.getSrcType(); + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + + GBE_ASSERT(sel.hasDoubleType()); + + if (sel.isScalarReg(insn.getDst(0))) { +// dst is scalar, just MOV and nothing more. 
+GBE_ASSERT(sel.isScalarReg(insn.getSrc(0))); +sel.MOV(dst, src); + } else if (srcType == ir::TYPE_DOUBLE) { +// double to float +GBE_ASSERT(dstType == ir::TYPE_FLOAT); +GenRegister unpacked; +unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0; +unpacked = GenRegister::retype(unpacked, GEN_TYPE_F); + +sel.MOV(unpacked, src); +sel.MOV(dst, unpacked); + } else { +// float to double, just mov +sel.MOV(dst, src); + } + + return; } INLINE void convertBetweenHalfDouble(Selection::Opaque , const ir::ConvertInstruction , bool ) const { using namespace ir; + const Type dstType = insn.getDstType(); + const Type srcType = insn.getSrcType(); + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + + GBE_ASSERT(sel.hasDoubleType()); + GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we support half. + + if (sel.isScalarReg(insn.getDst(0))) { // uniform case. +GBE_ASSERT(sel.isScalarReg(insn.getSrc(0))); +GBE_ASSERT(sel.curr.execWidth == 1); +GenRegister tmpFloat = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_F); +sel.MOV(tmpFloat, src); +sel.MOV(dst, tmpFloat); +return; + } + + if (dstType == ir::TYPE_DOUBLE) { +// half to double. There is no direct double to half MOV, need tmp float. +GBE_ASSERT(srcType == ir::TYPE_HALF); +GenRegister tmpFloat = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_F); +sel.MOV(tmpFloat, src); +sel.MOV(dst, tmpFloat); + } else { +// double to half. 
No direct MOV from double to half, so double->float->half +GBE_ASSERT(srcType == ir::TYPE_DOUBLE); +GBE_ASSERT(dstType == ir::TYPE_HALF); + +// double to float +GenRegister unpackedFloat = sel.unpacked_ud(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0; +unpackedFloat = GenRegister::retype(unpackedFloat, GEN_TYPE_F); +sel.MOV(unpackedFloat, src); + +// float to half +GenRegister unpackedHalf = sel.unpacked_uw(sel.reg(FAMILY_QWORD, sel.isScalarReg(insn.getSrc(0; +unpackedHalf = GenRegister::retype(unpackedHalf, GEN_TYPE_HF); +sel.MOV(unpackedHalf, unpackedFloat); + +sel.MOV(dst, unpackedHalf); + } } INLINE void convertHalfToSmallInts(Selection::Opaque , const ir::ConvertInstruction , bool ) const @@ -4694,6 +4763,105 @@ namespace gbe sel.MOV(dst, tmp); } +INLINE void convertDoubleToSmallInts(Selection::Opaque , const ir::ConvertInstruction , bool ) const +{ + using namespace ir; + const Type dstType = insn.getDstType(); + const Type srcType = insn.getSrcType(); + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + const RegisterFamily dstFamily = getFamily(dstType); + + GBE_ASSERT(sel.hasDoubleType()); + GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we support half. + if (sel.isScalarReg(insn.getDst(0))) { +// dst is scalar, just MOV and nothing more. +GBE_ASSERT(sel.isScalarReg(insn.getSrc(0))); +sel.MOV(dst, src); + } else { +GenRegister unpacked; +if (dstFamily ==
Re: [Beignet] Unrecoverable system lockup when allocating too much memory
Would this be better if you turn off the overcommit via proc fs? Only if you also disable any swap space ( sudo swapoff -a && sudo sh -c "echo -n 2 > /proc/sys/vm/overcommit_memory" #warning, this may itself crash your desktop); if I disable overcommit but leave swap on, I get a hang with the following trace. (The example in https://bugs.launchpad.net/ubuntu/+source/pyopencl/+bug/1354086 no longer hangs, so the "rapidly allocating and freeing pyopencl objects doesn't actually free the memory" aspect has evidently been fixed, but keeping too many objects for the available memory still does hang. Though for me, SysRq still works.) Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.597991] Purging GPU memory, 0 bytes freed, 5685248 bytes still pinned. Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598134] Xorg invoked oom-killer: gfp_mask=0x0, order=0, oom_score_adj=0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598136] Xorg cpuset=/ mems_allowed=0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598140] CPU: 3 PID: 823 Comm: Xorg Not tainted 3.16.0-4-amd64 #1 Debian 3.16.7-ckt11-1+deb8u5 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598142] Hardware name: TOSHIBA SATELLITE PRO C50-A-1E4/PT10F, BIOS 1.20 09/04/2013 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598144] 8150b4c5 880036dc69a0 81509127 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598147] 0056c000 880149207b30 880149207c08 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598149] 8800a8b0a000 a058c2ab 88014920 0100 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598152] Call Trace: Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598159] [] ? dump_stack+0x41/0x51 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598163] [] ? dump_header+0x76/0x1e8 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598182] [] ? i915_gem_shrinker_oom+0x15b/0x1c0 [i915] Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598186] [] ? oom_kill_process+0x21d/0x370 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598189] [] ? 
find_lock_task_mm+0x3d/0x90 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598191] [] ? out_of_memory+0x473/0x4b0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598194] [] ? pagefault_out_of_memory+0x6f/0x80 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598198] [] ? __do_page_fault+0x3c5/0x4f0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598201] [] ? do_mmap_pgoff+0x2e9/0x3b0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598205] [] ? dput+0x9e/0x170 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598207] [] ? do_vfs_ioctl+0x2cf/0x4b0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598211] [] ? page_fault+0x28/0x30 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598212] Mem-Info: Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598214] Node 0 DMA per-cpu: Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598215] CPU0: hi: 0, btch: 1 usd: 0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598217] CPU1: hi: 0, btch: 1 usd: 0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598218] CPU2: hi: 0, btch: 1 usd: 0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598219] CPU3: hi: 0, btch: 1 usd: 0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598220] Node 0 DMA32 per-cpu: Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598222] CPU0: hi: 186, btch: 31 usd: 133 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598223] CPU1: hi: 186, btch: 31 usd: 168 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598224] CPU2: hi: 186, btch: 31 usd: 184 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598226] CPU3: hi: 186, btch: 31 usd: 180 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598226] Node 0 Normal per-cpu: Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598228] CPU0: hi: 186, btch: 31 usd: 125 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598229] CPU1: hi: 186, btch: 31 usd: 166 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598230] CPU2: hi: 186, btch: 31 usd: 197 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598232] CPU3: hi: 186, btch: 31 usd: 146 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598236] active_anon:61164 
inactive_anon:263991 isolated_anon:0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598236] active_file:142392 inactive_file:107249 isolated_file:0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598236] unevictable:0 dirty:129 writeback:0 unstable:0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598236] free:364625 slab_reclaimable:23144 slab_unreclaimable:7171 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598236] mapped:38107 shmem:264190 pagetables:4104 bounce:0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598236] free_cma:0 Nov 5 07:54:46 rnpalmer-laptop kernel: [ 759.598239] Node 0 DMA free:15612kB min:272kB low:340kB high:408kB active_anon:0kB inactive_anon:56kB active_file:140kB inactive_file:0kB unevictable:0kB isolated(anon):0kB
Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.
V2: Fix uniform bug in conversion. Delete verbose printf in utests. Fix a bug for BSW when convert half to double. On Thu, Nov 05, 2015 at 04:15:41PM +0800, junyan...@inbox.com wrote: > Date: Thu, 5 Nov 2015 16:15:41 +0800 > From: junyan...@inbox.com > To: beignet@lists.freedesktop.org > Subject: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF > instruction. > X-Mailer: git-send-email 1.7.9.5 > > From: Junyan He> > Because just platform after BDW will support double, > the special instruction for double MOV is not needed > anymore. > > Signed-off-by: Junyan He > --- > backend/src/backend/gen75_encoder.cpp | 36 - > backend/src/backend/gen75_encoder.hpp | 1 - > backend/src/backend/gen8_encoder.cpp | 36 - > backend/src/backend/gen8_encoder.hpp | 1 - > backend/src/backend/gen_context.cpp| 3 --- > backend/src/backend/gen_encoder.cpp| 43 > -- > backend/src/backend/gen_encoder.hpp| 2 -- > backend/src/backend/gen_insn_selection.cpp | 23 +--- > backend/src/backend/gen_insn_selection.hxx | 1 - > 9 files changed, 1 insertion(+), 145 deletions(-) > > diff --git a/backend/src/backend/gen75_encoder.cpp > b/backend/src/backend/gen75_encoder.cpp > index 135be02..5d1a964 100644 > --- a/backend/src/backend/gen75_encoder.cpp > +++ b/backend/src/backend/gen75_encoder.cpp > @@ -251,42 +251,6 @@ namespace gbe > pop(); >} > > - void Gen75Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister > tmp) { > -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && > dest.type == GEN_TYPE_F)); > -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F); > -int w = curr.execWidth; > -GenRegister r0; > -r0 = GenRegister::h2(r); > -push(); > -curr.execWidth = 4; > -curr.predicate = GEN_PREDICATE_NONE; > -curr.noMask = 1; > -MOV(r0, src0); > -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4)); > -curr.noMask = 0; > -curr.quarterControl = 0; > -curr.nibControl = 0; > -MOV(dest, r0); > -curr.nibControl = 1; > -MOV(GenRegister::suboffset(dest, 4), 
GenRegister::suboffset(r0, 4)); > -pop(); > -if (w == 16) { > - push(); > - curr.execWidth = 4; > - curr.predicate = GEN_PREDICATE_NONE; > - curr.noMask = 1; > - MOV(r0, GenRegister::suboffset(src0, 8)); > - MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12)); > - curr.noMask = 0; > - curr.quarterControl = 1; > - curr.nibControl = 0; > - MOV(GenRegister::suboffset(dest, 8), r0); > - curr.nibControl = 1; > - MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4)); > - pop(); > -} > - } > - >void Gen75Encoder::JMPI(GenRegister src, bool longjmp) { > alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src); >} > diff --git a/backend/src/backend/gen75_encoder.hpp > b/backend/src/backend/gen75_encoder.hpp > index e494f29..f5044c0 100644 > --- a/backend/src/backend/gen75_encoder.hpp > +++ b/backend/src/backend/gen75_encoder.hpp > @@ -42,7 +42,6 @@ namespace gbe > virtual void JMPI(GenRegister src, bool longjmp = false); > /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump > distance */ > virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip); > -virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp > = GenRegister::null()); > virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double > value); > virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, > GenRegister bti, uint32_t srcNum); > virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister > bti, uint32_t elemNum); > diff --git a/backend/src/backend/gen8_encoder.cpp > b/backend/src/backend/gen8_encoder.cpp > index 55fc3fb..98c3917 100644 > --- a/backend/src/backend/gen8_encoder.cpp > +++ b/backend/src/backend/gen8_encoder.cpp > @@ -260,42 +260,6 @@ namespace gbe > MOV(dest, value); >} > > - void Gen8Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister > tmp) { > -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && > dest.type == GEN_TYPE_F)); > -GenRegister 
r = GenRegister::retype(tmp, GEN_TYPE_F); > -int w = curr.execWidth; > -GenRegister r0; > -r0 = GenRegister::h2(r); > -push(); > -curr.execWidth = 4; > -curr.predicate = GEN_PREDICATE_NONE; > -curr.noMask = 1; > -MOV(r0, src0); > -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4)); > -curr.noMask = 0; > -curr.quarterControl = 0; > -curr.nibControl = 0; > -MOV(dest, r0); > -curr.nibControl = 1; > -MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4)); > -
[Beignet] [PATCH v6 2/4] Add extensions intel_accelerator and basic intel_motion_estimation.
v2: 1. Just upload the first vme_state. 2. Remove duplicated code in check_opt1_extension. 3. Check image format before cl_gpgpu_bind_image_for_vme. 4. Fix error of getting mv. Because we suppose this kernel run in SIMD16 mode, so dword 0 of grf 1 should be __gen_ocl_region(8,vme_result.s0), not __gen_ocl_region(0,vme_result.s1). v3: Return CL_IMAGE_FORMAT_NOT_SUPPORTED if image format is not the required one. v4: Fix two conflicts after code rebase and wordaround a curbe related bug. v6: Treat simd8 and simd16 differently when getting mv. Signed-off-by: Guo YejunSigned-off-by: Chuanbo Weng --- include/CL/cl_ext.h| 103 + src/CMakeLists.txt | 4 +- src/cl_accelerator_intel.c | 86 src/cl_accelerator_intel.h | 29 +++ src/cl_api.c | 106 +- src/cl_command_queue.c | 17 +- src/cl_command_queue_gen7.c| 8 +- src/cl_context.c | 1 + src/cl_context.h | 3 + src/cl_driver.h| 21 ++ src/cl_driver_defs.c | 2 + src/cl_extensions.c| 4 +- src/cl_extensions.h| 8 + src/cl_gen7_device.h | 5 +- src/cl_gt_device.h | 6 +- src/cl_internals.h | 1 + src/cl_kernel.c| 57 - src/cl_kernel.h| 6 +- src/cl_utils.h | 12 ++ src/intel/intel_gpgpu.c| 217 ++- src/intel/intel_structs.h | 120 +++ .../cl_internal_block_motion_estimate_intel.cl | 233 + 22 files changed, 1016 insertions(+), 33 deletions(-) create mode 100644 src/cl_accelerator_intel.c create mode 100644 src/cl_accelerator_intel.h create mode 100644 src/kernels/cl_internal_block_motion_estimate_intel.cl diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h index 710bea8..0a66d70 100644 --- a/include/CL/cl_ext.h +++ b/include/CL/cl_ext.h @@ -184,6 +184,109 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context / #define CL_PRINTF_CALLBACK_ARM 0x40B0 #define CL_PRINTF_BUFFERSIZE_ARM0x40B1 +/* +* cl_intel_accelerator extension * +*/ +#define cl_intel_accelerator 1 +#define cl_intel_motion_estimation 1 + +typedef struct _cl_accelerator_intel* cl_accelerator_intel; +typedef cl_uint cl_accelerator_type_intel; +typedef 
cl_uint cl_accelerator_info_intel; + +typedef struct _cl_motion_estimation_desc_intel { +cl_uint mb_block_type; +cl_uint subpixel_mode; +cl_uint sad_adjust_mode; +cl_uint search_path_type; +} cl_motion_estimation_desc_intel; + +/* Error Codes */ +#define CL_INVALID_ACCELERATOR_INTEL-1094 +#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 +#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 +#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 + +/* Deprecated Error Codes */ +#define CL_INVALID_ACCELERATOR_INTEL_DEPRECATED-6000 +#define CL_INVALID_ACCELERATOR_TYPE_INTEL_DEPRECATED -6001 +#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL_DEPRECATED -6002 +#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL_DEPRECATED -6003 + +/* cl_accelerator_type_intel */ +#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 + +/* cl_accelerator_info_intel */ +#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 +#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL0x4091 +#define CL_ACCELERATOR_CONTEXT_INTEL0x4092 +#define CL_ACCELERATOR_TYPE_INTEL 0x4093 + +/*cl_motion_detect_desc_intel flags */ +#define CL_ME_MB_TYPE_16x16_INTEL 0x0 +#define CL_ME_MB_TYPE_8x8_INTEL 0x1 +#define CL_ME_MB_TYPE_4x4_INTEL 0x2 + +#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 + +#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL0x0 +#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL0x1 + +#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 +#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 +#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL0x5 + +extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL +clCreateAcceleratorINTEL( +cl_context
[Beignet] [PATCH v6 3/4] Add basic utest for block_motion_estimate_intel.
If the CL device does not support this builtin kernel, the test returns PASS. Signed-off-by: Guo Yejun--- utests/CMakeLists.txt | 1 + .../builtin_kernel_block_motion_estimate_intel.cpp | 109 + utests/utest_helper.hpp| 1 + 3 files changed, 111 insertions(+) create mode 100644 utests/builtin_kernel_block_motion_estimate_intel.cpp diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index 18337fa..1603f94 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -205,6 +205,7 @@ set (utests_sources test_printf.cpp enqueue_fill_buf.cpp builtin_kernel_max_global_size.cpp + builtin_kernel_block_motion_estimate_intel.cpp image_1D_buffer.cpp image_from_buffer.cpp compare_image_2d_and_1d_array.cpp diff --git a/utests/builtin_kernel_block_motion_estimate_intel.cpp b/utests/builtin_kernel_block_motion_estimate_intel.cpp new file mode 100644 index 000..12bcb7d --- /dev/null +++ b/utests/builtin_kernel_block_motion_estimate_intel.cpp @@ -0,0 +1,109 @@ +#include "utest_helper.hpp" +#include + +void builtin_kernel_block_motion_estimate_intel(void) +{ + char* built_in_kernel_names; + size_t built_in_kernels_size; + cl_int err = CL_SUCCESS; + size_t ret_sz; + + OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, _in_kernels_size); + built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) ); + OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, (void*)built_in_kernel_names, _sz); + OCL_ASSERT(ret_sz == built_in_kernels_size); + + if (strstr(built_in_kernel_names, "block_motion_estimate_intel") == NULL) + { +free(built_in_kernel_names); +return; + } + + cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, , built_in_kernel_names, ); + OCL_ASSERT(built_in_prog != NULL); + kernel = clCreateKernel(built_in_prog, "block_motion_estimate_intel", ); + OCL_ASSERT(kernel != NULL); + + cl_motion_estimation_desc_intel vmedesc = {CL_ME_MB_TYPE_16x16_INTEL, //0x0 + 
CL_ME_SUBPIXEL_MODE_INTEGER_INTEL, //0x0 + CL_ME_SAD_ADJUST_MODE_NONE_INTEL, //0x0 + CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL //0x5 + }; + cl_accelerator_intel accel = clCreateAcceleratorINTEL(ctx, CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,sizeof(cl_motion_estimation_desc_intel), , ); + OCL_ASSERT(accel != NULL); + + const size_t w = 71; //80 + const size_t h = 41; //48 + + cl_image_format format; + cl_image_desc desc; + + memset(, 0x0, sizeof(cl_image_desc)); + memset(, 0x0, sizeof(cl_image_format)); + + uint8_t* image_data1 = (uint8_t *)malloc(w * h);//src + uint8_t* image_data2 = (uint8_t *)malloc(w * h);//ref + for (size_t j = 0; j < h; j++) { +for (size_t i = 0; i < w; i++) { + if (i >= 32 && i <= 47 && j >= 16 && j <= 31) +image_data2[w * j + i] = image_data1[w * j + i] = 100; + else +image_data2[w * j + i] = image_data1[w * j + i] = 0; +} + } + + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT8; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = w; + desc.image_height = h; + desc.image_row_pitch = 0; + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , image_data1); //src + OCL_CREATE_IMAGE(buf[1], CL_MEM_COPY_HOST_PTR, , , image_data2); //ref + + const size_t mv = (80/16) * (48/16); + OCL_CREATE_BUFFER(buf[2], 0, mv * sizeof(int) * 4, NULL); + + OCL_SET_ARG(0, sizeof(cl_accelerator_intel), ); + OCL_SET_ARG(1, sizeof(cl_mem), [0]); + OCL_SET_ARG(2, sizeof(cl_mem), [1]); + OCL_SET_ARG(3, sizeof(cl_mem), NULL); + OCL_SET_ARG(4, sizeof(cl_mem), [2]); + OCL_SET_ARG(5, sizeof(cl_mem), NULL); + + globals[0] = w; + globals[1] = h; + OCL_CALL(clEnqueueNDRangeKernel, queue, kernel, 2, NULL, globals, NULL, 0, NULL, NULL); + + OCL_MAP_BUFFER(2); + short expected[] = {-64, -48, +-64, -48, +-64, -48, +-64, -48, +-64, -48, +-64, -48, +-64, -48, +0, 0, +0, -48, +-64, -48, +-64, -48, +-64, -48, +-64, -48, +0, -48, +-64, -48}; + short* res = (short*)buf_data[2]; + for (uint32_t j = 0; j < mv; ++j) { +OCL_ASSERT(res[j * 2 + 0] == 
expected[j * 2 + 0]); +OCL_ASSERT(res[j * 2 + 1] == expected[j * 2 + 1]); + } + OCL_UNMAP_BUFFER(2); + + clReleaseAcceleratorINTEL(accel); + clReleaseKernel(kernel); + clReleaseProgram(built_in_prog); + free(built_in_kernel_names); + free(image_data1); + free(image_data2); +} +
[Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.
__gen_ocl_vme is used for hardware accelerated video motion estimation. It gets payload values as parameters and uses MOV to pass these payload values to VME SEND Message's payload grfs. The int8 return value is used to store SEND Message writeback. v2: Remove unnecessary 5 parameters(src_grf*) of built-in function(we just need to allocate related registers in gen_insn_selection step). v3: Remove redundant code and change MAX_SRC_NUM to 40. v4: Choose message response length by message type instead of hard code. v5: Choose message response length by message type in the whole backend pipeline. v6: Treat simd8 and simd16 differently when mov payload value to consecutive payload grfs. Signed-off-by: Chuanbo Weng--- backend/src/backend/gen/gen_mesa_disasm.c | 14 backend/src/backend/gen7_instruction.hpp | 15 backend/src/backend/gen_context.cpp| 98 ++ backend/src/backend/gen_context.hpp| 1 + backend/src/backend/gen_defs.hpp | 15 backend/src/backend/gen_encoder.cpp| 44 ++ backend/src/backend/gen_encoder.hpp| 13 +++ .../src/backend/gen_insn_gen7_schedule_info.hxx| 1 + backend/src/backend/gen_insn_selection.cpp | 73 backend/src/backend/gen_insn_selection.hpp | 14 +++- backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/ir/instruction.cpp | 66 +++ backend/src/ir/instruction.hpp | 17 +++- backend/src/ir/instruction.hxx | 1 + backend/src/libocl/include/ocl_misc.h | 15 backend/src/llvm/llvm_gen_backend.cpp | 47 +++ backend/src/llvm/llvm_gen_ocl_function.hxx | 2 + backend/src/llvm/llvm_scalarize.cpp| 4 + 18 files changed, 436 insertions(+), 5 deletions(-) diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c index 5b71cfa..3198da7 100644 --- a/backend/src/backend/gen/gen_mesa_disasm.c +++ b/backend/src/backend/gen/gen_mesa_disasm.c @@ -476,6 +476,13 @@ static int column; static int gen_version; +#define GEN7_BITS_FIELD(inst, gen7) \ + ({\ +int bits; \ + bits = ((const union Gen7NativeInstruction *)inst)->gen7; \ +bits; \ + }) 
+ #define GEN_BITS_FIELD(inst, gen) \ ({\ int bits; \ @@ -530,6 +537,8 @@ static int gen_version; #define EXECUTION_SIZE(inst) GEN_BITS_FIELD(inst, header.execution_size) #define BRANCH_JIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.jip, bits3.gen8_branch.jip/8) #define BRANCH_UIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.uip, bits2.gen8_branch.uip/8) +#define VME_BTI(inst) GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti) +#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst, bits3.vme_gen7.msg_type) #define SAMPLE_BTI(inst) GEN_BITS_FIELD(inst, bits3.sampler_gen7.bti) #define SAMPLER(inst) GEN_BITS_FIELD(inst, bits3.sampler_gen7.sampler) #define SAMPLER_MSG_TYPE(inst) GEN_BITS_FIELD(inst, bits3.sampler_gen7.msg_type) @@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t deviceID, uint32_t compac if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, bits2.da1.src1_reg_file) == GEN_IMMEDIATE_VALUE) { switch (target) { +case GEN_SFID_VIDEO_MOTION_EST: + format(file, " (bti: %d, msg_type: %d)", + VME_BTI(inst), + VME_MSG_TYPE(inst)); + break; case GEN_SFID_SAMPLER: format(file, " (%d, %d, %d, %d)", SAMPLE_BTI(inst), diff --git a/backend/src/backend/gen7_instruction.hpp b/backend/src/backend/gen7_instruction.hpp index 51f342b..258dd24 100644 --- a/backend/src/backend/gen7_instruction.hpp +++ b/backend/src/backend/gen7_instruction.hpp @@ -350,6 +350,21 @@ union Gen7NativeInstruction uint32_t end_of_thread:1; } sampler_gen7; + struct { +uint32_t bti:8; +uint32_t vme_search_path_lut:3; +uint32_t lut_sub:2; +uint32_t msg_type:2; +uint32_t stream_in:1; +uint32_t stream_out:1; +uint32_t reserved_mbz:2; +uint32_t header_present:1; +uint32_t response_length:5; +uint32_t msg_length:4; +uint32_t pad1:2; +uint32_t end_of_thread:1; + } vme_gen7; + /** * Message for the Sandybridge Sampler Cache or Constant Cache Data Port. * diff --git a/backend/src/backend/gen_context.cpp
[Beignet] [PATCH v6 4/4] Add document of video motion estimation support.
v3: Fix two typos. Signed-off-by: Chuanbo Weng--- docs/Beignet.mdwn | 1 + docs/howto/video-motion-estimation-howto.mdwn | 79 +++ 2 files changed, 80 insertions(+) create mode 100644 docs/howto/video-motion-estimation-howto.mdwn diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn index 9a2b516..363add0 100644 --- a/docs/Beignet.mdwn +++ b/docs/Beignet.mdwn @@ -306,6 +306,7 @@ Documents for OpenCL application developers - [[Kernel Optimization Guide|Beignet/optimization-guide]] - [[Libva Buffer Sharing|Beignet/howto/libva-buffer-sharing-howto]] - [[V4l2 Buffer Sharing|Beignet/howto/v4l2-buffer-sharing-howto]] +- [[Video Motion Estimation|Beignet/howto/video-motion-estimation-howto]] The wiki URL is as below: [http://www.freedesktop.org/wiki/Software/Beignet/](http://www.freedesktop.org/wiki/Software/Beignet/) diff --git a/docs/howto/video-motion-estimation-howto.mdwn b/docs/howto/video-motion-estimation-howto.mdwn new file mode 100644 index 000..d9edc9b --- /dev/null +++ b/docs/howto/video-motion-estimation-howto.mdwn @@ -0,0 +1,79 @@ +Video Motion Vector HowTo +== + +Beignet now supports cl_intel_accelerator and part of cl_intel_motion_estimation, which +are Khronos official extensions. It provides a hardware acceleration of video motion +vector to users. + +Supported hardware platform and limitation +-- + +Only 3rd Generation Intel Core Processors is supported for vme now. And now we just +implement this part of cl_intel_motion_estimation for motion vector computation(residuals +can not be returned yet) on 3rd Generation Intel Core Processors: + mb_block_type = CL_ME_MB_TYPE_16x16_INTEL + subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL + search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL / CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL + / CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL +We will fully support cl_intel_motion_estimation in the future. 
+ +Steps +- + +In order to use video motion estimation provided by Beignet in your program, please follow +the steps as below: + +- Create a cl_accelerator_intel object using extension API clCreateAcceleratorINTEL, with + the following parameters: + _accelerator_type_intel accelerator_type = CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL; + cl_motion_estimation_desc_intel vmedesc = {CL_ME_MB_TYPE_16x16_INTEL, + CL_ME_SUBPIXEL_MODE_INTEGER_INTEL, + CL_ME_SAD_ADJUST_MODE_NONE_INTEL, + CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL( + or CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL + or CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL) +}; + +- Invoke clCreateProgramWithBuiltInKernels to create a program object with built-in kernels + information, and invoke clCreateKernel to create a kernel object whose kernel name is + block_motion_estimate_intel. + +- The prototype of built-in kernel block_motion_estimate_intel is as following: + _kernel void + block_motion_estimate_intel + ( + accelerator_intel_t accelerator, + __read_only image2d_t src_image, + __read_only image2d_t ref_image, + __global short2 * prediction_motion_vector_buffer, + __global short2 * motion_vector_buffer, + __global ushort * residuals + ); + So you should create related objects and setup these kernel arguments by clSetKernelArg. + Create source and reference image object, on which you want to do video motion estimation. + The image_channel_order should be CL_R and image_channel_data_type should be CL_UNORM_INT8. + Create a buffer object to get the motion vector result. This motion vector buffer representing + a vector field of pixel block motion vectors, stored linearly in row-major order. The elements + (pixels) of this image contain a motion vector for the corresponding pixel block, with its x/y + components packed as two 16-bit integer values. Each component is encoded as a S13.2 fixed + point value(two's complement). + +- Use clEnqueueNDRangeKernel to enqueue this kernel. 
The only thing you need to setup is global_work_size: + global_work_size[0] equal to width of source image, global_work_size[1] equal to height of source + image. + +- Use clEnqueueReadBuffer or clEnqueueMapBuffer to get motion vector result. + + +Sample code +--- + +We have developed an utest case of using video motion vector in utests/builtin_kernel_block_motion_estimate_intel.cpp. +Please go through it for details. + +More references +--- + +https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt +https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt
Re: [Beignet] [PATCH] utests: fix image_from_buffer bugs
Ping for push. -Original Message- From: Luo, Xionghu Sent: Wednesday, October 28, 2015 9:42 AM To: Pan, Xiuli; beignet@lists.freedesktop.org Cc: Pan, Xiuli Subject: RE: [Beignet] [PATCH] utests: fix image_from_buffer bugs This patch LGTM. Thanks. Luo Xionghu Best Regards -Original Message- From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Pan Xiuli Sent: Tuesday, October 27, 2015 2:16 PM To: beignet@lists.freedesktop.org Cc: Pan, Xiuli Subject: [Beignet] [PATCH] utests: fix image_from_buffer bugs Fixed 2 bugs: 1. This test case uses usrptr, so we should never free the original buffer space while it is still in use; otherwise undefined behavior would happen: adding or removing one header file corrupts the data at the front, NDRangeKernel fails, etc. 2. The utest needs to test when to free the image from buffer and the buffer, but the utest helper function will release them again, which makes libc emit warnings. We just set the global variables to NULL to avoid this problem. These changes fix the broken image_from_buffer utest. 
Signed-off-by: Pan Xiuli --- utests/image_from_buffer.cpp | 17 + 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/utests/image_from_buffer.cpp b/utests/image_from_buffer.cpp index 78d6797..b1171d1 100644 --- a/utests/image_from_buffer.cpp +++ b/utests/image_from_buffer.cpp @@ -32,13 +32,13 @@ static void image_from_buffer(void) // Setup kernel and images size_t buffer_sz = sizeof(uint32_t) * w * h; - //buf_data[0] = (uint32_t*) malloc(buffer_sz); - buf_data[0] = (uint32_t*)memalign(base_address_alignment, buffer_sz); + uint32_t* src_data; + src_data = (uint32_t*)memalign(base_address_alignment, buffer_sz); for (uint32_t j = 0; j < h; ++j) for (uint32_t i = 0; i < w; i++) - ((uint32_t*)buf_data[0])[j * w + i] = j * w + i; + src_data[j * w + i] = j * w + i; - cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, buffer_sz, buf_data[0], ); + cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | + CL_MEM_USE_HOST_PTR, buffer_sz, src_data, ); OCL_ASSERT(error == CL_SUCCESS); format.image_channel_order = CL_RGBA; @@ -49,7 +49,7 @@ static void image_from_buffer(void) desc.image_row_pitch = w * sizeof(uint32_t); desc.buffer = 0; - OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , buf_data[0]); + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , + src_data); desc.buffer = buff; OCL_CREATE_IMAGE(buf[1], 0, , , NULL); @@ -58,9 +58,6 @@ static void image_from_buffer(void) desc.image_row_pitch = 0; OCL_CREATE_IMAGE(buf[2], CL_MEM_WRITE_ONLY, , , NULL); - free(buf_data[0]); - buf_data[0] = NULL; - OCL_SET_ARG(0, sizeof(cl_mem), [1]); OCL_SET_ARG(1, sizeof(cl_mem), [2]); @@ -87,6 +84,8 @@ static void image_from_buffer(void) OCL_UNMAP_BUFFER_GTT(1); OCL_UNMAP_BUFFER_GTT(2); + free(src_data); + //spec didn't tell the sequence of release buffer of image. so release either buffer or image first is ok here. //we follow the rule of destroy the bo at the last release, then the access of buffer after release image is legal //and vice verse. 
@@ -98,6 +97,8 @@ static void image_from_buffer(void) clReleaseMemObject(buf[1]); #endif clReleaseMemObject(buf[2]); + buf[1] = NULL; + buf[2] = NULL; } MAKE_UTEST_FROM_FUNCTION(image_from_buffer); -- 2.1.4 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH] GBE: fix printf class static variable bug
Ping for review. -Original Message- From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Pan Xiuli Sent: Tuesday, November 3, 2015 11:30 AM To: beignet@lists.freedesktop.org Cc: Pan, XiuliSubject: [Beignet] [PATCH] GBE: fix printf class static variable bug The PrintfParse::printfs static is not thread safe and maybe reset or adding something wrong when runing in mutlithread. Fix the problem by change the printfs to a thread local variable. Signed-off-by: Pan Xiuli --- backend/src/llvm/llvm_printf_parser.cpp | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp index bdaed8a..93f87ea 100644 --- a/backend/src/llvm/llvm_printf_parser.cpp +++ b/backend/src/llvm/llvm_printf_parser.cpp @@ -45,6 +45,8 @@ namespace gbe { using namespace ir; + thread_local map printfs; + /* Return the conversion_specifier if succeed, -1 if failed. */ static char __parse_printf_state(char *begin, char *end, char** rend, PrintfState * state) { @@ -301,7 +303,6 @@ error: Value* g1Xg2Xg3; Value* wg_offset; int out_buf_sizeof_offset; -static map printfs; int printf_num; int totalSizeofSize; @@ -972,12 +973,10 @@ error: return false; } - map PrintfParser::printfs; - void* getPrintfInfo(CallInst* inst) { -if (PrintfParser::printfs[inst]) - return (void*)PrintfParser::printfs[inst]; +if (printfs[inst]) + return (void*)printfs[inst]; return NULL; } -- 2.1.4 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH V2 2/2] Backend: add debugwait function
Use wait function to extend a debug function: void debugwait(void) This function can hang the gpu unless gpu reset or host send something to let it go. EXTREMELY DANGEROUS for machines turn off hangcheck v2: Fix some bugs, and add setting predicate and execwidth, also modify some inst scheduling Signed-off-by: Pan Xiuli--- backend/src/backend/gen_context.cpp | 3 ++- backend/src/backend/gen_encoder.cpp | 1 + backend/src/backend/gen_insn_scheduling.cpp | 1 + backend/src/backend/gen_insn_selection.cpp | 24 +-- backend/src/backend/gen_insn_selection.hpp | 1 + backend/src/ir/instruction.cpp | 30 + backend/src/ir/instruction.hpp | 9 + backend/src/ir/instruction.hxx | 1 + backend/src/libocl/include/ocl_sync.h | 1 + backend/src/libocl/src/ocl_barrier.ll | 6 ++ backend/src/libocl/src/ocl_sync.cl | 1 + backend/src/llvm/llvm_gen_backend.cpp | 6 ++ backend/src/llvm/llvm_gen_ocl_function.hxx | 2 ++ 13 files changed, 83 insertions(+), 3 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index baf3897..6a9b4e5 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -1804,7 +1804,8 @@ namespace gbe } void GenContext::emitWaitInstruction(const SelectionInstruction ) { -p->WAIT(); +p->curr.execWidth = 1; +p->WAIT(insn.extra.waitType); } void GenContext::emitBarrierInstruction(const SelectionInstruction ) { diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index dc49689..2eca5fc 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1108,6 +1108,7 @@ namespace gbe void GenEncoder::WAIT(uint32_t n) { GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT); + GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE); GenRegister src = GenRegister::notification0(n); this->setDst(insn, GenRegister::null()); this->setSrc0(insn, src); diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp 
index 358a2ce..8ee5e48 100644 --- a/backend/src/backend/gen_insn_scheduling.cpp +++ b/backend/src/backend/gen_insn_scheduling.cpp @@ -589,6 +589,7 @@ namespace gbe || node->insn.opcode == SEL_OP_ENDIF || node->insn.opcode == SEL_OP_WHILE || node->insn.opcode == SEL_OP_READ_ARF + || node->insn.opcode == SEL_OP_WAIT || node->insn.opcode == SEL_OP_BARRIER) tracker.makeBarrier(insnID, insnNum); } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1711ab6..a9f6bce 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -618,7 +618,7 @@ namespace gbe /*! No-op */ void NOP(void); /*! Wait instruction (used for the barrier) */ -void WAIT(void); +void WAIT(uint32_t n = 0); /*! Atomic instruction */ void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector temps); /*! Read 64 bits float/int array */ @@ -1282,7 +1282,11 @@ namespace gbe void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); } void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); } - void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); } + void Selection::Opaque::WAIT(uint32_t n) + { +SelectionInstruction *insn = this->appendInsn(SEL_OP_WAIT, 0, 0); +insn->extra.waitType = n; + } void Selection::Opaque::READ64(Reg addr, const GenRegister *dst, @@ -3331,6 +3335,21 @@ namespace gbe DECL_CTOR(SyncInstruction, 1,1); }; + /*! 
Wait instruction */ + DECL_PATTERN(WaitInstruction) + { +INLINE bool emitOne(Selection::Opaque , const ir::WaitInstruction , bool ) const +{ + using namespace ir; + // Debugwait will use reg 1, which is different from barrier + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.WAIT(1); + return true; +} + +DECL_CTOR(WaitInstruction, 1,1); + }; + INLINE uint32_t getByteScatterGatherSize(Selection::Opaque , ir::Type type) { using namespace ir; switch (type) { @@ -5543,6 +5562,7 @@ namespace gbe this->insert(); this->insert(); this->insert(); +this->insert(); // Sort all the patterns with the number of instructions they output for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 275eb9c..7e6ce96 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -130,6 +130,7 @@ namespace gbe
[Beignet] [PATCH 1/2] Backend: enable to choose notification register
There are 3 notification can be used by wait, so we should be able to choose which one we'd like to use. Also the 3 reg is n0.0 n0.1 and n0.2 so also change the function name. Signed-off-by: Pan Xiuli--- backend/src/backend/gen_encoder.cpp | 4 ++-- backend/src/backend/gen_encoder.hpp | 2 +- backend/src/backend/gen_register.hpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index cac29e8..dc49689 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1106,9 +1106,9 @@ namespace gbe this->setSrc1(insn, src1); } - void GenEncoder::WAIT(void) { + void GenEncoder::WAIT(uint32_t n) { GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT); - GenRegister src = GenRegister::notification1(); + GenRegister src = GenRegister::notification0(n); this->setDst(insn, GenRegister::null()); this->setSrc0(insn, src); this->setSrc1(insn, GenRegister::null()); diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 79e7b6e..3549661 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -167,7 +167,7 @@ namespace gbe /*! No-op */ void NOP(void); /*! Wait instruction (used for the barrier) */ -void WAIT(void); +void WAIT(uint32_t n = 0); /*! Atomic instructions */ virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); /*! 
Untyped read (upto 4 channels) */ diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp index aac2da5..68a210e 100644 --- a/backend/src/backend/gen_register.hpp +++ b/backend/src/backend/gen_register.hpp @@ -809,10 +809,10 @@ namespace gbe GEN_HORIZONTAL_STRIDE_0); } -static INLINE GenRegister notification1(void) { +static INLINE GenRegister notification0(uint32_t subnr) { return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_NOTIFICATION_COUNT, - 0, + subnr, GEN_TYPE_UD, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1, -- 2.1.4 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL to create cl buffer by external buffer object's fd.
Ping for review, thanks! -Original Message- From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Weng, Chuanbo Sent: Tuesday, October 27, 2015 10:16 To: beignet@lists.freedesktop.org Cc: Wu, Zhiwen Subject: Re: [Beignet] [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL to create cl buffer by external buffer object's fd. Ping for review, thanks. -Original Message- From: Weng, Chuanbo Sent: Monday, September 21, 2015 16:20 To: beignet@lists.freedesktop.org Cc: Wu, Zhiwen; Weng, Chuanbo Subject: [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL to create cl buffer by external buffer object's fd. Before this patch, Beignet can only create cl buffer from external bo by its handle using clCreateBufferFromLibvaIntel. Render node is the first choice of accessing gpu in current Beignet implementation. DRM_IOCTL_GEM_OPEN is used by clCreateBufferFromLibvaIntel but forbidden in Render node mode. So it's necessary to add this extension to support buffer sharing between different libraries. v2: Separate clCreateMemObjectFromFdIntel into two extensions: clCreateBufferFromFdINTEL and clCreateImageFromFdINTEL. 
Signed-off-by: Chuanbo Weng--- include/CL/cl_intel.h| 16 src/cl_api.c | 23 +++ src/cl_driver.h | 3 +++ src/cl_driver_defs.c | 1 + src/cl_mem.c | 30 ++ src/cl_mem.h | 5 + src/intel/intel_driver.c | 34 +++--- 7 files changed, 109 insertions(+), 3 deletions(-) diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h index 28bcb62..01da553 100644 --- a/include/CL/cl_intel.h +++ b/include/CL/cl_intel.h @@ -133,6 +133,22 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetMemObjectFdIntel_fn)( cl_mem /* Memory Obejct */, int* /* returned fd */); +typedef struct _cl_import_buffer_info_intel { +int fd; +int size; +} cl_import_buffer_info_intel; + +/* Create memory object from external buffer object by fd */ extern +CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferFromFdINTEL(cl_context/* context */, + const cl_import_buffer_info_intel * /* info */, + cl_int * /* errcode_ret */); + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateBufferFromFdINTEL_fn)( + cl_context/* context */, + const cl_import_buffer_info_intel * /* info */, + cl_int * /* errcode_ret */); + #ifdef __cplusplus } #endif diff --git a/src/cl_api.c b/src/cl_api.c index dbbcbb0..ba82743 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -3187,6 +3187,7 @@ internal_clGetExtensionFunctionAddress(const char *func_name) EXTFUNC(clCreateBufferFromLibvaIntel) EXTFUNC(clCreateImageFromLibvaIntel) EXTFUNC(clGetMemObjectFdIntel) + EXTFUNC(clCreateBufferFromFdINTEL) return NULL; } @@ -3355,3 +3356,25 @@ clGetMemObjectFdIntel(cl_context context, error: return err; } + +cl_mem +clCreateBufferFromFdINTEL(cl_context context, + const cl_import_buffer_info_intel* info, + cl_int *errorcode_ret) { + cl_mem mem = NULL; + cl_int err = CL_SUCCESS; + CHECK_CONTEXT (context); + + if (!info) { +err = CL_INVALID_VALUE; +goto error; + } + + mem = cl_mem_new_buffer_from_fd(context, info->fd, info->size, ); + +error: + if (errorcode_ret) +*errorcode_ret = err; + return mem; +} diff --git a/src/cl_driver.h b/src/cl_driver.h index 
1ab4dff..e0991c1 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -381,6 +381,9 @@ extern cl_buffer_get_fd_cb *cl_buffer_get_fd; typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t dim); extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align; +typedef cl_buffer (cl_buffer_get_buffer_from_fd_cb)(cl_context ctx, int +fd, int size); extern cl_buffer_get_buffer_from_fd_cb +*cl_buffer_get_buffer_from_fd; + /* Get the device id */ typedef int (cl_driver_get_device_id_cb)(void); extern cl_driver_get_device_id_cb *cl_driver_get_device_id; diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index b77acdc..b3e8403 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -53,6 +53,7 @@ LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = NULL LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL; LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL; LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL; +LOCAL cl_buffer_get_buffer_from_fd_cb *cl_buffer_get_buffer_from_fd = +NULL; /* cl_khr_gl_sharing */ LOCAL
Re: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random bug
Ping for review. -Original Message- From: Pan, Xiuli Sent: Thursday, October 29, 2015 1:47 PM To: beignet@lists.freedesktop.org Cc: Pan, XiuliSubject: [PATCH] utests: fix compiler_fill_image_2d_array random bug Use safer image write instead of map and memset. When create image without data, we could not set pitch and we don't know the pitch either. So use map and memset the space is too dangerous if pitch is bigger than w*sizeof(bpp), in this case the actually pitch is 512 but memset use pitch as 64*4=256. With only half space set to 0, there will be undefined behavior when we want to check the result for those space that we haven't set to 0. Signed-off-by: Pan Xiuli --- utests/compiler_fill_image_2d_array.cpp | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/utests/compiler_fill_image_2d_array.cpp b/utests/compiler_fill_image_2d_array.cpp index fc09362..ab7470e 100644 --- a/utests/compiler_fill_image_2d_array.cpp +++ b/utests/compiler_fill_image_2d_array.cpp @@ -11,6 +11,7 @@ static void compiler_fill_image_2d_array(void) size_t origin[3] = { }; size_t region[3]; uint32_t* dst; + uint32_t* src; memset(, 0x0, sizeof(cl_image_desc)); memset(, 0x0, sizeof(cl_image_format)); @@ -28,9 +29,16 @@ static void compiler_fill_image_2d_array(void) OCL_CREATE_IMAGE(buf[0], 0, , , NULL); - OCL_MAP_BUFFER_GTT(0); - memset(buf_data[0], 0, sizeof(uint32_t) * w * h * array); - OCL_UNMAP_BUFFER_GTT(0); + region[0] = w; + region[1] = h; + region[2] = array; + + // As we don't know the pitch right now, we cannot + // use map to setup the image. 
It is safer to use + // write image + src = (uint32_t*)malloc(sizeof(uint32_t) * w * h * array); + memset(src, 0, sizeof(uint32_t) * w * h * array); + OCL_WRITE_IMAGE(buf[0], origin, region, src); // Run the kernel OCL_SET_ARG(0, sizeof(cl_mem), [0]); @@ -43,9 +51,6 @@ static void compiler_fill_image_2d_array(void) OCL_NDRANGE(3); // Check result - region[0] = w; - region[1] = h; - region[2] = array; dst = (uint32_t*)malloc(w*h*array*sizeof(uint32_t)); OCL_READ_IMAGE(buf[0], origin, region, dst); @@ -79,6 +84,7 @@ static void compiler_fill_image_2d_array(void) } } free(dst); + free(src); } MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_2d_array); -- 2.1.4 ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string
Ping for pushed. -Original Message- From: Pan, Xiuli Sent: Wednesday, November 4, 2015 9:48 AM To: Rebecca N. Palmer; beignet@lists.freedesktop.org Subject: RE: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string LGTM, Thanks for your help! -Original Message- From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Rebecca N. Palmer Sent: Wednesday, November 4, 2015 6:19 AM To: beignet@lists.freedesktop.org Subject: Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string When p == end (the null terminator byte), don't try to read p + 1: as this is outside the string, it might be a '%' from a different object (causing __parse_printf_state(end + 2, end, ...) to be called, which will fail), or an invalid address. Signed-off-by: Rebecca Palmer --- backend/src/llvm/llvm_printf_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp index bdaed8a..f427107 100644 --- a/backend/src/llvm/llvm_printf_parser.cpp +++ b/backend/src/llvm/llvm_printf_parser.cpp @@ -229,7 +229,7 @@ again: printf("string end with %%\n"); goto error; } - if (*(p + 1) == '%') { // %% + if (p + 1 < end && *(p + 1) == '%') { // %% p += 2; goto again; } ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH] GBE: fix printf class static variable bug
Thread_local is not needed to pass data from one llvm pass to another. You can still access the info after pass that has already run. In a later llvm pass, you can use getAnalysis() to get the the PrintfParser pass handle. Then expose an interface in PrintfParser like map *PrintfParser::getPrintfInfo() { return } then you can query the printfInfo in GenWriter. Thanks! Ruiling > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Pan, Xiuli > Sent: Friday, November 6, 2015 9:44 AM > To: Pan, Xiuli; beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH] GBE: fix printf class static variable bug > > Ping for review. > > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Pan Xiuli > Sent: Tuesday, November 3, 2015 11:30 AM > To: beignet@lists.freedesktop.org > Cc: Pan, Xiuli> Subject: [Beignet] [PATCH] GBE: fix printf class static variable bug > > The PrintfParse::printfs static is not thread > safe and maybe reset or adding something wrong > when runing in mutlithread. Fix the problem by > change the printfs to a thread local variable. > > Signed-off-by: Pan Xiuli > --- > backend/src/llvm/llvm_printf_parser.cpp | 9 - > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/backend/src/llvm/llvm_printf_parser.cpp > b/backend/src/llvm/llvm_printf_parser.cpp > index bdaed8a..93f87ea 100644 > --- a/backend/src/llvm/llvm_printf_parser.cpp > +++ b/backend/src/llvm/llvm_printf_parser.cpp > @@ -45,6 +45,8 @@ namespace gbe > { >using namespace ir; > > + thread_local map printfs; > + >/* Return the conversion_specifier if succeed, -1 if failed. 
*/ >static char __parse_printf_state(char *begin, char *end, char** rend, > PrintfState * state) >{ > @@ -301,7 +303,6 @@ error: > Value* g1Xg2Xg3; > Value* wg_offset; > int out_buf_sizeof_offset; > -static map printfs; > int printf_num; > int totalSizeofSize; > > @@ -972,12 +973,10 @@ error: > return false; >} > > - map PrintfParser::printfs; > - >void* getPrintfInfo(CallInst* inst) >{ > -if (PrintfParser::printfs[inst]) > - return (void*)PrintfParser::printfs[inst]; > +if (printfs[inst]) > + return (void*)printfs[inst]; > return NULL; >} > > -- > 2.1.4 > > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH] GBE: fix printf class static variable bug
Yes, but the problem is that if two threads have kernels with printf functions, the map printfs will be cleared in the constructor and destructor. This will cause the one that still needs info in printfs to get a null pointer. thread_local is now used to protect printfs from other threads, not to pass data from one pass to another. I tried getAnalysis, but the place where we need printfs, as you said, is in GenWriter, and it could not be used there as easily as now. Every pass manager will clear printfs and needs a new map to avoid disturbing the others. It is fine when only one pass exists, but with multithreading there will be many passes at the same time. If using an interface like: *PrintfParser::getPrintfInfo() { return } it is still a static one, and threads may disturb each other. -Original Message- From: Song, Ruiling Sent: Friday, November 6, 2015 9:54 AM To: Pan, Xiuli; Pan, Xiuli ; beignet@lists.freedesktop.org Subject: RE: [Beignet] [PATCH] GBE: fix printf class static variable bug Thread_local is not needed to pass data from one llvm pass to another. You can still access the info after pass that has already run. In a later llvm pass, you can use getAnalysis() to get the PrintfParser pass handle. Then expose an interface in PrintfParser then you can query the printfInfo in GenWriter. Thanks! Ruiling > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Pan, Xiuli > Sent: Friday, November 6, 2015 9:44 AM > To: Pan, Xiuli; beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH] GBE: fix printf class static variable bug > > Ping for review. 
> > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Pan Xiuli > Sent: Tuesday, November 3, 2015 11:30 AM > To: beignet@lists.freedesktop.org > Cc: Pan, Xiuli > Subject: [Beignet] [PATCH] GBE: fix printf class static variable bug > > The PrintfParse::printfs static is not thread > safe and maybe reset or adding something wrong > when runing in mutlithread. Fix the problem by > change the printfs to a thread local variable. > > Signed-off-by: Pan Xiuli > --- > backend/src/llvm/llvm_printf_parser.cpp | 9 - > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/backend/src/llvm/llvm_printf_parser.cpp > b/backend/src/llvm/llvm_printf_parser.cpp > index bdaed8a..93f87ea 100644 > --- a/backend/src/llvm/llvm_printf_parser.cpp > +++ b/backend/src/llvm/llvm_printf_parser.cpp > @@ -45,6 +45,8 @@ namespace gbe > { >using namespace ir; > > + thread_local map printfs; > + >/* Return the conversion_specifier if succeed, -1 if failed. */ >static char __parse_printf_state(char *begin, char *end, char** rend, > PrintfState * state) >{ > @@ -301,7 +303,6 @@ error: > Value* g1Xg2Xg3; > Value* wg_offset; > int out_buf_sizeof_offset; > -static map printfs; > int printf_num; > int totalSizeofSize; > > @@ -972,12 +973,10 @@ error: > return false; >} > > - map PrintfParser::printfs; > - >void* getPrintfInfo(CallInst* inst) >{ > -if (PrintfParser::printfs[inst]) > - return (void*)PrintfParser::printfs[inst]; > +if (printfs[inst]) > + return (void*)printfs[inst]; > return NULL; >} > > -- > 2.1.4 > > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.
Hi Ruiling, As we discussed before, I have refined code to handle both simd8 and simd16 in backend and ocl kernel. Please confirm if no problem so that this patchset can be pushed. Thanks! -Original Message- From: Weng, Chuanbo Sent: Friday, November 06, 2015 11:28 To: beignet@lists.freedesktop.org Cc: Weng, Chuanbo Subject: [PATCH v6 1/4] Add built-in function __gen_ocl_vme. __gen_ocl_vme is used for hardware accelerated video motion estimation. It gets payload values as parameters and uses MOV to pass these payload values to VME SEND Message's payload grfs. The int8 return value is used to store SEND Message writeback. v2: Remove unnecessary 5 parameters(src_grf*) of built-in function(we just need to allocate related registers in gen_insn_selection step). v3: Remove redundant code and change MAX_SRC_NUM to 40. v4: Choose message response length by message type instead of hard code. v5: Choose message response length by message type in the whole backend pipeline. v6: Treat simd8 and simd16 differently when mov payload value to consecutive payload grfs. 
Signed-off-by: Chuanbo Weng--- backend/src/backend/gen/gen_mesa_disasm.c | 14 backend/src/backend/gen7_instruction.hpp | 15 backend/src/backend/gen_context.cpp| 98 ++ backend/src/backend/gen_context.hpp| 1 + backend/src/backend/gen_defs.hpp | 15 backend/src/backend/gen_encoder.cpp| 44 ++ backend/src/backend/gen_encoder.hpp| 13 +++ .../src/backend/gen_insn_gen7_schedule_info.hxx| 1 + backend/src/backend/gen_insn_selection.cpp | 73 backend/src/backend/gen_insn_selection.hpp | 14 +++- backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/ir/instruction.cpp | 66 +++ backend/src/ir/instruction.hpp | 17 +++- backend/src/ir/instruction.hxx | 1 + backend/src/libocl/include/ocl_misc.h | 15 backend/src/llvm/llvm_gen_backend.cpp | 47 +++ backend/src/llvm/llvm_gen_ocl_function.hxx | 2 + backend/src/llvm/llvm_scalarize.cpp| 4 + 18 files changed, 436 insertions(+), 5 deletions(-) diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c index 5b71cfa..3198da7 100644 --- a/backend/src/backend/gen/gen_mesa_disasm.c +++ b/backend/src/backend/gen/gen_mesa_disasm.c @@ -476,6 +476,13 @@ static int column; static int gen_version; +#define GEN7_BITS_FIELD(inst, gen7) \ + ({\ +int bits; \ + bits = ((const union Gen7NativeInstruction *)inst)->gen7; \ +bits; \ + }) + #define GEN_BITS_FIELD(inst, gen) \ ({\ int bits; \ @@ -530,6 +537,8 @@ static int gen_version; #define EXECUTION_SIZE(inst) GEN_BITS_FIELD(inst, header.execution_size) #define BRANCH_JIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.jip, bits3.gen8_branch.jip/8) #define BRANCH_UIP(inst) GEN_BITS_FIELD2(inst, bits3.gen7_branch.uip, bits2.gen8_branch.uip/8) +#define VME_BTI(inst) GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti) +#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst, bits3.vme_gen7.msg_type) #define SAMPLE_BTI(inst) GEN_BITS_FIELD(inst, bits3.sampler_gen7.bti) #define SAMPLER(inst) GEN_BITS_FIELD(inst, bits3.sampler_gen7.sampler) #define SAMPLER_MSG_TYPE(inst) 
GEN_BITS_FIELD(inst, bits3.sampler_gen7.msg_type) @@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t deviceID, uint32_t compac if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, bits2.da1.src1_reg_file) == GEN_IMMEDIATE_VALUE) { switch (target) { +case GEN_SFID_VIDEO_MOTION_EST: + format(file, " (bti: %d, msg_type: %d)", + VME_BTI(inst), + VME_MSG_TYPE(inst)); + break; case GEN_SFID_SAMPLER: format(file, " (%d, %d, %d, %d)", SAMPLE_BTI(inst), diff --git a/backend/src/backend/gen7_instruction.hpp b/backend/src/backend/gen7_instruction.hpp index 51f342b..258dd24 100644 --- a/backend/src/backend/gen7_instruction.hpp +++ b/backend/src/backend/gen7_instruction.hpp @@ -350,6 +350,21 @@ union Gen7NativeInstruction uint32_t end_of_thread:1; } sampler_gen7; + struct { +uint32_t bti:8; +uint32_t vme_search_path_lut:3; +uint32_t lut_sub:2; +uint32_t msg_type:2; +uint32_t stream_in:1; +uint32_t stream_out:1;
Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.
The patchset LGTM, pushed, thanks. > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > He Junyan > Sent: Thursday, November 5, 2015 16:21 > To: beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF > instruction. > > V2: > > Fix uniform bug in conversion. > Delete verbose printf in utests. > Fix a bug for BSW when convert half to double. > > On Thu, Nov 05, 2015 at 04:15:41PM +0800, junyan...@inbox.com wrote: > > Date: Thu, 5 Nov 2015 16:15:41 +0800 > > From: junyan...@inbox.com > > To: beignet@lists.freedesktop.org > > Subject: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF > > instruction. > > X-Mailer: git-send-email 1.7.9.5 > > > > From: Junyan He> > > > Because just platform after BDW will support double, the special > > instruction for double MOV is not needed anymore. > > > > Signed-off-by: Junyan He > > --- > > backend/src/backend/gen75_encoder.cpp | 36 - > > backend/src/backend/gen75_encoder.hpp | 1 - > > backend/src/backend/gen8_encoder.cpp | 36 - > > backend/src/backend/gen8_encoder.hpp | 1 - > > backend/src/backend/gen_context.cpp| 3 --- > > backend/src/backend/gen_encoder.cpp| 43 > > -- > > backend/src/backend/gen_encoder.hpp| 2 -- > > backend/src/backend/gen_insn_selection.cpp | 23 +--- > > backend/src/backend/gen_insn_selection.hxx | 1 - > > 9 files changed, 1 insertion(+), 145 deletions(-) > > > > diff --git a/backend/src/backend/gen75_encoder.cpp > > b/backend/src/backend/gen75_encoder.cpp > > index 135be02..5d1a964 100644 > > --- a/backend/src/backend/gen75_encoder.cpp > > +++ b/backend/src/backend/gen75_encoder.cpp > > @@ -251,42 +251,6 @@ namespace gbe > > pop(); > >} > > > > - void Gen75Encoder::MOV_DF(GenRegister dest, GenRegister src0, > GenRegister tmp) { > > -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() > && dest.type == GEN_TYPE_F)); > > -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F); > > -int w = 
curr.execWidth; > > -GenRegister r0; > > -r0 = GenRegister::h2(r); > > -push(); > > -curr.execWidth = 4; > > -curr.predicate = GEN_PREDICATE_NONE; > > -curr.noMask = 1; > > -MOV(r0, src0); > > -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4)); > > -curr.noMask = 0; > > -curr.quarterControl = 0; > > -curr.nibControl = 0; > > -MOV(dest, r0); > > -curr.nibControl = 1; > > -MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4)); > > -pop(); > > -if (w == 16) { > > - push(); > > - curr.execWidth = 4; > > - curr.predicate = GEN_PREDICATE_NONE; > > - curr.noMask = 1; > > - MOV(r0, GenRegister::suboffset(src0, 8)); > > - MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12)); > > - curr.noMask = 0; > > - curr.quarterControl = 1; > > - curr.nibControl = 0; > > - MOV(GenRegister::suboffset(dest, 8), r0); > > - curr.nibControl = 1; > > - MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4)); > > - pop(); > > -} > > - } > > - > >void Gen75Encoder::JMPI(GenRegister src, bool longjmp) { > > alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src); > >} > > diff --git a/backend/src/backend/gen75_encoder.hpp > > b/backend/src/backend/gen75_encoder.hpp > > index e494f29..f5044c0 100644 > > --- a/backend/src/backend/gen75_encoder.hpp > > +++ b/backend/src/backend/gen75_encoder.hpp > > @@ -42,7 +42,6 @@ namespace gbe > > virtual void JMPI(GenRegister src, bool longjmp = false); > > /*! 
Patch JMPI/BRC/BRD (located at index insnID) with the given jump > distance */ > > virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip); > > -virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister > tmp = GenRegister::null()); > > virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double > value); > > virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister > > src, > GenRegister bti, uint32_t srcNum); > > virtual void UNTYPED_READ(GenRegister dst, GenRegister src, > > GenRegister bti, uint32_t elemNum); diff --git > > a/backend/src/backend/gen8_encoder.cpp > > b/backend/src/backend/gen8_encoder.cpp > > index 55fc3fb..98c3917 100644 > > --- a/backend/src/backend/gen8_encoder.cpp > > +++ b/backend/src/backend/gen8_encoder.cpp > > @@ -260,42 +260,6 @@ namespace gbe > > MOV(dest, value); > >} > > > > - void Gen8Encoder::MOV_DF(GenRegister dest, GenRegister src0, > GenRegister tmp) { > > -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() > && dest.type == GEN_TYPE_F)); > > -GenRegister r = GenRegister::retype(tmp,
Re: [Beignet] [PATCH] utests: fix image_from_buffer bugs
Pushed. > -Original Message- > From: Pan, Xiuli > Sent: Friday, November 6, 2015 9:43 > To: Luo, Xionghu; beignet@lists.freedesktop.org > Cc: Yang, Rong R > Subject: RE: [Beignet] [PATCH] utests: fix image_from_buffer bugs > > Ping for pushed. > > -Original Message- > From: Luo, Xionghu > Sent: Wednesday, October 28, 2015 9:42 AM > To: Pan, Xiuli; beignet@lists.freedesktop.org > Cc: Pan, Xiuli > Subject: RE: [Beignet] [PATCH] utests: fix image_from_buffer bugs > > This patch LGTM. > Thanks. > > Luo Xionghu > Best Regards > > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Pan Xiuli > Sent: Tuesday, October 27, 2015 2:16 PM > To: beignet@lists.freedesktop.org > Cc: Pan, Xiuli > Subject: [Beignet] [PATCH] utests: fix image_from_buffer bugs > > Fixed 2 bugs: > 1.This test case uses usrptr, so we should never free the orginal buffer > space, > otherwise undefined behavior would happen: adding or losing one header > file causing data in front broken, NDRangeKernel fail etc. > 2.The utest need to test when to free image from buffer and the buffer, but > the utest helper function will released it again and causes libc made some > warnings. We just make the global variable to NULL to avoid these questions. > These will fix the utests image_from_buffer broken. 
> > Signed-off-by: Pan Xiuli > --- > utests/image_from_buffer.cpp | 17 + > 1 file changed, 9 insertions(+), 8 deletions(-) > > diff --git a/utests/image_from_buffer.cpp b/utests/image_from_buffer.cpp > index 78d6797..b1171d1 100644 > --- a/utests/image_from_buffer.cpp > +++ b/utests/image_from_buffer.cpp > @@ -32,13 +32,13 @@ static void image_from_buffer(void) > >// Setup kernel and images >size_t buffer_sz = sizeof(uint32_t) * w * h; > - //buf_data[0] = (uint32_t*) malloc(buffer_sz); > - buf_data[0] = (uint32_t*)memalign(base_address_alignment, buffer_sz); > + uint32_t* src_data; > + src_data = (uint32_t*)memalign(base_address_alignment, buffer_sz); >for (uint32_t j = 0; j < h; ++j) > for (uint32_t i = 0; i < w; i++) > - ((uint32_t*)buf_data[0])[j * w + i] = j * w + i; > + src_data[j * w + i] = j * w + i; > > - cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | > CL_MEM_USE_HOST_PTR, buffer_sz, buf_data[0], ); > + cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | > + CL_MEM_USE_HOST_PTR, buffer_sz, src_data, ); > >OCL_ASSERT(error == CL_SUCCESS); >format.image_channel_order = CL_RGBA; @@ -49,7 +49,7 @@ static void > image_from_buffer(void) >desc.image_row_pitch = w * sizeof(uint32_t); > >desc.buffer = 0; > - OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , > buf_data[0]); > + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , > + src_data); > >desc.buffer = buff; >OCL_CREATE_IMAGE(buf[1], 0, , , NULL); @@ -58,9 +58,6 > @@ static void image_from_buffer(void) >desc.image_row_pitch = 0; >OCL_CREATE_IMAGE(buf[2], CL_MEM_WRITE_ONLY, , , > NULL); > > - free(buf_data[0]); > - buf_data[0] = NULL; > - >OCL_SET_ARG(0, sizeof(cl_mem), [1]); >OCL_SET_ARG(1, sizeof(cl_mem), [2]); > > @@ -87,6 +84,8 @@ static void image_from_buffer(void) >OCL_UNMAP_BUFFER_GTT(1); >OCL_UNMAP_BUFFER_GTT(2); > > + free(src_data); > + >//spec didn't tell the sequence of release buffer of image. so release > either > buffer or image first is ok here. 
>//we follow the rule of destroy the bo at the last release, then the > access of > buffer after release image is legal >//and vice verse. > @@ -98,6 +97,8 @@ static void image_from_buffer(void) >clReleaseMemObject(buf[1]); > #endif >clReleaseMemObject(buf[2]); > + buf[1] = NULL; > + buf[2] = NULL; > } > > MAKE_UTEST_FROM_FUNCTION(image_from_buffer); > -- > 2.1.4 > > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH v2 1/2] add benckmark for copy data from buffer to buffer
Pushed. > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Song, Ruiling > Sent: Wednesday, November 4, 2015 16:42 > To: Meng, Mengmeng; beignet@lists.freedesktop.org > Cc: Meng, Mengmeng > Subject: Re: [Beignet] [PATCH v2 1/2] add benckmark for copy data from > buffer to buffer > > This patchset LGTM. This benchmark is important to compare buffer/image > performance on different generations. > > Thanks! > Ruiling > > > -Original Message- > > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf > > Of Meng Mengmeng > > Sent: Wednesday, November 4, 2015 4:17 PM > > To: beignet@lists.freedesktop.org > > Cc: Meng, Mengmeng > > Subject: [Beignet] [PATCH v2 1/2] add benckmark for copy data from > > buffer to buffer > > > > Set the data format as 1920 * 1080 four channels and type as char,short and > int. > > > > Signed-off-by: Meng Mengmeng> > --- > > benchmark/CMakeLists.txt| 3 +- > > benchmark/benchmark_copy_buffer.cpp | 55 > > + > > kernels/bench_copy_buffer.cl| 27 ++ > > 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 > > benchmark/benchmark_copy_buffer.cpp > > create mode 100644 kernels/bench_copy_buffer.cl > > > > diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index > > 3e43a21..03a56f2 100644 > > --- a/benchmark/CMakeLists.txt > > +++ b/benchmark/CMakeLists.txt > > @@ -16,7 +16,8 @@ set (benchmark_sources > >benchmark_read_buffer.cpp > >benchmark_read_image.cpp > >benchmark_copy_buffer_to_image.cpp > > - benchmark_copy_image_to_buffer.cpp) > > + benchmark_copy_image_to_buffer.cpp > > + benchmark_copy_buffer.cpp) > > > > > > SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}") > diff > > --git a/benchmark/benchmark_copy_buffer.cpp > > b/benchmark/benchmark_copy_buffer.cpp > > new file mode 100644 > > index 000..88983a7 > > --- /dev/null > > +++ b/benchmark/benchmark_copy_buffer.cpp > > @@ -0,0 +1,55 @@ > > +#include "utests/utest_helper.hpp" > > +#include 
> > + > > +#define BENCH_COPY_BUFFER(T, K, M) \ > > +double benchmark_copy_buffer_ ##T(void) \ { \ > > + struct timeval start,stop; \ > > + \ > > + const size_t w = 1920; \ > > + const size_t h = 1080; \ > > + const size_t sz = 4 * w * h; \ > > + \ > > + OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(M), NULL); \ > > + OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(M), NULL); \ \ > > + OCL_CREATE_KERNEL_FROM_FILE("bench_copy_buffer",K); \ \ > > + OCL_MAP_BUFFER(0); \ > > + for (size_t i = 0; i < sz; i ++) { \ > > +((M *)(buf_data[0]))[i] = rand(); \ > > + } \ > > + OCL_UNMAP_BUFFER(0); \ > > + \ > > + OCL_SET_ARG(0, sizeof(cl_mem), [0]); \ > > + OCL_SET_ARG(1, sizeof(cl_mem), [1]); \ \ > > + globals[0] = w; \ > > + globals[1] = h; \ > > + locals[0] = 16; \ > > + locals[1] = 4; \ > > + \ > > + gettimeofday(,0); \ > > + for (size_t i=0; i<100; i++) { \ > > +OCL_NDRANGE(2); \ > > + } \ > > + OCL_FINISH(); \ > > + \ > > + OCL_MAP_BUFFER(1); \ > > + OCL_UNMAP_BUFFER(1); \ > > + gettimeofday(,0); \ > > + \ > > + clReleaseMemObject(buf[0]); \ > > + free(buf_data[0]); \ > > + buf_data[0] = NULL; \ > > + \ > > + double elapsed = time_subtract(, , 0); \ \ > > + return BANDWIDTH(sz * sizeof(M) * 2 * 100, elapsed); \ } \ \ > > > +MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(benchmark_cop > y_bu > > ffer_ ##T,true); > > + > > +BENCH_COPY_BUFFER(uchar,"bench_copy_buffer_uchar",unsigned char) > > +BENCH_COPY_BUFFER(ushort,"bench_copy_buffer_ushort",unsigned > short) > > +BENCH_COPY_BUFFER(uint,"bench_copy_buffer_uint",unsigned int) > > diff --git a/kernels/bench_copy_buffer.cl > > b/kernels/bench_copy_buffer.cl new file mode 100644 index > > 000..ed20352 > > --- /dev/null > > +++ b/kernels/bench_copy_buffer.cl > > @@ -0,0 +1,27 @@ > > +__kernel void > > +bench_copy_buffer_uchar(__global uchar4* src, __global uchar4* dst) { > > + int x = (int)get_global_id(0); > > + int y = (int)get_global_id(1); > > + int x_sz = (int)get_global_size(0); > > + dst[y * x_sz + x] = src[y * x_sz + x]; } > > + > > 
+__kernel void > > +bench_copy_buffer_ushort(__global ushort4* src, __global ushort4* > > +dst) { > > + int x = (int)get_global_id(0); > > + int y = (int)get_global_id(1); > > + int x_sz = (int)get_global_size(0); > > + dst[y * x_sz + x] = src[y * x_sz + x]; } > > + > > +__kernel void > > +bench_copy_buffer_uint(__global uint4* src, __global uint4* dst) { > > + int x = (int)get_global_id(0); > > + int y = (int)get_global_id(1); > > + int x_sz = (int)get_global_size(0); > > + dst[y * x_sz + x] = src[y * x_sz + x]; } > > + > > -- > > 1.9.1 > > > > ___ > > Beignet mailing list > > Beignet@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/beignet > ___ > Beignet mailing list >
Re: [Beignet] [PATCH V3] GBE: Refine ir for memory operation like atomic/load/store
Pushed. > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Ruiling Song > Sent: Wednesday, November 4, 2015 15:50 > To: beignet@lists.freedesktop.org > Cc: Song, Ruiling > Subject: [Beignet] [PATCH V3] GBE: Refine ir for memory operation like > atomic/load/store > > the legacyMode means what kind of address mode to choose. > when legacyMode is true, we need to do complex bti analysis. > > dynamicBti and staticBti are most for platforms before BDW. > And stateless is for platform BDW+ > > v2: > only do analyzePointerOrigin() under legacyMode. > > v3: > fix conflict with master, and some reorder warning. > > Signed-off-by: Ruiling Song> --- > backend/src/backend/gen_insn_selection.cpp | 132 -- > backend/src/ir/context.hpp | 19 -- > backend/src/ir/instruction.cpp | 410 > + > backend/src/ir/instruction.hpp | 78 +++--- > backend/src/ir/lowering.cpp| 4 +- > backend/src/llvm/llvm_gen_backend.cpp | 393 +--- > --- > 6 files changed, 531 insertions(+), 505 deletions(-) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 2452aea..5ec420e 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -1254,11 +1254,11 @@ namespace gbe >} > >void Selection::Opaque::ATOMIC(Reg dst, uint32_t function, > - uint32_t srcNum, Reg src0, > + uint32_t msgPayload, Reg src0, > Reg src1, Reg src2, GenRegister bti, > vector temps) { > unsigned dstNum = 1 + temps.size(); > -SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum, > srcNum + 1); > +SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum, > msgPayload + 1); > > if (bti.file != GEN_IMMEDIATE_VALUE) { >insn->state.flag = 0; > @@ -1272,14 +1272,15 @@ namespace gbe > } > > insn->src(0) = src0; > -if(srcNum > 1) insn->src(1) = src1; > -if(srcNum > 2) insn->src(2) = src2; > -insn->src(srcNum) = bti; > +if(msgPayload > 1) insn->src(1) = src1; > 
+if(msgPayload > 2) insn->src(2) = src2; > +insn->src(msgPayload) = bti; > + > insn->extra.function = function; > -insn->extra.elem = srcNum; > +insn->extra.elem = msgPayload; > > SelectionVector *vector = this->appendVector(); > -vector->regNum = srcNum; > +vector->regNum = msgPayload; //bti not included in SelectionVector > vector->offsetID = 0; > vector->reg = >src(0); > vector->isSrc = 1; > @@ -3424,8 +3425,6 @@ namespace gbe > uint32_t valueNum, > ir::BTI bti) const > { > -//GenRegister temp = getRelativeAddress(sel, addr, > sel.selReg(bti.base, > ir::TYPE_U32)); > - > GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : > sel.selReg(bti.reg, ir::TYPE_U32); > sel.UNTYPED_READ(addr, dst.data(), valueNum, b, > sel.getBTITemps(bti)); > } > @@ -3726,28 +3725,12 @@ namespace gbe >return false; > } > > -INLINE ir::BTI getBTI(SelectionDAG , const ir::LoadInstruction ) > const { > - using namespace ir; > - SelectionDAG *child0 = dag.child[0]; > - ir::BTI b; > - if (insn.isFixedBTI()) { > -const auto = cast(child0->insn); > -const auto imm = immInsn.getImmediate(); > -b.isConst = 1; > -b.imm = imm.getIntegerValue(); > - } else { > -b.isConst = 0; > -b.reg = insn.getBTI(); > - } > - return b; > -} > - > /*! 
Implements base class */ > virtual bool emit(Selection::Opaque , SelectionDAG ) const > { >using namespace ir; >const ir::LoadInstruction = cast(dag.insn); > - GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32); > + GenRegister address = sel.selReg(insn.getAddressRegister(), > ir::TYPE_U32); >GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL || > insn.getAddressSpace() == MEM_CONSTANT || > insn.getAddressSpace() == MEM_PRIVATE || > @@ -3755,8 +3738,17 @@ namespace gbe > insn.getAddressSpace() == MEM_MIXED); >//GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false); > > - BTI bti = getBTI(dag, insn); > - > + BTI bti; > + AddressMode am = insn.getAddressMode(); > + if (am == AM_StaticBti) { > +bti.isConst = 1; > +bti.imm = insn.getSurfaceIndex(); > + } else if (am == AM_DynamicBti) { > +bti.isConst = 0; > +bti.reg = insn.getBtiReg(); > + } else { > +assert(0 && "stateless not supported yet"); > + } >const Type type = insn.getValueType(); >const
Re: [Beignet] Fwd: [Bug 1277925] New: clinfo: Failed to release test userptr object! (9) i915 kernel driver may not be sane!
Thanks. Backported to Fedora. Works perfectly. On Wed, Nov 4, 2015, 11:48 PM Rebecca N. Palmer wrote: > Specifically (and assuming it is the same bug on Ivy Bridge and > Haswell), fixed in git master by > > http://cgit.freedesktop.org/beignet/commit/?id=f48b4f6766fcaa193652fcbe6ea0bb29f92e45aa > , still present in 1.1.x. > > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet > -- -Igor Gnatenko ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random bug
LGTM, thanks, pushed. > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Pan Xiuli > Sent: Thursday, October 29, 2015 13:47 > To: beignet@lists.freedesktop.org > Cc: Pan, Xiuli > Subject: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random > bug > > Use safer image write instead of map and memset. When create image > without data, we could not set pitch and we don't know the pitch either. So > use map and memset the space is too dangerous if pitch is bigger than > w*sizeof(bpp), in this case the actually pitch is 512 but memset use pitch as > 64*4=256. With only half space set to 0, there will be undefined behavior > when we want to check the result for those space that we haven't set to 0. > > Signed-off-by: Pan Xiuli> --- > utests/compiler_fill_image_2d_array.cpp | 18 -- > 1 file changed, 12 insertions(+), 6 deletions(-) > > diff --git a/utests/compiler_fill_image_2d_array.cpp > b/utests/compiler_fill_image_2d_array.cpp > index fc09362..ab7470e 100644 > --- a/utests/compiler_fill_image_2d_array.cpp > +++ b/utests/compiler_fill_image_2d_array.cpp > @@ -11,6 +11,7 @@ static void compiler_fill_image_2d_array(void) >size_t origin[3] = { }; >size_t region[3]; >uint32_t* dst; > + uint32_t* src; > >memset(, 0x0, sizeof(cl_image_desc)); >memset(, 0x0, sizeof(cl_image_format)); @@ -28,9 +29,16 @@ > static void compiler_fill_image_2d_array(void) > >OCL_CREATE_IMAGE(buf[0], 0, , , NULL); > > - OCL_MAP_BUFFER_GTT(0); > - memset(buf_data[0], 0, sizeof(uint32_t) * w * h * array); > - OCL_UNMAP_BUFFER_GTT(0); > + region[0] = w; > + region[1] = h; > + region[2] = array; > + > + // As we don't know the pitch right now, we cannot // use map to > + setup the image. 
It is safer to use // write image src = > + (uint32_t*)malloc(sizeof(uint32_t) * w * h * array); memset(src, 0, > + sizeof(uint32_t) * w * h * array); OCL_WRITE_IMAGE(buf[0], origin, > + region, src); > >// Run the kernel >OCL_SET_ARG(0, sizeof(cl_mem), [0]); @@ -43,9 +51,6 @@ static void > compiler_fill_image_2d_array(void) >OCL_NDRANGE(3); > >// Check result > - region[0] = w; > - region[1] = h; > - region[2] = array; >dst = (uint32_t*)malloc(w*h*array*sizeof(uint32_t)); >OCL_READ_IMAGE(buf[0], origin, region, dst); > > @@ -79,6 +84,7 @@ static void compiler_fill_image_2d_array(void) > } >} >free(dst); > + free(src); > } > > MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_2d_array); > -- > 2.1.4 > > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string
Pushed. > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Pan, Xiuli > Sent: Friday, November 6, 2015 9:44 > To: 'Rebecca N. Palmer'; 'beignet@lists.freedesktop.org' > Subject: Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format > string > > Ping for pushed. > > -Original Message- > From: Pan, Xiuli > Sent: Wednesday, November 4, 2015 9:48 AM > To: Rebecca N. Palmer; > beignet@lists.freedesktop.org > Subject: RE: [Beignet] [PATCH v3] GBE: Don't read past end of printf format > string > > LGTM, Thanks for your help! > > -Original Message- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Rebecca N. Palmer > Sent: Wednesday, November 4, 2015 6:19 AM > To: beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format > string > > When p == end (the null terminator byte), don't try to read p + 1: > as this is outside the string, it might be a '%' from a different object > (causing > __parse_printf_state(end + 2, end, ...) to be called, which will fail), or an > invalid address. 
> > Signed-off-by: Rebecca Palmer > --- > backend/src/llvm/llvm_printf_parser.cpp | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/backend/src/llvm/llvm_printf_parser.cpp > b/backend/src/llvm/llvm_printf_parser.cpp > index bdaed8a..f427107 100644 > --- a/backend/src/llvm/llvm_printf_parser.cpp > +++ b/backend/src/llvm/llvm_printf_parser.cpp > @@ -229,7 +229,7 @@ again: > printf("string end with %%\n"); > goto error; >} > - if (*(p + 1) == '%') { // %% > + if (p + 1 < end && *(p + 1) == '%') { // %% > p += 2; > goto again; >} > > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet > ___ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet ___ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet