date:20151105

[Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.

2015-11-05 Thread junyan . he

From: Junyan He 

Because only BDW and later platforms support double,
the special instruction for double MOV is no longer
needed.

Signed-off-by: Junyan He 
---
 backend/src/backend/gen75_encoder.cpp  | 36 -
 backend/src/backend/gen75_encoder.hpp  |  1 -
 backend/src/backend/gen8_encoder.cpp   | 36 -
 backend/src/backend/gen8_encoder.hpp   |  1 -
 backend/src/backend/gen_context.cpp|  3 ---
 backend/src/backend/gen_encoder.cpp| 43 --
 backend/src/backend/gen_encoder.hpp|  2 --
 backend/src/backend/gen_insn_selection.cpp | 23 +---
 backend/src/backend/gen_insn_selection.hxx |  1 -
 9 files changed, 1 insertion(+), 145 deletions(-)

diff --git a/backend/src/backend/gen75_encoder.cpp 
b/backend/src/backend/gen75_encoder.cpp
index 135be02..5d1a964 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -251,42 +251,6 @@ namespace gbe
 pop();
   }
 
-  void Gen75Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister 
tmp) {
-GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && 
dest.type == GEN_TYPE_F));
-GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F);
-int w = curr.execWidth;
-GenRegister r0;
-r0 = GenRegister::h2(r);
-push();
-curr.execWidth = 4;
-curr.predicate = GEN_PREDICATE_NONE;
-curr.noMask = 1;
-MOV(r0, src0);
-MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4));
-curr.noMask = 0;
-curr.quarterControl = 0;
-curr.nibControl = 0;
-MOV(dest, r0);
-curr.nibControl = 1;
-MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4));
-pop();
-if (w == 16) {
-  push();
-  curr.execWidth = 4;
-  curr.predicate = GEN_PREDICATE_NONE;
-  curr.noMask = 1;
-  MOV(r0, GenRegister::suboffset(src0, 8));
-  MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12));
-  curr.noMask = 0;
-  curr.quarterControl = 1;
-  curr.nibControl = 0;
-  MOV(GenRegister::suboffset(dest, 8), r0);
-  curr.nibControl = 1;
-  MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4));
-  pop();
-}
-  }
-
   void Gen75Encoder::JMPI(GenRegister src, bool longjmp) {
 alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
   }
diff --git a/backend/src/backend/gen75_encoder.hpp 
b/backend/src/backend/gen75_encoder.hpp
index e494f29..f5044c0 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -42,7 +42,6 @@ namespace gbe
 virtual void JMPI(GenRegister src, bool longjmp = false);
 /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump 
distance */
 virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
-virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp = 
GenRegister::null());
 virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
 virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
 virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
diff --git a/backend/src/backend/gen8_encoder.cpp 
b/backend/src/backend/gen8_encoder.cpp
index 55fc3fb..98c3917 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -260,42 +260,6 @@ namespace gbe
 MOV(dest, value);
   }
 
-  void Gen8Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister 
tmp) {
-GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && 
dest.type == GEN_TYPE_F));
-GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F);
-int w = curr.execWidth;
-GenRegister r0;
-r0 = GenRegister::h2(r);
-push();
-curr.execWidth = 4;
-curr.predicate = GEN_PREDICATE_NONE;
-curr.noMask = 1;
-MOV(r0, src0);
-MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4));
-curr.noMask = 0;
-curr.quarterControl = 0;
-curr.nibControl = 0;
-MOV(dest, r0);
-curr.nibControl = 1;
-MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4));
-pop();
-if (w == 16) {
-  push();
-  curr.execWidth = 4;
-  curr.predicate = GEN_PREDICATE_NONE;
-  curr.noMask = 1;
-  MOV(r0, GenRegister::suboffset(src0, 8));
-  MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12));
-  curr.noMask = 0;
-  curr.quarterControl = 1;
-  curr.nibControl = 0;
-  MOV(GenRegister::suboffset(dest, 8), r0);
-  curr.nibControl = 1;
-  MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4));
-  pop();
-}
-  }
-
   void Gen8Encoder::JMPI(GenRegister src, bool longjmp) {
 alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(),

[Beignet] [PATCH 6/7 V2] Backend: Fix a potential bug for uniform conversion.

2015-11-05 Thread junyan . he

From: Junyan He 

When we do a conversion, the src may be uniform while the dst
is not. In this case, the MOV that reads the uniform src must be
emitted as SIMD1 (execWidth = 1, no predicate, noMask).

Signed-off-by: Junyan He 
---
 backend/src/backend/gen_insn_selection.cpp | 33 ++
 1 file changed, 33 insertions(+)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 49ba499..f4f9d03 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -4651,7 +4651,15 @@ namespace gbe
 unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, 
sel.isScalarReg(insn.getSrc(0;
 unpacked = GenRegister::retype(unpacked, GEN_TYPE_F);
 
+sel.push();
+if (sel.isScalarReg(insn.getSrc(0))) {
+  sel.curr.execWidth = 1;
+  sel.curr.predicate = GEN_PREDICATE_NONE;
+  sel.curr.noMask = 1;
+}
 sel.MOV(unpacked, src);
+sel.pop();
+
 sel.MOV(dst, unpacked);
   } else {
 // float to double, just mov
@@ -4685,13 +4693,29 @@ namespace gbe
 // half to double. There is no direct double to half MOV, need tmp 
float.
 GBE_ASSERT(srcType == ir::TYPE_HALF);
 GenRegister tmpFloat = 
GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_F);
+
+sel.push();
+if (sel.isScalarReg(insn.getSrc(0))) {
+  sel.curr.execWidth = 1;
+  sel.curr.predicate = GEN_PREDICATE_NONE;
+  sel.curr.noMask = 1;
+}
 sel.MOV(tmpFloat, src);
+sel.pop();
+
 sel.MOV(dst, tmpFloat);
   } else {
 // double to half. No direct MOV from double to half, so 
double->float->half
 GBE_ASSERT(srcType == ir::TYPE_DOUBLE);
 GBE_ASSERT(dstType == ir::TYPE_HALF);
 
+sel.push();
+if (sel.isScalarReg(insn.getSrc(0))) {
+  sel.curr.execWidth = 1;
+  sel.curr.predicate = GEN_PREDICATE_NONE;
+  sel.curr.noMask = 1;
+}
+
 // double to float
 GenRegister unpackedFloat = sel.unpacked_ud(sel.reg(FAMILY_QWORD, 
sel.isScalarReg(insn.getSrc(0;
 unpackedFloat = GenRegister::retype(unpackedFloat, GEN_TYPE_F);
@@ -4701,6 +4725,7 @@ namespace gbe
 GenRegister unpackedHalf = sel.unpacked_uw(sel.reg(FAMILY_QWORD, 
sel.isScalarReg(insn.getSrc(0;
 unpackedHalf = GenRegister::retype(unpackedHalf, GEN_TYPE_HF);
 sel.MOV(unpackedHalf, unpackedFloat);
+sel.pop();
 
 sel.MOV(dst, unpackedHalf);
   }
@@ -4795,7 +4820,15 @@ namespace gbe
   unpacked = GenRegister::retype(unpacked, dstType == TYPE_U8 ? 
GEN_TYPE_UW : GEN_TYPE_W);
 }
 
+sel.push();
+if (sel.isScalarReg(insn.getSrc(0))) {
+  sel.curr.execWidth = 1;
+  sel.curr.predicate = GEN_PREDICATE_NONE;
+  sel.curr.noMask = 1;
+}
 sel.MOV(unpacked, src);
+sel.pop();
+
 sel.MOV(dst, unpacked);
   }
 }
-- 
1.9.1



___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

[Beignet] [PATCH 2/7 V2] Backend: Delete LOAD_DF_IMM instruction.

2015-11-05 Thread junyan . he

From: Junyan He 

Double is supported on BDW and later platforms, where a normal
MOV can handle loading a double. So LOAD_DF_IMM is no longer
needed.

Signed-off-by: Junyan He 
---
 backend/src/backend/gen75_encoder.cpp  | 30 --
 backend/src/backend/gen75_encoder.hpp  |  1 -
 backend/src/backend/gen8_encoder.cpp   | 28 
 backend/src/backend/gen8_encoder.hpp   |  1 -
 backend/src/backend/gen_context.cpp|  3 ---
 backend/src/backend/gen_encoder.cpp| 29 -
 backend/src/backend/gen_encoder.hpp|  1 -
 backend/src/backend/gen_insn_selection.cpp |  5 ++---
 backend/src/backend/gen_insn_selection.hxx |  1 -
 9 files changed, 2 insertions(+), 97 deletions(-)

diff --git a/backend/src/backend/gen75_encoder.cpp 
b/backend/src/backend/gen75_encoder.cpp
index 5d1a964..fc37991 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -221,36 +221,6 @@ namespace gbe
 }
   }
 
-
-  void Gen75Encoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double 
value) {
-union { double d; unsigned u[2]; } u;
-u.d = value;
-GenRegister r = GenRegister::retype(tmp, GEN_TYPE_UD);
-push();
-curr.predicate = GEN_PREDICATE_NONE;
-curr.noMask = 1;
-curr.execWidth = 1;
-MOV(r, GenRegister::immud(u.u[0]));
-MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[1]));
-pop();
-r.type = GEN_TYPE_DF;
-r.vstride = GEN_VERTICAL_STRIDE_0;
-r.width = GEN_WIDTH_1;
-r.hstride = GEN_HORIZONTAL_STRIDE_0;
-push();
-uint32_t width = curr.execWidth;
-curr.execWidth = 8;
-curr.predicate = GEN_PREDICATE_NONE;
-curr.noMask = 1;
-curr.quarterControl = GEN_COMPRESSION_Q1;
-MOV(dest, r);
-if (width == 16) {
-  curr.quarterControl = GEN_COMPRESSION_Q2;
-  MOV(GenRegister::offset(dest, 2), r);
-}
-pop();
-  }
-
   void Gen75Encoder::JMPI(GenRegister src, bool longjmp) {
 alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
   }
diff --git a/backend/src/backend/gen75_encoder.hpp 
b/backend/src/backend/gen75_encoder.hpp
index f5044c0..d06f393 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -42,7 +42,6 @@ namespace gbe
 virtual void JMPI(GenRegister src, bool longjmp = false);
 /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump 
distance */
 virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
-virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
 virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
 virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
 virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t 
elemNum);
diff --git a/backend/src/backend/gen8_encoder.cpp 
b/backend/src/backend/gen8_encoder.cpp
index 98c3917..16b3fc6 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -227,34 +227,6 @@ namespace gbe
   this->setSrc1(insn, bti);
 }
   }
-  void Gen8Encoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double 
value) {
-union { double d; unsigned u[2]; } u;
-u.d = value;
-GenRegister r = GenRegister::retype(tmp, GEN_TYPE_UD);
-push();
-curr.predicate = GEN_PREDICATE_NONE;
-curr.noMask = 1;
-curr.execWidth = 1;
-MOV(r, GenRegister::immud(u.u[0]));
-MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[1]));
-pop();
-r.type = GEN_TYPE_DF;
-r.vstride = GEN_VERTICAL_STRIDE_0;
-r.width = GEN_WIDTH_1;
-r.hstride = GEN_HORIZONTAL_STRIDE_0;
-push();
-uint32_t width = curr.execWidth;
-curr.execWidth = 8;
-curr.predicate = GEN_PREDICATE_NONE;
-curr.noMask = 1;
-curr.quarterControl = GEN_COMPRESSION_Q1;
-MOV(dest, r);
-if (width == 16) {
-  curr.quarterControl = GEN_COMPRESSION_Q2;
-  MOV(GenRegister::offset(dest, 2), r);
-}
-pop();
-  }
 
   void Gen8Encoder::LOAD_INT64_IMM(GenRegister dest, GenRegister value) {
 MOV(dest, value);
diff --git a/backend/src/backend/gen8_encoder.hpp 
b/backend/src/backend/gen8_encoder.hpp
index 2aa074f..8c447ea 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -42,7 +42,6 @@ namespace gbe
 virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
 virtual void F16TO32(GenRegister dest, GenRegister src0);
 virtual void F32TO16(GenRegister dest, GenRegister src0);
-virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
 virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value);
 virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t

[Beignet] [PATCH 5/7 V2] Utest: Fix a bug for double div.

2015-11-05 Thread junyan . he

From: Junyan He 

Signed-off-by: Junyan He 
---
 utests/compiler_double_convert.cpp | 3 +--
 utests/compiler_double_div.cpp | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/utests/compiler_double_convert.cpp 
b/utests/compiler_double_convert.cpp
index 9c5c97b..30787d2 100644
--- a/utests/compiler_double_convert.cpp
+++ b/utests/compiler_double_convert.cpp
@@ -613,8 +613,7 @@ void compiler_float_convert_double(void)
   OCL_MAP_BUFFER(1);
   for (int32_t i = 0; i < (int32_t) n; ++i) {
 //printf("%f,   \t%f\n", ((double*)buf_data[1])[i], cpu_dst[i]);
-OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]);
-OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]);
+OCL_ASSERT(((double*)buf_data[1])[i] == cpu_dst[i]);
   }
   OCL_UNMAP_BUFFER(1);
 }
diff --git a/utests/compiler_double_div.cpp b/utests/compiler_double_div.cpp
index db763e3..11578cf 100644
--- a/utests/compiler_double_div.cpp
+++ b/utests/compiler_double_div.cpp
@@ -23,12 +23,15 @@ void compiler_double_div(void)
   // Run random tests
   OCL_MAP_BUFFER(0);
   OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
   for (int32_t i = 0; i < (int32_t) n; ++i) {
 cpu_src0[i] = ((double*)buf_data[0])[i] = ((double)(((i - 5)*1334) * 
11105));
 cpu_src1[i] = ((double*)buf_data[1])[i] = 499.13542123d*(i + 132.43d + 
142.32*i);
+((double*)buf_data[2])[i] = 0.0d;
   }
   OCL_UNMAP_BUFFER(0);
   OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
 
   // Run the kernel on GPU
   OCL_NDRANGE(1);
-- 
1.9.1



___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

[Beignet] [PATCH 3/7 V2] Backend: Add double conversion to insn selection.

2015-11-05 Thread junyan . he

From: Junyan He 

Signed-off-by: Junyan He 
---
 backend/src/backend/gen_insn_selection.cpp | 197 +++--
 1 file changed, 189 insertions(+), 8 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index b66cf71..49ba499 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -4619,8 +4619,11 @@ namespace gbe
 sel.pop();
 sel.MOV(dst, tmp);
   } else if (src.type == GEN_TYPE_DF) {
-//TODO:
-GBE_ASSERT(0);
+GBE_ASSERT(sel.hasDoubleType());
+GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we 
support native long.
+
+// Just Mov
+sel.MOV(dst, src);
   } else {
 /* Invalid case. */
 GBE_ASSERT(0);
@@ -4630,11 +4633,77 @@ namespace gbe
 INLINE void convertBetweenFloatDouble(Selection::Opaque , const 
ir::ConvertInstruction , bool ) const
 {
   using namespace ir;
+  const Type dstType = insn.getDstType();
+  const Type srcType = insn.getSrcType();
+  const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
+  const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+
+  GBE_ASSERT(sel.hasDoubleType());
+
+  if (sel.isScalarReg(insn.getDst(0))) {
+// dst is scalar, just MOV and nothing more.
+GBE_ASSERT(sel.isScalarReg(insn.getSrc(0)));
+sel.MOV(dst, src);
+  } else if (srcType == ir::TYPE_DOUBLE) {
+// double to float
+GBE_ASSERT(dstType == ir::TYPE_FLOAT);
+GenRegister unpacked;
+unpacked = sel.unpacked_ud(sel.reg(FAMILY_QWORD, 
sel.isScalarReg(insn.getSrc(0;
+unpacked = GenRegister::retype(unpacked, GEN_TYPE_F);
+
+sel.MOV(unpacked, src);
+sel.MOV(dst, unpacked);
+  } else {
+// float to double, just mov
+sel.MOV(dst, src);
+  }
+
+  return;
 }
 
 INLINE void convertBetweenHalfDouble(Selection::Opaque , const 
ir::ConvertInstruction , bool ) const
 {
   using namespace ir;
+  const Type dstType = insn.getDstType();
+  const Type srcType = insn.getSrcType();
+  const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
+  const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+
+  GBE_ASSERT(sel.hasDoubleType());
+  GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we 
support half.
+
+  if (sel.isScalarReg(insn.getDst(0))) { // uniform case.
+GBE_ASSERT(sel.isScalarReg(insn.getSrc(0)));
+GBE_ASSERT(sel.curr.execWidth == 1);
+GenRegister tmpFloat = 
GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_F);
+sel.MOV(tmpFloat, src);
+sel.MOV(dst, tmpFloat);
+return;
+  }
+
+  if (dstType == ir::TYPE_DOUBLE) {
+// half to double. There is no direct double to half MOV, need tmp 
float.
+GBE_ASSERT(srcType == ir::TYPE_HALF);
+GenRegister tmpFloat = 
GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_F);
+sel.MOV(tmpFloat, src);
+sel.MOV(dst, tmpFloat);
+  } else {
+// double to half. No direct MOV from double to half, so 
double->float->half
+GBE_ASSERT(srcType == ir::TYPE_DOUBLE);
+GBE_ASSERT(dstType == ir::TYPE_HALF);
+
+// double to float
+GenRegister unpackedFloat = sel.unpacked_ud(sel.reg(FAMILY_QWORD, 
sel.isScalarReg(insn.getSrc(0;
+unpackedFloat = GenRegister::retype(unpackedFloat, GEN_TYPE_F);
+sel.MOV(unpackedFloat, src);
+
+// float to half
+GenRegister unpackedHalf = sel.unpacked_uw(sel.reg(FAMILY_QWORD, 
sel.isScalarReg(insn.getSrc(0;
+unpackedHalf = GenRegister::retype(unpackedHalf, GEN_TYPE_HF);
+sel.MOV(unpackedHalf, unpackedFloat);
+
+sel.MOV(dst, unpackedHalf);
+  }
 }
 
 INLINE void convertHalfToSmallInts(Selection::Opaque , const 
ir::ConvertInstruction , bool ) const
@@ -4694,6 +4763,105 @@ namespace gbe
   sel.MOV(dst, tmp);
 }
 
+INLINE void convertDoubleToSmallInts(Selection::Opaque , const 
ir::ConvertInstruction , bool ) const
+{
+  using namespace ir;
+  const Type dstType = insn.getDstType();
+  const Type srcType = insn.getSrcType();
+  const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
+  const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+  const RegisterFamily dstFamily = getFamily(dstType);
+
+  GBE_ASSERT(sel.hasDoubleType());
+  GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we 
support half.
+  if (sel.isScalarReg(insn.getDst(0))) {
+// dst is scalar, just MOV and nothing more.
+GBE_ASSERT(sel.isScalarReg(insn.getSrc(0)));
+sel.MOV(dst, src);
+  } else {
+GenRegister unpacked;
+if (dstFamily ==

Re: [Beignet] Unrecoverable system lockup when allocating too much memory

2015-11-05 Thread Rebecca N. Palmer


Would this be better if you turn off the overcommit via proc fs?


Only if you also disable any swap space ( sudo swapoff -a && sudo sh -c 
"echo -n 2 > /proc/sys/vm/overcommit_memory" #warning, this may itself 
crash your desktop); if I disable overcommit but leave swap on, I get a 
hang with the following trace.


(The example in 
https://bugs.launchpad.net/ubuntu/+source/pyopencl/+bug/1354086 no 
longer hangs, so the "rapidly allocating and freeing pyopencl objects 
doesn't actually free the memory" aspect has evidently been fixed, but 
keeping too many objects for the available memory still does hang. 
Though for me, SysRq still works.)


Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.597991] Purging GPU 
memory, 0 bytes freed, 5685248 bytes still pinned.
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598134] Xorg invoked 
oom-killer: gfp_mask=0x0, order=0, oom_score_adj=0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598136] Xorg cpuset=/ 
mems_allowed=0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598140] CPU: 3 PID: 823 
Comm: Xorg Not tainted 3.16.0-4-amd64 #1 Debian 3.16.7-ckt11-1+deb8u5
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598142] Hardware name: 
TOSHIBA SATELLITE PRO C50-A-1E4/PT10F, BIOS 1.20 09/04/2013
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598144]   
8150b4c5 880036dc69a0 81509127
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598147]  0056c000 
880149207b30 880149207c08 
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598149]  8800a8b0a000 
a058c2ab 88014920 0100

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598152] Call Trace:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598159] 
[] ? dump_stack+0x41/0x51
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598163] 
[] ? dump_header+0x76/0x1e8
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598182] 
[] ? i915_gem_shrinker_oom+0x15b/0x1c0 [i915]
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598186] 
[] ? oom_kill_process+0x21d/0x370
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598189] 
[] ? find_lock_task_mm+0x3d/0x90
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598191] 
[] ? out_of_memory+0x473/0x4b0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598194] 
[] ? pagefault_out_of_memory+0x6f/0x80
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598198] 
[] ? __do_page_fault+0x3c5/0x4f0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598201] 
[] ? do_mmap_pgoff+0x2e9/0x3b0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598205] 
[] ? dput+0x9e/0x170
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598207] 
[] ? do_vfs_ioctl+0x2cf/0x4b0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598211] 
[] ? page_fault+0x28/0x30

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598212] Mem-Info:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598214] Node 0 DMA per-cpu:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598215] CPU0: hi: 
0, btch:   1 usd:   0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598217] CPU1: hi: 
0, btch:   1 usd:   0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598218] CPU2: hi: 
0, btch:   1 usd:   0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598219] CPU3: hi: 
0, btch:   1 usd:   0

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598220] Node 0 DMA32 per-cpu:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598222] CPU0: hi: 
186, btch:  31 usd: 133
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598223] CPU1: hi: 
186, btch:  31 usd: 168
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598224] CPU2: hi: 
186, btch:  31 usd: 184
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598226] CPU3: hi: 
186, btch:  31 usd: 180
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598226] Node 0 Normal 
per-cpu:
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598228] CPU0: hi: 
186, btch:  31 usd: 125
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598229] CPU1: hi: 
186, btch:  31 usd: 166
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598230] CPU2: hi: 
186, btch:  31 usd: 197
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598232] CPU3: hi: 
186, btch:  31 usd: 146
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236] active_anon:61164 
inactive_anon:263991 isolated_anon:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236] 
active_file:142392 inactive_file:107249 isolated_file:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  unevictable:0 
dirty:129 writeback:0 unstable:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  free:364625 
slab_reclaimable:23144 slab_unreclaimable:7171
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  mapped:38107 
shmem:264190 pagetables:4104 bounce:0

Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598236]  free_cma:0
Nov  5 07:54:46 rnpalmer-laptop kernel: [  759.598239] Node 0 DMA 
free:15612kB min:272kB low:340kB high:408kB active_anon:0kB 
inactive_anon:56kB active_file:140kB inactive_file:0kB unevictable:0kB 
isolated(anon):0kB

Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.

2015-11-05 Thread He Junyan

V2:

Fix uniform bug in conversion.
Delete verbose printf in utests.
Fix a bug for BSW when converting half to double.

On Thu, Nov 05, 2015 at 04:15:41PM +0800, junyan...@inbox.com wrote:
> Date: Thu,  5 Nov 2015 16:15:41 +0800
> From: junyan...@inbox.com
> To: beignet@lists.freedesktop.org
> Subject: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF
>  instruction.
> X-Mailer: git-send-email 1.7.9.5
> 
> From: Junyan He 
> 
> Because just platform after BDW will support double,
> the special instruction for double MOV is not needed
> anymore.
> 
> Signed-off-by: Junyan He 
> ---
>  backend/src/backend/gen75_encoder.cpp  | 36 -
>  backend/src/backend/gen75_encoder.hpp  |  1 -
>  backend/src/backend/gen8_encoder.cpp   | 36 -
>  backend/src/backend/gen8_encoder.hpp   |  1 -
>  backend/src/backend/gen_context.cpp|  3 ---
>  backend/src/backend/gen_encoder.cpp| 43 
> --
>  backend/src/backend/gen_encoder.hpp|  2 --
>  backend/src/backend/gen_insn_selection.cpp | 23 +---
>  backend/src/backend/gen_insn_selection.hxx |  1 -
>  9 files changed, 1 insertion(+), 145 deletions(-)
> 
> diff --git a/backend/src/backend/gen75_encoder.cpp 
> b/backend/src/backend/gen75_encoder.cpp
> index 135be02..5d1a964 100644
> --- a/backend/src/backend/gen75_encoder.cpp
> +++ b/backend/src/backend/gen75_encoder.cpp
> @@ -251,42 +251,6 @@ namespace gbe
>  pop();
>}
>  
> -  void Gen75Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister 
> tmp) {
> -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && 
> dest.type == GEN_TYPE_F));
> -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F);
> -int w = curr.execWidth;
> -GenRegister r0;
> -r0 = GenRegister::h2(r);
> -push();
> -curr.execWidth = 4;
> -curr.predicate = GEN_PREDICATE_NONE;
> -curr.noMask = 1;
> -MOV(r0, src0);
> -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4));
> -curr.noMask = 0;
> -curr.quarterControl = 0;
> -curr.nibControl = 0;
> -MOV(dest, r0);
> -curr.nibControl = 1;
> -MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4));
> -pop();
> -if (w == 16) {
> -  push();
> -  curr.execWidth = 4;
> -  curr.predicate = GEN_PREDICATE_NONE;
> -  curr.noMask = 1;
> -  MOV(r0, GenRegister::suboffset(src0, 8));
> -  MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12));
> -  curr.noMask = 0;
> -  curr.quarterControl = 1;
> -  curr.nibControl = 0;
> -  MOV(GenRegister::suboffset(dest, 8), r0);
> -  curr.nibControl = 1;
> -  MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4));
> -  pop();
> -}
> -  }
> -
>void Gen75Encoder::JMPI(GenRegister src, bool longjmp) {
>  alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
>}
> diff --git a/backend/src/backend/gen75_encoder.hpp 
> b/backend/src/backend/gen75_encoder.hpp
> index e494f29..f5044c0 100644
> --- a/backend/src/backend/gen75_encoder.hpp
> +++ b/backend/src/backend/gen75_encoder.hpp
> @@ -42,7 +42,6 @@ namespace gbe
>  virtual void JMPI(GenRegister src, bool longjmp = false);
>  /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump 
> distance */
>  virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
> -virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp 
> = GenRegister::null());
>  virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double 
> value);
>  virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
> GenRegister bti, uint32_t srcNum);
>  virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
> bti, uint32_t elemNum);
> diff --git a/backend/src/backend/gen8_encoder.cpp 
> b/backend/src/backend/gen8_encoder.cpp
> index 55fc3fb..98c3917 100644
> --- a/backend/src/backend/gen8_encoder.cpp
> +++ b/backend/src/backend/gen8_encoder.cpp
> @@ -260,42 +260,6 @@ namespace gbe
>  MOV(dest, value);
>}
>  
> -  void Gen8Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister 
> tmp) {
> -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf() && 
> dest.type == GEN_TYPE_F));
> -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F);
> -int w = curr.execWidth;
> -GenRegister r0;
> -r0 = GenRegister::h2(r);
> -push();
> -curr.execWidth = 4;
> -curr.predicate = GEN_PREDICATE_NONE;
> -curr.noMask = 1;
> -MOV(r0, src0);
> -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4));
> -curr.noMask = 0;
> -curr.quarterControl = 0;
> -curr.nibControl = 0;
> -MOV(dest, r0);
> -curr.nibControl = 1;
> -MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4));
> -

[Beignet] [PATCH v6 2/4] Add extensions intel_accelerator and basic intel_motion_estimation.

2015-11-05 Thread Chuanbo Weng

v2:
1. Just upload the first vme_state.
2. Remove duplicated code in check_opt1_extension.
3. Check image format before cl_gpgpu_bind_image_for_vme.
4. Fix an error in getting the mv. Because we assume this kernel runs
   in SIMD16 mode, dword 0 of grf 1 should be
   __gen_ocl_region(8,vme_result.s0), not
   __gen_ocl_region(0,vme_result.s1).

v3:
Return CL_IMAGE_FORMAT_NOT_SUPPORTED if image format is not the required
one.

v4:
Fix two conflicts after code rebase and work around a curbe-related bug.

v6:
Treat simd8 and simd16 differently when getting mv.

Signed-off-by: Guo Yejun 
Signed-off-by: Chuanbo Weng 
---
 include/CL/cl_ext.h| 103 +
 src/CMakeLists.txt |   4 +-
 src/cl_accelerator_intel.c |  86 
 src/cl_accelerator_intel.h |  29 +++
 src/cl_api.c   | 106 +-
 src/cl_command_queue.c |  17 +-
 src/cl_command_queue_gen7.c|   8 +-
 src/cl_context.c   |   1 +
 src/cl_context.h   |   3 +
 src/cl_driver.h|  21 ++
 src/cl_driver_defs.c   |   2 +
 src/cl_extensions.c|   4 +-
 src/cl_extensions.h|   8 +
 src/cl_gen7_device.h   |   5 +-
 src/cl_gt_device.h |   6 +-
 src/cl_internals.h |   1 +
 src/cl_kernel.c|  57 -
 src/cl_kernel.h|   6 +-
 src/cl_utils.h |  12 ++
 src/intel/intel_gpgpu.c| 217 ++-
 src/intel/intel_structs.h  | 120 +++
 .../cl_internal_block_motion_estimate_intel.cl | 233 +
 22 files changed, 1016 insertions(+), 33 deletions(-)
 create mode 100644 src/cl_accelerator_intel.c
 create mode 100644 src/cl_accelerator_intel.h
 create mode 100644 src/kernels/cl_internal_block_motion_estimate_intel.cl

diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h
index 710bea8..0a66d70 100644
--- a/include/CL/cl_ext.h
+++ b/include/CL/cl_ext.h
@@ -184,6 +184,109 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clTerminateContextKHR_fn)(cl_context /
 #define CL_PRINTF_CALLBACK_ARM  0x40B0
 #define CL_PRINTF_BUFFERSIZE_ARM0x40B1
 
+/*
+* cl_intel_accelerator extension *
+*/
+#define cl_intel_accelerator 1
+#define cl_intel_motion_estimation 1
+
+typedef struct _cl_accelerator_intel* cl_accelerator_intel;
+typedef cl_uint   cl_accelerator_type_intel;
+typedef cl_uint   cl_accelerator_info_intel;
+
+typedef struct _cl_motion_estimation_desc_intel {
+cl_uint mb_block_type;
+cl_uint subpixel_mode;
+cl_uint sad_adjust_mode;
+cl_uint search_path_type;
+} cl_motion_estimation_desc_intel;
+
+/* Error Codes */
+#define CL_INVALID_ACCELERATOR_INTEL-1094
+#define CL_INVALID_ACCELERATOR_TYPE_INTEL   -1095
+#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
+#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
+
+/* Deprecated Error Codes */
+#define CL_INVALID_ACCELERATOR_INTEL_DEPRECATED-6000
+#define CL_INVALID_ACCELERATOR_TYPE_INTEL_DEPRECATED   -6001
+#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL_DEPRECATED -6002
+#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL_DEPRECATED -6003
+
+/* cl_accelerator_type_intel */
+#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
+
+/* cl_accelerator_info_intel */
+#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
+#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL0x4091
+#define CL_ACCELERATOR_CONTEXT_INTEL0x4092
+#define CL_ACCELERATOR_TYPE_INTEL   0x4093
+
+/*cl_motion_detect_desc_intel flags */
+#define CL_ME_MB_TYPE_16x16_INTEL   0x0
+#define CL_ME_MB_TYPE_8x8_INTEL 0x1
+#define CL_ME_MB_TYPE_4x4_INTEL 0x2
+
+#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL   0x0
+#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL  0x1
+#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL  0x2
+
+#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL0x0
+#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL0x1
+
+#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL  0x0
+#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL  0x1
+#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL0x5
+
+extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
+clCreateAcceleratorINTEL(
+cl_context

[Beignet] [PATCH v6 3/4] Add basic utest for block_motion_estimate_intel.

2015-11-05 Thread Chuanbo Weng

If the CL device does not support this builtin kernel, the test returns
PASS.

Signed-off-by: Guo Yejun 
---
 utests/CMakeLists.txt  |   1 +
 .../builtin_kernel_block_motion_estimate_intel.cpp | 109 +
 utests/utest_helper.hpp|   1 +
 3 files changed, 111 insertions(+)
 create mode 100644 utests/builtin_kernel_block_motion_estimate_intel.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 18337fa..1603f94 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -205,6 +205,7 @@ set (utests_sources
   test_printf.cpp
   enqueue_fill_buf.cpp
   builtin_kernel_max_global_size.cpp
+  builtin_kernel_block_motion_estimate_intel.cpp
   image_1D_buffer.cpp
   image_from_buffer.cpp
   compare_image_2d_and_1d_array.cpp
diff --git a/utests/builtin_kernel_block_motion_estimate_intel.cpp 
b/utests/builtin_kernel_block_motion_estimate_intel.cpp
new file mode 100644
index 000..12bcb7d
--- /dev/null
+++ b/utests/builtin_kernel_block_motion_estimate_intel.cpp
@@ -0,0 +1,109 @@
+#include "utest_helper.hpp"
+#include 
+
+void builtin_kernel_block_motion_estimate_intel(void)
+{
+  char* built_in_kernel_names;
+  size_t built_in_kernels_size;
+  cl_int err = CL_SUCCESS;
+  size_t ret_sz;
+
+  OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, 
_in_kernels_size);
+  built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) 
);
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 
built_in_kernels_size, (void*)built_in_kernel_names, _sz);
+  OCL_ASSERT(ret_sz == built_in_kernels_size);
+
+  if (strstr(built_in_kernel_names, "block_motion_estimate_intel") == NULL)
+  {
+free(built_in_kernel_names);
+return;
+  }
+
+  cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, 
, built_in_kernel_names, );
+  OCL_ASSERT(built_in_prog != NULL);
+  kernel = clCreateKernel(built_in_prog, "block_motion_estimate_intel",  );
+  OCL_ASSERT(kernel != NULL);
+
+  cl_motion_estimation_desc_intel vmedesc = {CL_ME_MB_TYPE_16x16_INTEL,   
//0x0
+  CL_ME_SUBPIXEL_MODE_INTEGER_INTEL,  
//0x0
+  CL_ME_SAD_ADJUST_MODE_NONE_INTEL,   
//0x0
+  CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 
//0x5
+  };
+  cl_accelerator_intel accel = clCreateAcceleratorINTEL(ctx, 
CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL,sizeof(cl_motion_estimation_desc_intel),
 , );
+  OCL_ASSERT(accel != NULL);
+
+  const size_t w = 71; //80
+  const size_t h = 41; //48
+
+  cl_image_format format;
+  cl_image_desc desc;
+
+  memset(, 0x0, sizeof(cl_image_desc));
+  memset(, 0x0, sizeof(cl_image_format));
+
+  uint8_t* image_data1 = (uint8_t *)malloc(w * h);//src
+  uint8_t* image_data2 = (uint8_t *)malloc(w * h);//ref
+  for (size_t j = 0; j < h; j++) {
+for (size_t i = 0; i < w; i++) {
+  if (i >= 32 && i <= 47 && j >= 16 && j <= 31)
+image_data2[w * j + i] = image_data1[w * j + i] = 100;
+  else
+image_data2[w * j + i] = image_data1[w * j + i] = 0;
+}
+  }
+
+  format.image_channel_order = CL_R;
+  format.image_channel_data_type = CL_UNORM_INT8;
+  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+  desc.image_width = w;
+  desc.image_height = h;
+  desc.image_row_pitch = 0;
+  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , image_data1); 
   //src
+  OCL_CREATE_IMAGE(buf[1], CL_MEM_COPY_HOST_PTR, , , image_data2); 
   //ref
+
+  const size_t mv = (80/16) * (48/16);
+  OCL_CREATE_BUFFER(buf[2], 0, mv * sizeof(int) * 4, NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_accelerator_intel), );
+  OCL_SET_ARG(1, sizeof(cl_mem), [0]);
+  OCL_SET_ARG(2, sizeof(cl_mem), [1]);
+  OCL_SET_ARG(3, sizeof(cl_mem), NULL);
+  OCL_SET_ARG(4, sizeof(cl_mem), [2]);
+  OCL_SET_ARG(5, sizeof(cl_mem), NULL);
+
+  globals[0] = w;
+  globals[1] = h;
+  OCL_CALL(clEnqueueNDRangeKernel, queue, kernel, 2, NULL, globals, NULL, 0, 
NULL, NULL);
+
+  OCL_MAP_BUFFER(2);
+  short expected[] = {-64, -48,
+-64, -48,
+-64, -48,
+-64, -48,
+-64, -48,
+-64, -48,
+-64, -48,
+0, 0,
+0, -48,
+-64, -48,
+-64, -48,
+-64, -48,
+-64, -48,
+0, -48,
+-64, -48};
+  short* res = (short*)buf_data[2];
+  for (uint32_t j = 0; j < mv; ++j) {
+OCL_ASSERT(res[j * 2 + 0] == expected[j * 2 + 0]);
+OCL_ASSERT(res[j * 2 + 1] == expected[j * 2 + 1]);
+  }
+  OCL_UNMAP_BUFFER(2);
+
+  clReleaseAcceleratorINTEL(accel);
+  clReleaseKernel(kernel);
+  clReleaseProgram(built_in_prog);
+  free(built_in_kernel_names);
+  free(image_data1);
+  free(image_data2);
+}
+

[Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.

2015-11-05 Thread Chuanbo Weng

__gen_ocl_vme is used for hardware accelerated video motion estimation.
It gets payload values as parameters and uses MOV to pass these payload
values to VME SEND Message's payload grfs. The int8 return value is used
to store SEND Message writeback.

v2:
Remove unnecessary 5 parameters(src_grf*) of built-in function(we just
need to allocate related registers in gen_insn_selection step).

v3:
Remove redundant code and change MAX_SRC_NUM to 40.

v4:
Choose message response length by message type instead of hard code.

v5:
Choose message response length by message type in the whole backend
pipeline.

v6:
Treat simd8 and simd16 differently when moving payload values to consecutive
payload grfs.

Signed-off-by: Chuanbo Weng 
---
 backend/src/backend/gen/gen_mesa_disasm.c  | 14 
 backend/src/backend/gen7_instruction.hpp   | 15 
 backend/src/backend/gen_context.cpp| 98 ++
 backend/src/backend/gen_context.hpp|  1 +
 backend/src/backend/gen_defs.hpp   | 15 
 backend/src/backend/gen_encoder.cpp| 44 ++
 backend/src/backend/gen_encoder.hpp| 13 +++
 .../src/backend/gen_insn_gen7_schedule_info.hxx|  1 +
 backend/src/backend/gen_insn_selection.cpp | 73 
 backend/src/backend/gen_insn_selection.hpp | 14 +++-
 backend/src/backend/gen_insn_selection.hxx |  1 +
 backend/src/ir/instruction.cpp | 66 +++
 backend/src/ir/instruction.hpp | 17 +++-
 backend/src/ir/instruction.hxx |  1 +
 backend/src/libocl/include/ocl_misc.h  | 15 
 backend/src/llvm/llvm_gen_backend.cpp  | 47 +++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  2 +
 backend/src/llvm/llvm_scalarize.cpp|  4 +
 18 files changed, 436 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c 
b/backend/src/backend/gen/gen_mesa_disasm.c
index 5b71cfa..3198da7 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -476,6 +476,13 @@ static int column;
 
 static int gen_version;
 
+#define GEN7_BITS_FIELD(inst, gen7) \
+  ({\
+int bits;   \
+  bits = ((const union Gen7NativeInstruction *)inst)->gen7; \
+bits;   \
+  })
+
 #define GEN_BITS_FIELD(inst, gen)   \
   ({\
 int bits;   \
@@ -530,6 +537,8 @@ static int gen_version;
 #define EXECUTION_SIZE(inst)   GEN_BITS_FIELD(inst, header.execution_size)
 #define BRANCH_JIP(inst)   GEN_BITS_FIELD2(inst, 
bits3.gen7_branch.jip, bits3.gen8_branch.jip/8)
 #define BRANCH_UIP(inst)   GEN_BITS_FIELD2(inst, 
bits3.gen7_branch.uip, bits2.gen8_branch.uip/8)
+#define VME_BTI(inst)  GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti)
+#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst, 
bits3.vme_gen7.msg_type)
 #define SAMPLE_BTI(inst)   GEN_BITS_FIELD(inst, bits3.sampler_gen7.bti)
 #define SAMPLER(inst)  GEN_BITS_FIELD(inst, 
bits3.sampler_gen7.sampler)
 #define SAMPLER_MSG_TYPE(inst) GEN_BITS_FIELD(inst, 
bits3.sampler_gen7.msg_type)
@@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t 
deviceID, uint32_t compac
 
 if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, 
bits2.da1.src1_reg_file) == GEN_IMMEDIATE_VALUE) {
   switch (target) {
+case GEN_SFID_VIDEO_MOTION_EST:
+  format(file, " (bti: %d, msg_type: %d)",
+ VME_BTI(inst),
+ VME_MSG_TYPE(inst));
+  break;
 case GEN_SFID_SAMPLER:
   format(file, " (%d, %d, %d, %d)",
  SAMPLE_BTI(inst),
diff --git a/backend/src/backend/gen7_instruction.hpp 
b/backend/src/backend/gen7_instruction.hpp
index 51f342b..258dd24 100644
--- a/backend/src/backend/gen7_instruction.hpp
+++ b/backend/src/backend/gen7_instruction.hpp
@@ -350,6 +350,21 @@ union Gen7NativeInstruction
 uint32_t end_of_thread:1;
   } sampler_gen7;
 
+  struct {
+uint32_t bti:8;
+uint32_t vme_search_path_lut:3;
+uint32_t lut_sub:2;
+uint32_t msg_type:2;
+uint32_t stream_in:1;
+uint32_t stream_out:1;
+uint32_t reserved_mbz:2;
+uint32_t header_present:1;
+uint32_t response_length:5;
+uint32_t msg_length:4;
+uint32_t pad1:2;
+uint32_t end_of_thread:1;
+  } vme_gen7;
+
   /**
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
*
diff --git a/backend/src/backend/gen_context.cpp

[Beignet] [PATCH v6 4/4] Add document of video motion estimation support.

2015-11-05 Thread Chuanbo Weng

v3:
Fix two typos.

Signed-off-by: Chuanbo Weng 
---
 docs/Beignet.mdwn |  1 +
 docs/howto/video-motion-estimation-howto.mdwn | 79 +++
 2 files changed, 80 insertions(+)
 create mode 100644 docs/howto/video-motion-estimation-howto.mdwn

diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn
index 9a2b516..363add0 100644
--- a/docs/Beignet.mdwn
+++ b/docs/Beignet.mdwn
@@ -306,6 +306,7 @@ Documents for OpenCL application developers
 - [[Kernel Optimization Guide|Beignet/optimization-guide]]
 - [[Libva Buffer Sharing|Beignet/howto/libva-buffer-sharing-howto]]
 - [[V4l2 Buffer Sharing|Beignet/howto/v4l2-buffer-sharing-howto]]
+- [[Video Motion Estimation|Beignet/howto/video-motion-estimation-howto]]
 
 The wiki URL is as below:
 
[http://www.freedesktop.org/wiki/Software/Beignet/](http://www.freedesktop.org/wiki/Software/Beignet/)
diff --git a/docs/howto/video-motion-estimation-howto.mdwn 
b/docs/howto/video-motion-estimation-howto.mdwn
new file mode 100644
index 000..d9edc9b
--- /dev/null
+++ b/docs/howto/video-motion-estimation-howto.mdwn
@@ -0,0 +1,79 @@
+Video Motion Vector HowTo
+==
+
+Beignet now supports cl_intel_accelerator and part of 
cl_intel_motion_estimation, which
+are official Khronos extensions. They provide hardware-accelerated video 
motion
+vector estimation to users.
+
+Supported hardware platform and limitation
+--
+
+Only 3rd Generation Intel Core Processors are supported for VME now. For now we 
only
+implement this part of cl_intel_motion_estimation for motion vector 
computation (residuals
+cannot be returned yet) on 3rd Generation Intel Core Processors:
+  mb_block_type = CL_ME_MB_TYPE_16x16_INTEL
+  subpixel_mode = CL_ME_SUBPIXEL_MODE_INTEGER_INTEL
+  search_path_type = CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL / 
CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL
+ / CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL
+We will fully support cl_intel_motion_estimation in the future.
+
+Steps
+-
+
+In order to use video motion estimation provided by Beignet in your program, 
please follow
+the steps below:
+
+- Create a cl_accelerator_intel object using extension API 
clCreateAcceleratorINTEL, with
+  the following parameters:
+  cl_accelerator_type_intel accelerator_type = 
CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL;
+  cl_motion_estimation_desc_intel vmedesc = {CL_ME_MB_TYPE_16x16_INTEL,
+ CL_ME_SUBPIXEL_MODE_INTEGER_INTEL,
+ CL_ME_SAD_ADJUST_MODE_NONE_INTEL,
+ 
CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL(
+ or 
CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL
+ or 
CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL)
+};
+
+- Invoke clCreateProgramWithBuiltInKernels to create a program object with 
built-in kernels
+  information, and invoke clCreateKernel to create a kernel object whose 
kernel name is
+  block_motion_estimate_intel.
+
+- The prototype of built-in kernel block_motion_estimate_intel is as follows:
+  __kernel void
+  block_motion_estimate_intel
+  (
+   accelerator_intel_t accelerator,
+   __read_only  image2d_t src_image,
+   __read_only  image2d_t ref_image,
+   __global short2 * prediction_motion_vector_buffer,
+   __global short2 * motion_vector_buffer,
+   __global ushort * residuals
+   );
+  So you should create the related objects and set up these kernel arguments with 
clSetKernelArg.
+  Create the source and reference image objects on which you want to do video 
motion estimation.
+  The image_channel_order should be CL_R and image_channel_data_type should be 
CL_UNORM_INT8.
+  Create a buffer object to get the motion vector result. This motion vector 
buffer represents
+  a vector field of pixel block motion vectors, stored linearly in row-major 
order. The elements
+  (pixels) of this image contain a motion vector for the corresponding pixel 
block, with its x/y
+  components packed as two 16-bit integer values. Each component is encoded as 
a S13.2 fixed
+  point value(two's complement).
+
+- Use clEnqueueNDRangeKernel to enqueue this kernel. The only thing you need 
to set up is global_work_size:
+  global_work_size[0] equals the width of the source image, global_work_size[1] 
equals the height of the source
+  image.
+
+- Use clEnqueueReadBuffer or clEnqueueMapBuffer to get motion vector result.
+
+
+Sample code
+---
+
+We have developed a utest case that uses video motion estimation in 
utests/builtin_kernel_block_motion_estimate_intel.cpp.
+Please go through it for details.
+
+More references
+---
+
+https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt
+https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt

Re: [Beignet] [PATCH] utests: fix image_from_buffer bugs

2015-11-05 Thread Pan, Xiuli

Ping for pushed.

-Original Message-
From: Luo, Xionghu 
Sent: Wednesday, October 28, 2015 9:42 AM
To: Pan, Xiuli ; beignet@lists.freedesktop.org
Cc: Pan, Xiuli 
Subject: RE: [Beignet] [PATCH] utests: fix image_from_buffer bugs

This patch LGTM.
Thanks.

Luo Xionghu
Best Regards

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Pan 
Xiuli
Sent: Tuesday, October 27, 2015 2:16 PM
To: beignet@lists.freedesktop.org
Cc: Pan, Xiuli
Subject: [Beignet] [PATCH] utests: fix image_from_buffer bugs

Fixed 2 bugs:
1. This test case uses a user pointer (CL_MEM_USE_HOST_PTR), so we must not free the original buffer space 
before we are done with the buffer; otherwise undefined behavior occurs: adding or removing one header file 
corrupts the data at the front, NDRangeKernel fails, etc.
2. The utest needs to test the order of releasing the image created from the buffer and the buffer itself, but the 
utest helper function releases them again, which makes libc emit 
warnings. We set the global variables to NULL to avoid these problems.
These changes fix the broken image_from_buffer utest.

Signed-off-by: Pan Xiuli 
---
 utests/image_from_buffer.cpp | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/utests/image_from_buffer.cpp b/utests/image_from_buffer.cpp index 
78d6797..b1171d1 100644
--- a/utests/image_from_buffer.cpp
+++ b/utests/image_from_buffer.cpp
@@ -32,13 +32,13 @@ static void image_from_buffer(void)
 
   // Setup kernel and images
   size_t buffer_sz = sizeof(uint32_t) * w * h;
-  //buf_data[0] = (uint32_t*) malloc(buffer_sz);
-  buf_data[0] = (uint32_t*)memalign(base_address_alignment, buffer_sz);
+  uint32_t* src_data;
+  src_data = (uint32_t*)memalign(base_address_alignment, buffer_sz);
   for (uint32_t j = 0; j < h; ++j)
 for (uint32_t i = 0; i < w; i++)
-  ((uint32_t*)buf_data[0])[j * w + i] = j * w + i;
+  src_data[j * w + i] = j * w + i;
 
-  cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, 
buffer_sz, buf_data[0], );
+  cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | 
+ CL_MEM_USE_HOST_PTR, buffer_sz, src_data, );
 
   OCL_ASSERT(error == CL_SUCCESS);
   format.image_channel_order = CL_RGBA; @@ -49,7 +49,7 @@ static void 
image_from_buffer(void)
   desc.image_row_pitch = w * sizeof(uint32_t);
 
   desc.buffer = 0;
-  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , buf_data[0]);
+  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , , 
+ src_data);
 
   desc.buffer = buff;
   OCL_CREATE_IMAGE(buf[1], 0, , , NULL); @@ -58,9 +58,6 @@ static 
void image_from_buffer(void)
   desc.image_row_pitch = 0;
   OCL_CREATE_IMAGE(buf[2], CL_MEM_WRITE_ONLY, , , NULL);
 
-  free(buf_data[0]);
-  buf_data[0] = NULL;
-
   OCL_SET_ARG(0, sizeof(cl_mem), [1]);
   OCL_SET_ARG(1, sizeof(cl_mem), [2]);
 
@@ -87,6 +84,8 @@ static void image_from_buffer(void)
   OCL_UNMAP_BUFFER_GTT(1);
   OCL_UNMAP_BUFFER_GTT(2);
 
+  free(src_data);
+
   //spec didn't tell the sequence of release buffer of image. so release 
either buffer or image first is ok here.
   //we follow the rule of destroy the bo at the last release, then the access 
of buffer after release image is legal
   //and vice verse.
@@ -98,6 +97,8 @@ static void image_from_buffer(void)
   clReleaseMemObject(buf[1]);
 #endif
   clReleaseMemObject(buf[2]);
+  buf[1] = NULL;
+  buf[2] = NULL;
 }
 
 MAKE_UTEST_FROM_FUNCTION(image_from_buffer);
--
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH] GBE: fix printf class static variable bug

2015-11-05 Thread Pan, Xiuli

Ping for review.

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Pan 
Xiuli
Sent: Tuesday, November 3, 2015 11:30 AM
To: beignet@lists.freedesktop.org
Cc: Pan, Xiuli 
Subject: [Beignet] [PATCH] GBE: fix printf class static variable bug

The PrintfParser::printfs static is not thread
safe and may be reset or have wrong entries added
when running multithreaded. Fix the problem by
changing printfs to a thread-local variable.

Signed-off-by: Pan Xiuli 
---
 backend/src/llvm/llvm_printf_parser.cpp | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/backend/src/llvm/llvm_printf_parser.cpp 
b/backend/src/llvm/llvm_printf_parser.cpp
index bdaed8a..93f87ea 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -45,6 +45,8 @@ namespace gbe
 {
   using namespace ir;
 
+  thread_local map printfs;
+
   /* Return the conversion_specifier if succeed, -1 if failed. */
   static char __parse_printf_state(char *begin, char *end, char** rend, 
PrintfState * state)
   {
@@ -301,7 +303,6 @@ error:
 Value* g1Xg2Xg3;
 Value* wg_offset;
 int out_buf_sizeof_offset;
-static map printfs;
 int printf_num;
 int totalSizeofSize;
 
@@ -972,12 +973,10 @@ error:
 return false;
   }
 
-  map PrintfParser::printfs;
-
   void* getPrintfInfo(CallInst* inst)
   {
-if (PrintfParser::printfs[inst])
-  return (void*)PrintfParser::printfs[inst];
+if (printfs[inst])
+  return (void*)printfs[inst];
 return NULL;
   }
 
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

[Beignet] [PATCH V2 2/2] Backend: add debugwait function

2015-11-05 Thread Pan Xiuli

Use the wait function to add a debug function:
void debugwait(void)
This function hangs the GPU until the GPU is reset
or the host sends something to let it go.
EXTREMELY DANGEROUS for machines with hangcheck turned off.

v2:
Fix some bugs, and add setting predicate and execwidth,
also modify some inst scheduling

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_context.cpp |  3 ++-
 backend/src/backend/gen_encoder.cpp |  1 +
 backend/src/backend/gen_insn_scheduling.cpp |  1 +
 backend/src/backend/gen_insn_selection.cpp  | 24 +--
 backend/src/backend/gen_insn_selection.hpp  |  1 +
 backend/src/ir/instruction.cpp  | 30 +
 backend/src/ir/instruction.hpp  |  9 +
 backend/src/ir/instruction.hxx  |  1 +
 backend/src/libocl/include/ocl_sync.h   |  1 +
 backend/src/libocl/src/ocl_barrier.ll   |  6 ++
 backend/src/libocl/src/ocl_sync.cl  |  1 +
 backend/src/llvm/llvm_gen_backend.cpp   |  6 ++
 backend/src/llvm/llvm_gen_ocl_function.hxx  |  2 ++
 13 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index baf3897..6a9b4e5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1804,7 +1804,8 @@ namespace gbe
   }
 
   void GenContext::emitWaitInstruction(const SelectionInstruction ) {
-p->WAIT();
+p->curr.execWidth = 1;
+p->WAIT(insn.extra.waitType);
   }
 
   void GenContext::emitBarrierInstruction(const SelectionInstruction ) {
diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index dc49689..2eca5fc 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1108,6 +1108,7 @@ namespace gbe
 
   void GenEncoder::WAIT(uint32_t n) {
  GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
+ GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
  GenRegister src = GenRegister::notification0(n);
  this->setDst(insn, GenRegister::null());
  this->setSrc0(insn, src);
diff --git a/backend/src/backend/gen_insn_scheduling.cpp 
b/backend/src/backend/gen_insn_scheduling.cpp
index 358a2ce..8ee5e48 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -589,6 +589,7 @@ namespace gbe
   || node->insn.opcode == SEL_OP_ENDIF
   || node->insn.opcode == SEL_OP_WHILE
   || node->insn.opcode == SEL_OP_READ_ARF
+  || node->insn.opcode == SEL_OP_WAIT
   || node->insn.opcode == SEL_OP_BARRIER)
 tracker.makeBarrier(insnID, insnNum);
 }
diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 1711ab6..a9f6bce 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -618,7 +618,7 @@ namespace gbe
 /*! No-op */
 void NOP(void);
 /*! Wait instruction (used for the barrier) */
-void WAIT(void);
+void WAIT(uint32_t n = 0);
 /*! Atomic instruction */
 void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg 
src1, Reg src2, GenRegister bti, vector temps);
 /*! Read 64 bits float/int array */
@@ -1282,7 +1282,11 @@ namespace gbe
 
   void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
   void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
-  void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
+  void Selection::Opaque::WAIT(uint32_t n)
+  {
+SelectionInstruction *insn = this->appendInsn(SEL_OP_WAIT, 0, 0);
+insn->extra.waitType = n;
+  }
 
   void Selection::Opaque::READ64(Reg addr,
  const GenRegister *dst,
@@ -3331,6 +3335,21 @@ namespace gbe
 DECL_CTOR(SyncInstruction, 1,1);
   };
 
+  /*! Wait instruction */
+  DECL_PATTERN(WaitInstruction)
+  {
+INLINE bool emitOne(Selection::Opaque , const ir::WaitInstruction 
, bool ) const
+{
+  using namespace ir;
+  // Debugwait will use reg 1, which is different from barrier
+  sel.curr.predicate = GEN_PREDICATE_NONE;
+  sel.WAIT(1);
+  return true;
+}
+
+DECL_CTOR(WaitInstruction, 1,1);
+  };
+
   INLINE uint32_t getByteScatterGatherSize(Selection::Opaque , ir::Type 
type) {
 using namespace ir;
 switch (type) {
@@ -5543,6 +5562,7 @@ namespace gbe
 this->insert();
 this->insert();
 this->insert();
+this->insert();
 
 // Sort all the patterns with the number of instructions they output
 for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
diff --git a/backend/src/backend/gen_insn_selection.hpp 
b/backend/src/backend/gen_insn_selection.hpp
index 275eb9c..7e6ce96 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -130,6 +130,7 @@ namespace gbe

[Beignet] [PATCH 1/2] Backend: enable to choose notification register

2015-11-05 Thread Pan Xiuli

There are 3 notification registers that can be used by wait, so we
should be able to choose which one we'd like to use.
The 3 registers are n0.0, n0.1 and n0.2, so also change
the function name accordingly.

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_encoder.cpp  | 4 ++--
 backend/src/backend/gen_encoder.hpp  | 2 +-
 backend/src/backend/gen_register.hpp | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index cac29e8..dc49689 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1106,9 +1106,9 @@ namespace gbe
 this->setSrc1(insn, src1);
   }
 
-  void GenEncoder::WAIT(void) {
+  void GenEncoder::WAIT(uint32_t n) {
  GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
- GenRegister src = GenRegister::notification1();
+ GenRegister src = GenRegister::notification0(n);
  this->setDst(insn, GenRegister::null());
  this->setSrc0(insn, src);
  this->setSrc1(insn, GenRegister::null());
diff --git a/backend/src/backend/gen_encoder.hpp 
b/backend/src/backend/gen_encoder.hpp
index 79e7b6e..3549661 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -167,7 +167,7 @@ namespace gbe
 /*! No-op */
 void NOP(void);
 /*! Wait instruction (used for the barrier) */
-void WAIT(void);
+void WAIT(uint32_t n = 0);
 /*! Atomic instructions */
 virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
 /*! Untyped read (upto 4 channels) */
diff --git a/backend/src/backend/gen_register.hpp 
b/backend/src/backend/gen_register.hpp
index aac2da5..68a210e 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -809,10 +809,10 @@ namespace gbe
  GEN_HORIZONTAL_STRIDE_0);
 }
 
-static INLINE GenRegister notification1(void) {
+static INLINE GenRegister notification0(uint32_t subnr) {
   return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
  GEN_ARF_NOTIFICATION_COUNT,
- 0,
+ subnr,
  GEN_TYPE_UD,
  GEN_VERTICAL_STRIDE_0,
  GEN_WIDTH_1,
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL to create cl buffer by external buffer object's fd.

2015-11-05 Thread Weng, Chuanbo

Ping for review, thanks!

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Weng, 
Chuanbo
Sent: Tuesday, October 27, 2015 10:16
To: beignet@lists.freedesktop.org
Cc: Wu, Zhiwen
Subject: Re: [Beignet] [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL 
to create cl buffer by external buffer object's fd.

Ping for review, thanks.

-Original Message-
From: Weng, Chuanbo
Sent: Monday, September 21, 2015 16:20
To: beignet@lists.freedesktop.org
Cc: Wu, Zhiwen; Weng, Chuanbo
Subject: [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL to create cl 
buffer by external buffer object's fd.

Before this patch, Beignet could only create a cl buffer from an external bo by its 
handle using clCreateBufferFromLibvaIntel. Render node is the first choice for 
accessing the gpu in the current Beignet implementation. DRM_IOCTL_GEM_OPEN is used by 
clCreateBufferFromLibvaIntel but is forbidden in Render node mode.
So it's necessary to add this extension to support buffer sharing between 
different libraries.

v2:
Separate clCreateMemObjectFromFdIntel into two extensions: 
clCreateBufferFromFdINTEL and clCreateImageFromFdINTEL.

Signed-off-by: Chuanbo Weng 
---
 include/CL/cl_intel.h| 16 
 src/cl_api.c | 23 +++
 src/cl_driver.h  |  3 +++
 src/cl_driver_defs.c |  1 +
 src/cl_mem.c | 30 ++
 src/cl_mem.h |  5 +
 src/intel/intel_driver.c | 34 +++---
 7 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h index 
28bcb62..01da553 100644
--- a/include/CL/cl_intel.h
+++ b/include/CL/cl_intel.h
@@ -133,6 +133,22 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clGetMemObjectFdIntel_fn)(
  cl_mem   /* Memory Obejct */,
  int* /* returned fd */);
 
+typedef struct _cl_import_buffer_info_intel {
+int fd;
+int size;
+} cl_import_buffer_info_intel;
+
+/* Create memory object from external buffer object by fd */ extern 
+CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBufferFromFdINTEL(cl_context/* context */,
+  const cl_import_buffer_info_intel *   /* info */,
+  cl_int *  /* errcode_ret 
*/);
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateBufferFromFdINTEL_fn)(
+ cl_context/* context 
*/,
+ const cl_import_buffer_info_intel *   /* info */,
+ cl_int *  /* 
errcode_ret */);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/cl_api.c b/src/cl_api.c index dbbcbb0..ba82743 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -3187,6 +3187,7 @@ internal_clGetExtensionFunctionAddress(const char 
*func_name)
   EXTFUNC(clCreateBufferFromLibvaIntel)
   EXTFUNC(clCreateImageFromLibvaIntel)
   EXTFUNC(clGetMemObjectFdIntel)
+  EXTFUNC(clCreateBufferFromFdINTEL)
   return NULL;
 }
 
@@ -3355,3 +3356,25 @@ clGetMemObjectFdIntel(cl_context context,
 error:
   return err;
 }
+
+cl_mem
+clCreateBufferFromFdINTEL(cl_context context,
+  const cl_import_buffer_info_intel* info,
+  cl_int *errorcode_ret) {
+  cl_mem mem = NULL;
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (context);
+
+  if (!info) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  mem = cl_mem_new_buffer_from_fd(context, info->fd, info->size, );
+
+error:
+  if (errorcode_ret)
+*errorcode_ret = err;
+  return mem;
+}
diff --git a/src/cl_driver.h b/src/cl_driver.h index 1ab4dff..e0991c1 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -381,6 +381,9 @@ extern cl_buffer_get_fd_cb *cl_buffer_get_fd;  typedef int 
(cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t 
dim);  extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align;
 
+typedef cl_buffer (cl_buffer_get_buffer_from_fd_cb)(cl_context ctx, int 
+fd, int size); extern cl_buffer_get_buffer_from_fd_cb 
+*cl_buffer_get_buffer_from_fd;
+
 /* Get the device id */
 typedef int (cl_driver_get_device_id_cb)(void);
 extern cl_driver_get_device_id_cb *cl_driver_get_device_id; diff --git 
a/src/cl_driver_defs.c b/src/cl_driver_defs.c index b77acdc..b3e8403 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -53,6 +53,7 @@ LOCAL cl_buffer_get_buffer_from_libva_cb 
*cl_buffer_get_buffer_from_libva = NULL  LOCAL 
cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL;  
LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL;  LOCAL 
cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL;
+LOCAL cl_buffer_get_buffer_from_fd_cb *cl_buffer_get_buffer_from_fd = 
+NULL;
 
 /* cl_khr_gl_sharing */
 LOCAL

Re: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random bug

2015-11-05 Thread Pan, Xiuli

Ping for review.

-Original Message-
From: Pan, Xiuli 
Sent: Thursday, October 29, 2015 1:47 PM
To: beignet@lists.freedesktop.org
Cc: Pan, Xiuli 
Subject: [PATCH] utests: fix compiler_fill_image_2d_array random bug

Use the safer image write instead of map and memset. When
creating an image without data, we cannot set the pitch and
we don't know the pitch either. So using map and memset on
the space is too dangerous if the pitch is bigger than
w*sizeof(bpp); in this case the actual pitch is 512
but memset assumes a pitch of 64*4=256. With only half the space
set to 0, there will be undefined behavior when we want
to check the result for the space that we haven't set
to 0.

Signed-off-by: Pan Xiuli 
---
 utests/compiler_fill_image_2d_array.cpp | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/utests/compiler_fill_image_2d_array.cpp 
b/utests/compiler_fill_image_2d_array.cpp
index fc09362..ab7470e 100644
--- a/utests/compiler_fill_image_2d_array.cpp
+++ b/utests/compiler_fill_image_2d_array.cpp
@@ -11,6 +11,7 @@ static void compiler_fill_image_2d_array(void)
   size_t origin[3] = { };
   size_t region[3];
   uint32_t* dst;
+  uint32_t* src;
 
   memset(, 0x0, sizeof(cl_image_desc));
   memset(, 0x0, sizeof(cl_image_format));
@@ -28,9 +29,16 @@ static void compiler_fill_image_2d_array(void)
 
   OCL_CREATE_IMAGE(buf[0], 0, , , NULL);
 
-  OCL_MAP_BUFFER_GTT(0);
-  memset(buf_data[0], 0, sizeof(uint32_t) * w * h * array);
-  OCL_UNMAP_BUFFER_GTT(0);
+  region[0] = w;
+  region[1] = h;
+  region[2] = array;
+
+  // As we don't know the pitch right now, we cannot
+  // use map to setup the image. It is safer to use
+  // write image
+  src = (uint32_t*)malloc(sizeof(uint32_t) * w * h * array);
+  memset(src, 0, sizeof(uint32_t) * w * h * array);
+  OCL_WRITE_IMAGE(buf[0], origin, region, src);
 
   // Run the kernel
   OCL_SET_ARG(0, sizeof(cl_mem), [0]);
@@ -43,9 +51,6 @@ static void compiler_fill_image_2d_array(void)
   OCL_NDRANGE(3);
 
   // Check result
-  region[0] = w;
-  region[1] = h;
-  region[2] = array;
   dst = (uint32_t*)malloc(w*h*array*sizeof(uint32_t));
   OCL_READ_IMAGE(buf[0], origin, region, dst);
 
@@ -79,6 +84,7 @@ static void compiler_fill_image_2d_array(void)
 }
   }
   free(dst);
+  free(src);
 }
 
 MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_2d_array);
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string

2015-11-05 Thread Pan, Xiuli

Ping for pushed.

-Original Message-
From: Pan, Xiuli 
Sent: Wednesday, November 4, 2015 9:48 AM
To: Rebecca N. Palmer ; beignet@lists.freedesktop.org
Subject: RE: [Beignet] [PATCH v3] GBE: Don't read past end of printf format 
string

LGTM, Thanks for your help!

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of 
Rebecca N. Palmer
Sent: Wednesday, November 4, 2015 6:19 AM
To: beignet@lists.freedesktop.org
Subject: Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format 
string

When p == end (the null terminator byte), don't try to read p + 1:
as this is outside the string, it might be a '%' from a different object 
(causing __parse_printf_state(end + 2, end, ...) to be called, which will 
fail), or an invalid address.

Signed-off-by: Rebecca Palmer 
---
 backend/src/llvm/llvm_printf_parser.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/src/llvm/llvm_printf_parser.cpp 
b/backend/src/llvm/llvm_printf_parser.cpp
index bdaed8a..f427107 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -229,7 +229,7 @@ again:
 printf("string end with %%\n");
 goto error;
   }
-  if (*(p + 1) == '%') { // %%
+  if (p + 1 < end && *(p + 1) == '%') { // %%
 p += 2;
 goto again;
   }

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH] GBE: fix printf class static variable bug

2015-11-05 Thread Song, Ruiling

Thread_local is not needed to pass data from one llvm pass to another.
You can still access the info after the pass has already run.
In a later llvm pass, you can use getAnalysis<PrintfParser>() to get the
PrintfParser pass handle.

Then expose an interface in PrintfParser like
map *PrintfParser::getPrintfInfo() { return 
 }
then you can query the printfInfo in GenWriter.

Thanks!
Ruiling
> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Pan, Xiuli
> Sent: Friday, November 6, 2015 9:44 AM
> To: Pan, Xiuli; beignet@lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] GBE: fix printf class static variable bug
> 
> Ping for review.
> 
> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Pan Xiuli
> Sent: Tuesday, November 3, 2015 11:30 AM
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: [Beignet] [PATCH] GBE: fix printf class static variable bug
> 
> The PrintfParser::printfs static is not thread
> safe and may be reset or have wrong entries added
> when running in multiple threads. Fix the problem by
> changing printfs to a thread-local variable.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  backend/src/llvm/llvm_printf_parser.cpp | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/src/llvm/llvm_printf_parser.cpp
> b/backend/src/llvm/llvm_printf_parser.cpp
> index bdaed8a..93f87ea 100644
> --- a/backend/src/llvm/llvm_printf_parser.cpp
> +++ b/backend/src/llvm/llvm_printf_parser.cpp
> @@ -45,6 +45,8 @@ namespace gbe
>  {
>using namespace ir;
> 
> +  thread_local map printfs;
> +
>/* Return the conversion_specifier if succeed, -1 if failed. */
>static char __parse_printf_state(char *begin, char *end, char** rend,
> PrintfState * state)
>{
> @@ -301,7 +303,6 @@ error:
>  Value* g1Xg2Xg3;
>  Value* wg_offset;
>  int out_buf_sizeof_offset;
> -static map printfs;
>  int printf_num;
>  int totalSizeofSize;
> 
> @@ -972,12 +973,10 @@ error:
>  return false;
>}
> 
> -  map PrintfParser::printfs;
> -
>void* getPrintfInfo(CallInst* inst)
>{
> -if (PrintfParser::printfs[inst])
> -  return (void*)PrintfParser::printfs[inst];
> +if (printfs[inst])
> +  return (void*)printfs[inst];
>  return NULL;
>}
> 
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH] GBE: fix printf class static variable bug

2015-11-05 Thread Pan, Xiuli

Yes, but the problem is that if two threads have kernels with printf functions,
the map printfs will be cleared in the constructor and destructor. This will
cause the thread that still needs the info in printfs to get a null pointer.
thread_local is now used to protect printfs from other threads, not to pass
data from one pass to another.

I tried getAnalysis, but as you said, we need printfs in GenWriter, and
it could not be used there as easily as it is now.
Every pass manager will clear printfs and needs a new map to avoid disturbing
the others. It is fine when only one pass exists, but with multiple threads
there will be a lot of passes at the same time. If we use an interface like:
 *PrintfParser::getPrintfInfo() { return  }
it is still a static one, and threads may disturb each other.


-Original Message-
From: Song, Ruiling 
Sent: Friday, November 6, 2015 9:54 AM
To: Pan, Xiuli ; Pan, Xiuli ; 
beignet@lists.freedesktop.org
Subject: RE: [Beignet] [PATCH] GBE: fix printf class static variable bug

Thread_local is not needed to pass data from one llvm pass to another.
You can still access the info after the pass has already run.
In a later llvm pass, you can use getAnalysis<PrintfParser>() to get the
PrintfParser pass handle.

Then expose an interface in PrintfParser 
then you can query the printfInfo in GenWriter.

Thanks!
Ruiling
> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Pan, Xiuli
> Sent: Friday, November 6, 2015 9:44 AM
> To: Pan, Xiuli; beignet@lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] GBE: fix printf class static variable bug
> 
> Ping for review.
> 
> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Pan Xiuli
> Sent: Tuesday, November 3, 2015 11:30 AM
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: [Beignet] [PATCH] GBE: fix printf class static variable bug
> 
> The PrintfParser::printfs static is not thread
> safe and may be reset or have wrong entries added
> when running in multiple threads. Fix the problem by
> changing printfs to a thread-local variable.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  backend/src/llvm/llvm_printf_parser.cpp | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/src/llvm/llvm_printf_parser.cpp
> b/backend/src/llvm/llvm_printf_parser.cpp
> index bdaed8a..93f87ea 100644
> --- a/backend/src/llvm/llvm_printf_parser.cpp
> +++ b/backend/src/llvm/llvm_printf_parser.cpp
> @@ -45,6 +45,8 @@ namespace gbe
>  {
>using namespace ir;
> 
> +  thread_local map printfs;
> +
>/* Return the conversion_specifier if succeed, -1 if failed. */
>static char __parse_printf_state(char *begin, char *end, char** rend,
> PrintfState * state)
>{
> @@ -301,7 +303,6 @@ error:
>  Value* g1Xg2Xg3;
>  Value* wg_offset;
>  int out_buf_sizeof_offset;
> -static map printfs;
>  int printf_num;
>  int totalSizeofSize;
> 
> @@ -972,12 +973,10 @@ error:
>  return false;
>}
> 
> -  map PrintfParser::printfs;
> -
>void* getPrintfInfo(CallInst* inst)
>{
> -if (PrintfParser::printfs[inst])
> -  return (void*)PrintfParser::printfs[inst];
> +if (printfs[inst])
> +  return (void*)printfs[inst];
>  return NULL;
>}
> 
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.

2015-11-05 Thread Weng, Chuanbo

Hi Ruiling,
As we discussed before, I have refined code to handle both simd8 and 
simd16 in backend and ocl kernel. Please
confirm if no problem so that this patchset can be pushed. Thanks! 

-Original Message-
From: Weng, Chuanbo 
Sent: Friday, November 06, 2015 11:28
To: beignet@lists.freedesktop.org
Cc: Weng, Chuanbo
Subject: [PATCH v6 1/4] Add built-in function __gen_ocl_vme.

__gen_ocl_vme is used for hardware accelerated video motion estimation.
It gets payload values as parameters and uses MOV to pass these payload values 
to VME SEND Message's payload grfs. The int8 return value is used to store SEND 
Message writeback.

v2:
Remove unnecessary 5 parameters(src_grf*) of built-in function(we just need to 
allocate related registers in gen_insn_selection step).

v3:
Remove redundant code and change MAX_SRC_NUM to 40.

v4:
Choose message response length by message type instead of hard code.

v5:
Choose message response length by message type in the whole backend pipeline.

v6:
Treat simd8 and simd16 differently when mov payload value to consecutive 
payload grfs.

Signed-off-by: Chuanbo Weng 
---
 backend/src/backend/gen/gen_mesa_disasm.c  | 14 
 backend/src/backend/gen7_instruction.hpp   | 15 
 backend/src/backend/gen_context.cpp| 98 ++
 backend/src/backend/gen_context.hpp|  1 +
 backend/src/backend/gen_defs.hpp   | 15 
 backend/src/backend/gen_encoder.cpp| 44 ++
 backend/src/backend/gen_encoder.hpp| 13 +++
 .../src/backend/gen_insn_gen7_schedule_info.hxx|  1 +
 backend/src/backend/gen_insn_selection.cpp | 73 
 backend/src/backend/gen_insn_selection.hpp | 14 +++-
 backend/src/backend/gen_insn_selection.hxx |  1 +
 backend/src/ir/instruction.cpp | 66 +++
 backend/src/ir/instruction.hpp | 17 +++-
 backend/src/ir/instruction.hxx |  1 +
 backend/src/libocl/include/ocl_misc.h  | 15 
 backend/src/llvm/llvm_gen_backend.cpp  | 47 +++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  2 +
 backend/src/llvm/llvm_scalarize.cpp|  4 +
 18 files changed, 436 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c 
b/backend/src/backend/gen/gen_mesa_disasm.c
index 5b71cfa..3198da7 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -476,6 +476,13 @@ static int column;
 
 static int gen_version;
 
+#define GEN7_BITS_FIELD(inst, gen7) \
+  ({\
+int bits;   \
+  bits = ((const union Gen7NativeInstruction *)inst)->gen7; \
+bits;   \
+  })
+
 #define GEN_BITS_FIELD(inst, gen)   \
   ({\
 int bits;   \
@@ -530,6 +537,8 @@ static int gen_version;
 #define EXECUTION_SIZE(inst)   GEN_BITS_FIELD(inst, header.execution_size)
 #define BRANCH_JIP(inst)   GEN_BITS_FIELD2(inst, 
bits3.gen7_branch.jip, bits3.gen8_branch.jip/8)
 #define BRANCH_UIP(inst)   GEN_BITS_FIELD2(inst, 
bits3.gen7_branch.uip, bits2.gen8_branch.uip/8)
+#define VME_BTI(inst)  GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti)
+#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst, 
bits3.vme_gen7.msg_type)
 #define SAMPLE_BTI(inst)   GEN_BITS_FIELD(inst, bits3.sampler_gen7.bti)
 #define SAMPLER(inst)  GEN_BITS_FIELD(inst, 
bits3.sampler_gen7.sampler)
 #define SAMPLER_MSG_TYPE(inst) GEN_BITS_FIELD(inst, 
bits3.sampler_gen7.msg_type)
@@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t 
deviceID, uint32_t compac
 
 if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, 
bits2.da1.src1_reg_file) == GEN_IMMEDIATE_VALUE) {
   switch (target) {
+case GEN_SFID_VIDEO_MOTION_EST:
+  format(file, " (bti: %d, msg_type: %d)",
+ VME_BTI(inst),
+ VME_MSG_TYPE(inst));
+  break;
 case GEN_SFID_SAMPLER:
   format(file, " (%d, %d, %d, %d)",
  SAMPLE_BTI(inst),
diff --git a/backend/src/backend/gen7_instruction.hpp 
b/backend/src/backend/gen7_instruction.hpp
index 51f342b..258dd24 100644
--- a/backend/src/backend/gen7_instruction.hpp
+++ b/backend/src/backend/gen7_instruction.hpp
@@ -350,6 +350,21 @@ union Gen7NativeInstruction
 uint32_t end_of_thread:1;
   } sampler_gen7;
 
+  struct {
+uint32_t bti:8;
+uint32_t vme_search_path_lut:3;
+uint32_t lut_sub:2;
+uint32_t msg_type:2;
+uint32_t stream_in:1;
+uint32_t stream_out:1;

Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.

2015-11-05 Thread Yang, Rong R

The patchset LGTM, pushed, thanks.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> He Junyan
> Sent: Thursday, November 5, 2015 16:21
> To: beignet@lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF
> instruction.
> 
> V2:
> 
> Fix uniform bug in conversion.
> Delete verbose printf in utests.
> Fix a bug for BSW when convert half to double.
> 
> On Thu, Nov 05, 2015 at 04:15:41PM +0800, junyan...@inbox.com wrote:
> > Date: Thu,  5 Nov 2015 16:15:41 +0800
> > From: junyan...@inbox.com
> > To: beignet@lists.freedesktop.org
> > Subject: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF
> > instruction.
> > X-Mailer: git-send-email 1.7.9.5
> >
> > From: Junyan He 
> >
> > Because just platform after BDW will support double, the special
> > instruction for double MOV is not needed anymore.
> >
> > Signed-off-by: Junyan He 
> > ---
> >  backend/src/backend/gen75_encoder.cpp  | 36 -
> >  backend/src/backend/gen75_encoder.hpp  |  1 -
> >  backend/src/backend/gen8_encoder.cpp   | 36 -
> >  backend/src/backend/gen8_encoder.hpp   |  1 -
> >  backend/src/backend/gen_context.cpp|  3 ---
> >  backend/src/backend/gen_encoder.cpp| 43 
> > --
> >  backend/src/backend/gen_encoder.hpp|  2 --
> >  backend/src/backend/gen_insn_selection.cpp | 23 +---
> > backend/src/backend/gen_insn_selection.hxx |  1 -
> >  9 files changed, 1 insertion(+), 145 deletions(-)
> >
> > diff --git a/backend/src/backend/gen75_encoder.cpp
> > b/backend/src/backend/gen75_encoder.cpp
> > index 135be02..5d1a964 100644
> > --- a/backend/src/backend/gen75_encoder.cpp
> > +++ b/backend/src/backend/gen75_encoder.cpp
> > @@ -251,42 +251,6 @@ namespace gbe
> >  pop();
> >}
> >
> > -  void Gen75Encoder::MOV_DF(GenRegister dest, GenRegister src0,
> GenRegister tmp) {
> > -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf()
> && dest.type == GEN_TYPE_F));
> > -GenRegister r = GenRegister::retype(tmp, GEN_TYPE_F);
> > -int w = curr.execWidth;
> > -GenRegister r0;
> > -r0 = GenRegister::h2(r);
> > -push();
> > -curr.execWidth = 4;
> > -curr.predicate = GEN_PREDICATE_NONE;
> > -curr.noMask = 1;
> > -MOV(r0, src0);
> > -MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4));
> > -curr.noMask = 0;
> > -curr.quarterControl = 0;
> > -curr.nibControl = 0;
> > -MOV(dest, r0);
> > -curr.nibControl = 1;
> > -MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4));
> > -pop();
> > -if (w == 16) {
> > -  push();
> > -  curr.execWidth = 4;
> > -  curr.predicate = GEN_PREDICATE_NONE;
> > -  curr.noMask = 1;
> > -  MOV(r0, GenRegister::suboffset(src0, 8));
> > -  MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12));
> > -  curr.noMask = 0;
> > -  curr.quarterControl = 1;
> > -  curr.nibControl = 0;
> > -  MOV(GenRegister::suboffset(dest, 8), r0);
> > -  curr.nibControl = 1;
> > -  MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4));
> > -  pop();
> > -}
> > -  }
> > -
> >void Gen75Encoder::JMPI(GenRegister src, bool longjmp) {
> >  alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
> >}
> > diff --git a/backend/src/backend/gen75_encoder.hpp
> > b/backend/src/backend/gen75_encoder.hpp
> > index e494f29..f5044c0 100644
> > --- a/backend/src/backend/gen75_encoder.hpp
> > +++ b/backend/src/backend/gen75_encoder.hpp
> > @@ -42,7 +42,6 @@ namespace gbe
> >  virtual void JMPI(GenRegister src, bool longjmp = false);
> >  /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump
> distance */
> >  virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
> > -virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister
> tmp = GenRegister::null());
> >  virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double
> value);
> >  virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister 
> > src,
> GenRegister bti, uint32_t srcNum);
> >  virtual void UNTYPED_READ(GenRegister dst, GenRegister src,
> > GenRegister bti, uint32_t elemNum); diff --git
> > a/backend/src/backend/gen8_encoder.cpp
> > b/backend/src/backend/gen8_encoder.cpp
> > index 55fc3fb..98c3917 100644
> > --- a/backend/src/backend/gen8_encoder.cpp
> > +++ b/backend/src/backend/gen8_encoder.cpp
> > @@ -260,42 +260,6 @@ namespace gbe
> >  MOV(dest, value);
> >}
> >
> > -  void Gen8Encoder::MOV_DF(GenRegister dest, GenRegister src0,
> GenRegister tmp) {
> > -GBE_ASSERT((src0.type == GEN_TYPE_F && dest.isdf()) || (src0.isdf()
> && dest.type == GEN_TYPE_F));
> > -GenRegister r = GenRegister::retype(tmp,

Re: [Beignet] [PATCH] utests: fix image_from_buffer bugs

2015-11-05 Thread Yang, Rong R

Pushed.

> -Original Message-
> From: Pan, Xiuli
> Sent: Friday, November 6, 2015 9:43
> To: Luo, Xionghu; beignet@lists.freedesktop.org
> Cc: Yang, Rong R
> Subject: RE: [Beignet] [PATCH] utests: fix image_from_buffer bugs
> 
> Ping for pushed.
> 
> -Original Message-
> From: Luo, Xionghu
> Sent: Wednesday, October 28, 2015 9:42 AM
> To: Pan, Xiuli ; beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: RE: [Beignet] [PATCH] utests: fix image_from_buffer bugs
> 
> This patch LGTM.
> Thanks.
> 
> Luo Xionghu
> Best Regards
> 
> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Pan Xiuli
> Sent: Tuesday, October 27, 2015 2:16 PM
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli
> Subject: [Beignet] [PATCH] utests: fix image_from_buffer bugs
> 
> Fixed 2 bugs:
> 1. This test case uses userptr, so we should never free the original buffer
> space; otherwise undefined behavior can happen: adding or removing one header
> file corrupts the data in front, NDRangeKernel fails, etc.
> 2. The utest needs to test when to free the image from buffer and the buffer,
> but the utest helper function releases them again, which causes libc to emit
> warnings. We just set the global variables to NULL to avoid these problems.
> These changes fix the broken image_from_buffer utest.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  utests/image_from_buffer.cpp | 17 +
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/utests/image_from_buffer.cpp b/utests/image_from_buffer.cpp
> index 78d6797..b1171d1 100644
> --- a/utests/image_from_buffer.cpp
> +++ b/utests/image_from_buffer.cpp
> @@ -32,13 +32,13 @@ static void image_from_buffer(void)
> 
>// Setup kernel and images
>size_t buffer_sz = sizeof(uint32_t) * w * h;
> -  //buf_data[0] = (uint32_t*) malloc(buffer_sz);
> -  buf_data[0] = (uint32_t*)memalign(base_address_alignment, buffer_sz);
> +  uint32_t* src_data;
> +  src_data = (uint32_t*)memalign(base_address_alignment, buffer_sz);
>for (uint32_t j = 0; j < h; ++j)
>  for (uint32_t i = 0; i < w; i++)
> -  ((uint32_t*)buf_data[0])[j * w + i] = j * w + i;
> +  src_data[j * w + i] = j * w + i;
> 
> -  cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY |
> CL_MEM_USE_HOST_PTR, buffer_sz, buf_data[0], );
> +  cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY |
> + CL_MEM_USE_HOST_PTR, buffer_sz, src_data, );
> 
>OCL_ASSERT(error == CL_SUCCESS);
>format.image_channel_order = CL_RGBA; @@ -49,7 +49,7 @@ static void
> image_from_buffer(void)
>desc.image_row_pitch = w * sizeof(uint32_t);
> 
>desc.buffer = 0;
> -  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , ,
> buf_data[0]);
> +  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, , ,
> + src_data);
> 
>desc.buffer = buff;
>OCL_CREATE_IMAGE(buf[1], 0, , , NULL); @@ -58,9 +58,6
> @@ static void image_from_buffer(void)
>desc.image_row_pitch = 0;
>OCL_CREATE_IMAGE(buf[2], CL_MEM_WRITE_ONLY, , ,
> NULL);
> 
> -  free(buf_data[0]);
> -  buf_data[0] = NULL;
> -
>OCL_SET_ARG(0, sizeof(cl_mem), [1]);
>OCL_SET_ARG(1, sizeof(cl_mem), [2]);
> 
> @@ -87,6 +84,8 @@ static void image_from_buffer(void)
>OCL_UNMAP_BUFFER_GTT(1);
>OCL_UNMAP_BUFFER_GTT(2);
> 
> +  free(src_data);
> +
>//spec didn't tell the sequence of release buffer of image. so release 
> either
> buffer or image first is ok here.
>//we follow the rule of destroy the bo at the last release, then the 
> access of
> buffer after release image is legal
>//and vice verse.
> @@ -98,6 +97,8 @@ static void image_from_buffer(void)
>clReleaseMemObject(buf[1]);
>  #endif
>clReleaseMemObject(buf[2]);
> +  buf[1] = NULL;
> +  buf[2] = NULL;
>  }
> 
>  MAKE_UTEST_FROM_FUNCTION(image_from_buffer);
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH v2 1/2] add benckmark for copy data from buffer to buffer

2015-11-05 Thread Yang, Rong R

Pushed.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Song, Ruiling
> Sent: Wednesday, November 4, 2015 16:42
> To: Meng, Mengmeng; beignet@lists.freedesktop.org
> Cc: Meng, Mengmeng
> Subject: Re: [Beignet] [PATCH v2 1/2] add benckmark for copy data from
> buffer to buffer
> 
> This patchset LGTM. This benchmark is important to compare buffer/image
> performance on different generations.
> 
> Thanks!
> Ruiling
> 
> > -Original Message-
> > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf
> > Of Meng Mengmeng
> > Sent: Wednesday, November 4, 2015 4:17 PM
> > To: beignet@lists.freedesktop.org
> > Cc: Meng, Mengmeng
> > Subject: [Beignet] [PATCH v2 1/2] add benckmark for copy data from
> > buffer to buffer
> >
> > Set the data format as 1920 * 1080 four channels and type as char,short and
> int.
> >
> > Signed-off-by: Meng Mengmeng 
> > ---
> >  benchmark/CMakeLists.txt|  3 +-
> >  benchmark/benchmark_copy_buffer.cpp | 55
> > +
> >  kernels/bench_copy_buffer.cl| 27 ++
> >  3 files changed, 84 insertions(+), 1 deletion(-)  create mode 100644
> > benchmark/benchmark_copy_buffer.cpp
> >  create mode 100644 kernels/bench_copy_buffer.cl
> >
> > diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index
> > 3e43a21..03a56f2 100644
> > --- a/benchmark/CMakeLists.txt
> > +++ b/benchmark/CMakeLists.txt
> > @@ -16,7 +16,8 @@ set (benchmark_sources
> >benchmark_read_buffer.cpp
> >benchmark_read_image.cpp
> >benchmark_copy_buffer_to_image.cpp
> > -  benchmark_copy_image_to_buffer.cpp)
> > +  benchmark_copy_image_to_buffer.cpp
> > +  benchmark_copy_buffer.cpp)
> >
> >
> >  SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
> diff
> > --git a/benchmark/benchmark_copy_buffer.cpp
> > b/benchmark/benchmark_copy_buffer.cpp
> > new file mode 100644
> > index 000..88983a7
> > --- /dev/null
> > +++ b/benchmark/benchmark_copy_buffer.cpp
> > @@ -0,0 +1,55 @@
> > +#include "utests/utest_helper.hpp"
> > +#include 
> > +
> > +#define BENCH_COPY_BUFFER(T, K, M) \
> > +double benchmark_copy_buffer_ ##T(void) \ { \
> > +  struct timeval start,stop; \
> > + \
> > +  const size_t w = 1920; \
> > +  const size_t h = 1080; \
> > +  const size_t sz = 4 * w * h; \
> > + \
> > +  OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(M), NULL); \
> > +  OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(M), NULL); \  \
> > +  OCL_CREATE_KERNEL_FROM_FILE("bench_copy_buffer",K); \  \
> > +  OCL_MAP_BUFFER(0); \
> > +  for (size_t i = 0; i < sz; i ++) { \
> > +((M *)(buf_data[0]))[i] = rand(); \
> > +  } \
> > +  OCL_UNMAP_BUFFER(0); \
> > + \
> > +  OCL_SET_ARG(0, sizeof(cl_mem), [0]); \
> > +  OCL_SET_ARG(1, sizeof(cl_mem), [1]); \  \
> > +  globals[0] = w; \
> > +  globals[1] = h; \
> > +  locals[0] = 16; \
> > +  locals[1] = 4; \
> > + \
> > +  gettimeofday(,0); \
> > +  for (size_t i=0; i<100; i++) { \
> > +OCL_NDRANGE(2); \
> > +  } \
> > +  OCL_FINISH(); \
> > + \
> > +  OCL_MAP_BUFFER(1); \
> > +  OCL_UNMAP_BUFFER(1); \
> > +  gettimeofday(,0); \
> > + \
> > +  clReleaseMemObject(buf[0]); \
> > +  free(buf_data[0]); \
> > +  buf_data[0] = NULL; \
> > + \
> > +  double elapsed = time_subtract(, , 0); \  \
> > +  return BANDWIDTH(sz * sizeof(M) * 2 * 100, elapsed); \ } \  \
> >
> +MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(benchmark_cop
> y_bu
> > ffer_ ##T,true);
> > +
> > +BENCH_COPY_BUFFER(uchar,"bench_copy_buffer_uchar",unsigned char)
> > +BENCH_COPY_BUFFER(ushort,"bench_copy_buffer_ushort",unsigned
> short)
> > +BENCH_COPY_BUFFER(uint,"bench_copy_buffer_uint",unsigned int)
> > diff --git a/kernels/bench_copy_buffer.cl
> > b/kernels/bench_copy_buffer.cl new file mode 100644 index
> > 000..ed20352
> > --- /dev/null
> > +++ b/kernels/bench_copy_buffer.cl
> > @@ -0,0 +1,27 @@
> > +__kernel void
> > +bench_copy_buffer_uchar(__global uchar4* src, __global uchar4* dst) {
> > +  int x = (int)get_global_id(0);
> > +  int y = (int)get_global_id(1);
> > +  int x_sz = (int)get_global_size(0);
> > +  dst[y * x_sz + x] =  src[y * x_sz + x]; }
> > +
> > +__kernel void
> > +bench_copy_buffer_ushort(__global ushort4* src, __global ushort4*
> > +dst) {
> > +  int x = (int)get_global_id(0);
> > +  int y = (int)get_global_id(1);
> > +  int x_sz = (int)get_global_size(0);
> > +  dst[y * x_sz + x] =  src[y * x_sz + x]; }
> > +
> > +__kernel void
> > +bench_copy_buffer_uint(__global uint4* src, __global uint4* dst) {
> > +  int x = (int)get_global_id(0);
> > +  int y = (int)get_global_id(1);
> > +  int x_sz = (int)get_global_size(0);
> > +  dst[y * x_sz + x] =  src[y * x_sz + x]; }
> > +
> > --
> > 1.9.1
> >
> > ___
> > Beignet mailing list
> > Beignet@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> ___
> Beignet mailing list
>

Re: [Beignet] [PATCH V3] GBE: Refine ir for memory operation like atomic/load/store

2015-11-05 Thread Yang, Rong R

Pushed.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Wednesday, November 4, 2015 15:50
> To: beignet@lists.freedesktop.org
> Cc: Song, Ruiling
> Subject: [Beignet] [PATCH V3] GBE: Refine ir for memory operation like
> atomic/load/store
> 
> The legacyMode flag determines which kind of address mode to choose.
> When legacyMode is true, we need to do complex bti analysis.
> 
> dynamicBti and staticBti are mostly for platforms before BDW,
> and stateless is for platforms BDW+.
> 
> v2:
>  only do analyzePointerOrigin() under legacyMode.
> 
> v3:
>  fix conflict with master, and some reorder warning.
> 
> Signed-off-by: Ruiling Song 
> ---
>  backend/src/backend/gen_insn_selection.cpp | 132 --
>  backend/src/ir/context.hpp |  19 --
>  backend/src/ir/instruction.cpp | 410 
> +
>  backend/src/ir/instruction.hpp |  78 +++---
>  backend/src/ir/lowering.cpp|   4 +-
>  backend/src/llvm/llvm_gen_backend.cpp  | 393 +---
> ---
>  6 files changed, 531 insertions(+), 505 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 2452aea..5ec420e 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -1254,11 +1254,11 @@ namespace gbe
>}
> 
>void Selection::Opaque::ATOMIC(Reg dst, uint32_t function,
> - uint32_t srcNum, Reg src0,
> + uint32_t msgPayload, Reg src0,
>   Reg src1, Reg src2, GenRegister bti,
>   vector temps) {
>  unsigned dstNum = 1 + temps.size();
> -SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum,
> srcNum + 1);
> +SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum,
> msgPayload + 1);
> 
>  if (bti.file != GEN_IMMEDIATE_VALUE) {
>insn->state.flag = 0;
> @@ -1272,14 +1272,15 @@ namespace gbe
>  }
> 
>  insn->src(0) = src0;
> -if(srcNum > 1) insn->src(1) = src1;
> -if(srcNum > 2) insn->src(2) = src2;
> -insn->src(srcNum) = bti;
> +if(msgPayload > 1) insn->src(1) = src1;
> +if(msgPayload > 2) insn->src(2) = src2;
> +insn->src(msgPayload) = bti;
> +
>  insn->extra.function = function;
> -insn->extra.elem = srcNum;
> +insn->extra.elem = msgPayload;
> 
>  SelectionVector *vector = this->appendVector();
> -vector->regNum = srcNum;
> +vector->regNum = msgPayload; //bti not included in SelectionVector
>  vector->offsetID = 0;
>  vector->reg = >src(0);
>  vector->isSrc = 1;
> @@ -3424,8 +3425,6 @@ namespace gbe
> uint32_t valueNum,
> ir::BTI bti) const
>  {
> -//GenRegister temp = getRelativeAddress(sel, addr, 
> sel.selReg(bti.base,
> ir::TYPE_U32));
> -
>  GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) :
> sel.selReg(bti.reg, ir::TYPE_U32);
>  sel.UNTYPED_READ(addr, dst.data(), valueNum, b,
> sel.getBTITemps(bti));
>  }
> @@ -3726,28 +3725,12 @@ namespace gbe
>return false;
>  }
> 
> -INLINE ir::BTI getBTI(SelectionDAG , const ir::LoadInstruction )
> const {
> -  using namespace ir;
> -  SelectionDAG *child0 = dag.child[0];
> -  ir::BTI b;
> -  if (insn.isFixedBTI()) {
> -const auto  = cast(child0->insn);
> -const auto imm = immInsn.getImmediate();
> -b.isConst = 1;
> -b.imm = imm.getIntegerValue();
> -  } else {
> -b.isConst = 0;
> -b.reg = insn.getBTI();
> -  }
> -  return b;
> -}
> -
>  /*! Implements base class */
>  virtual bool emit(Selection::Opaque  , SelectionDAG ) const
>  {
>using namespace ir;
>const ir::LoadInstruction  = cast(dag.insn);
> -  GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
> +  GenRegister address = sel.selReg(insn.getAddressRegister(),
> ir::TYPE_U32);
>GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
>   insn.getAddressSpace() == MEM_CONSTANT ||
>   insn.getAddressSpace() == MEM_PRIVATE ||
> @@ -3755,8 +3738,17 @@ namespace gbe
>   insn.getAddressSpace() == MEM_MIXED);
>//GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false);
> 
> -  BTI bti = getBTI(dag, insn);
> -
> +  BTI bti;
> +  AddressMode am = insn.getAddressMode();
> +  if (am == AM_StaticBti) {
> +bti.isConst = 1;
> +bti.imm = insn.getSurfaceIndex();
> +  } else if (am == AM_DynamicBti) {
> +bti.isConst = 0;
> +bti.reg = insn.getBtiReg();
> +  } else {
> +assert(0 && "stateless not supported yet");
> +  }
>const Type type = insn.getValueType();
>const

Re: [Beignet] Fwd: [Bug 1277925] New: clinfo: Failed to release test userptr object! (9) i915 kernel driver may not be sane!

2015-11-05 Thread Igor Gnatenko

Thanks. Backported to Fedora. Works perfectly.

On Wed, Nov 4, 2015, 11:48 PM Rebecca N. Palmer 
wrote:

> Specifically (and assuming it is the same bug on Ivy Bridge and
> Haswell), fixed in git master by
>
> http://cgit.freedesktop.org/beignet/commit/?id=f48b4f6766fcaa193652fcbe6ea0bb29f92e45aa
> , still present in 1.1.x.
>
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
>
-- 

-Igor Gnatenko
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random bug

2015-11-05 Thread Yang, Rong R

LGTM, thanks, pushed.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Pan Xiuli
> Sent: Thursday, October 29, 2015 13:47
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli
> Subject: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random
> bug
> 
> Use the safer image write instead of map and memset. When an image is created
> without data, we cannot set the pitch and we don't know the pitch either. So
> using map and memset on the space is too dangerous if the pitch is bigger than
> w*sizeof(bpp); in this case the actual pitch is 512 but memset assumes a pitch
> of 64*4=256. With only half of the space set to 0, there will be undefined
> behavior when we check the result for the space that we haven't set to 0.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  utests/compiler_fill_image_2d_array.cpp | 18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
> 
> diff --git a/utests/compiler_fill_image_2d_array.cpp
> b/utests/compiler_fill_image_2d_array.cpp
> index fc09362..ab7470e 100644
> --- a/utests/compiler_fill_image_2d_array.cpp
> +++ b/utests/compiler_fill_image_2d_array.cpp
> @@ -11,6 +11,7 @@ static void compiler_fill_image_2d_array(void)
>size_t origin[3] = { };
>size_t region[3];
>uint32_t* dst;
> +  uint32_t* src;
> 
>memset(&desc, 0x0, sizeof(cl_image_desc));
>memset(&format, 0x0, sizeof(cl_image_format));
> @@ -28,9 +29,16 @@ static void compiler_fill_image_2d_array(void)
> 
>OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
> 
> -  OCL_MAP_BUFFER_GTT(0);
> -  memset(buf_data[0], 0, sizeof(uint32_t) * w * h * array);
> -  OCL_UNMAP_BUFFER_GTT(0);
> +  region[0] = w;
> +  region[1] = h;
> +  region[2] = array;
> +
> +  // As we don't know the pitch right now, we cannot
> +  // use map to setup the image. It is safer to use
> +  // write image
> +  src = (uint32_t*)malloc(sizeof(uint32_t) * w * h * array);
> +  memset(src, 0, sizeof(uint32_t) * w * h * array);
> +  OCL_WRITE_IMAGE(buf[0], origin, region, src);
> 
>// Run the kernel
>OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> @@ -43,9 +51,6 @@ static void compiler_fill_image_2d_array(void)
>OCL_NDRANGE(3);
> 
>// Check result
> -  region[0] = w;
> -  region[1] = h;
> -  region[2] = array;
>dst = (uint32_t*)malloc(w*h*array*sizeof(uint32_t));
>OCL_READ_IMAGE(buf[0], origin, region, dst);
> 
> @@ -79,6 +84,7 @@ static void compiler_fill_image_2d_array(void)
>  }
>}
>free(dst);
> +  free(src);
>  }
> 
>  MAKE_UTEST_FROM_FUNCTION(compiler_fill_image_2d_array);
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string

2015-11-05 Thread Yang, Rong R

Pushed.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Pan, Xiuli
> Sent: Friday, November 6, 2015 9:44
> To: 'Rebecca N. Palmer'; 'beignet@lists.freedesktop.org'
> Subject: Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format
> string
> 
> Ping for pushed.
> 
> -Original Message-
> From: Pan, Xiuli
> Sent: Wednesday, November 4, 2015 9:48 AM
> To: Rebecca N. Palmer ;
> beignet@lists.freedesktop.org
> Subject: RE: [Beignet] [PATCH v3] GBE: Don't read past end of printf format
> string
> 
> LGTM, Thanks for your help!
> 
> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Rebecca N. Palmer
> Sent: Wednesday, November 4, 2015 6:19 AM
> To: beignet@lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format
> string
> 
> When p == end (the null terminator byte), don't try to read p + 1:
> as this is outside the string, it might be a '%' from a different object
> (causing __parse_printf_state(end + 2, end, ...) to be called, which will
> fail), or an invalid address.
> 
> Signed-off-by: Rebecca Palmer 
> ---
>  backend/src/llvm/llvm_printf_parser.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/backend/src/llvm/llvm_printf_parser.cpp
> b/backend/src/llvm/llvm_printf_parser.cpp
> index bdaed8a..f427107 100644
> --- a/backend/src/llvm/llvm_printf_parser.cpp
> +++ b/backend/src/llvm/llvm_printf_parser.cpp
> @@ -229,7 +229,7 @@ again:
>  printf("string end with %%\n");
>  goto error;
>}
> -  if (*(p + 1) == '%') { // %%
> +  if (p + 1 < end && *(p + 1) == '%') { // %%
>  p += 2;
>  goto again;
>}
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

[Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.

[Beignet] [PATCH 6/7 V2] Backend: Fix a potential bug for uniform conversion.

[Beignet] [PATCH 2/7 V2] Backend: Delete LOAD_DF_IMM instruction.

[Beignet] [PATCH 5/7 V2] Utest: Fix a bug for double div.

[Beignet] [PATCH 3/7 V2] Backend: Add double conversion to insn selection.

Re: [Beignet] Unrecoverable system lockup when allocating too much memory

Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.

[Beignet] [PATCH v6 2/4] Add extensions intel_accelerator and basic intel_motion_estimation.

[Beignet] [PATCH v6 3/4] Add basic utest for block_motion_estimate_intel.

[Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.

[Beignet] [PATCH v6 4/4] Add document of video motion estimation support.

Re: [Beignet] [PATCH] utests: fix image_from_buffer bugs

Re: [Beignet] [PATCH] GBE: fix printf class static variable bug

[Beignet] [PATCH V2 2/2] Backend: add debugwait function

[Beignet] [PATCH 1/2] Backend: enable to choose notification register

Re: [Beignet] [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL to create cl buffer by external buffer object's fd.

Re: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random bug

Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string

Re: [Beignet] [PATCH] GBE: fix printf class static variable bug

Re: [Beignet] [PATCH] GBE: fix printf class static variable bug

Re: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme.

Re: [Beignet] [PATCH 1/7 V2] Backend: Delete the useless MOV_DF instruction.

Re: [Beignet] [PATCH] utests: fix image_from_buffer bugs

Re: [Beignet] [PATCH v2 1/2] add benckmark for copy data from buffer to buffer

Re: [Beignet] [PATCH V3] GBE: Refine ir for memory operation like atomic/load/store

Re: [Beignet] Fwd: [Bug 1277925] New: clinfo: Failed to release test userptr object! (9) i915 kernel driver may not be sane!

Re: [Beignet] [PATCH] utests: fix compiler_fill_image_2d_array random bug

Re: [Beignet] [PATCH v3] GBE: Don't read past end of printf format string

28 matches

Site Navigation

Mail list logo

Footer information