[Beignet] [PATCH 1/4] prepare gen9 sends binary format and enable the ASM dump for sends

2016-11-21 Thread Guo, Yejun
Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen/gen_mesa_disasm.c |  28 ++--
 backend/src/backend/gen9_instruction.hpp  | 112 ++
 backend/src/backend/gen_defs.hpp  |   3 +
 3 files changed, 139 insertions(+), 4 deletions(-)
 create mode 100644 backend/src/backend/gen9_instruction.hpp

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c 
b/backend/src/backend/gen/gen_mesa_disasm.c
index c30f168..4f6c35d 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -50,6 +50,7 @@
 
 #include "backend/gen_defs.hpp"
 #include "backend/gen7_instruction.hpp"
+#include "backend/gen9_instruction.hpp"
 #include "src/cl_device_data.h"
 
 static const struct {
@@ -104,6 +105,7 @@ static const struct {
 
   [GEN_OPCODE_SEND] = { .name = "send", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_SENDC] = { .name = "sendc", .nsrc = 2, .ndst = 1 },
+  [GEN_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
   [GEN_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
   [GEN_OPCODE_BRD] = { .name = "brd", .nsrc = 0, .ndst = 0 },
@@ -1411,7 +1413,8 @@ int gen_disasm (FILE *file, const void *inst, uint32_t 
deviceID, uint32_t compac
 }
 
   } else if (OPCODE(inst) != GEN_OPCODE_SEND &&
- OPCODE(inst) != GEN_OPCODE_SENDC) {
+ OPCODE(inst) != GEN_OPCODE_SENDC &&
+ OPCODE(inst) != GEN_OPCODE_SENDS) {
 err |= control(file, "conditional modifier", conditional_modifier,
COND_DST_OR_MODIFIER(inst), NULL);
 if (COND_DST_OR_MODIFIER(inst))
@@ -1426,7 +1429,17 @@ int gen_disasm (FILE *file, const void *inst, uint32_t 
deviceID, uint32_t compac
 string(file, ")");
   }
 
-  if (opcode[OPCODE(inst)].nsrc == 3) {
+  if (OPCODE(inst) == GEN_OPCODE_SENDS) {
+const union Gen9NativeInstruction *gen9_insn = (const union 
Gen9NativeInstruction *)inst;
+pad(file, 16);
+string(file, "null");
+pad(file, 32);
+format(file, "g%d(addLen:%d)", gen9_insn->bits2.sends.src0_reg_nr, 
gen9_insn->bits3.sends_untyped_rw.src0_length);
+pad(file, 48);
+format(file, "g%d(dataLen:%d)", gen9_insn->bits1.sends.src1_reg_nr, 
gen9_insn->bits2.sends.src1_length);
+pad(file, 64);
+format(file, "0x%x", gen9_insn->bits3.ud);
+  } else if (opcode[OPCODE(inst)].nsrc == 3) {
 pad(file, 16);
 err |= dest_3src(file, inst);
 
@@ -1469,7 +1482,8 @@ int gen_disasm (FILE *file, const void *inst, uint32_t 
deviceID, uint32_t compac
   }
 
   if (OPCODE(inst) == GEN_OPCODE_SEND ||
-  OPCODE(inst) == GEN_OPCODE_SENDC) {
+  OPCODE(inst) == GEN_OPCODE_SENDC ||
+  OPCODE(inst) == GEN_OPCODE_SENDS) {
 enum GenMessageTarget target = COND_DST_OR_MODIFIER(inst);
 
 newline(file);
@@ -1484,7 +1498,13 @@ int gen_disasm (FILE *file, const void *inst, uint32_t 
deviceID, uint32_t compac
  target, );
 }
 
-if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, 
bits2.da1.src1_reg_file) == GEN_IMMEDIATE_VALUE) {
+int immbti = 0;
+if (OPCODE(inst) == GEN_OPCODE_SENDS) {
+  const union Gen9NativeInstruction *gen9_insn = (const union 
Gen9NativeInstruction *)inst;
+  immbti = !(gen9_insn->bits2.sends.sel_reg32_desc);
+} else
+  immbti = (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, 
bits2.da1.src1_reg_file) == GEN_IMMEDIATE_VALUE);
+if (immbti) {
   switch (target) {
 case GEN_SFID_VIDEO_MOTION_EST:
   format(file, " (bti: %d, msg_type: %d)",
diff --git a/backend/src/backend/gen9_instruction.hpp 
b/backend/src/backend/gen9_instruction.hpp
new file mode 100644
index 000..9d57f08
--- /dev/null
+++ b/backend/src/backend/gen9_instruction.hpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Guo, Yejun 
+ */
+
+
+#ifndef __GEN9_INSTRUCTION_HPP__
+#define __GEN9_INSTRUCTION_HPP__
+
+union Gen9NativeInstruction
+{
+  struct {
+struct {
+  uint32_t opcode:7;
+  uint32_t pad:1;
+  uint32_t access_mode:1;
+  uint32_t dependency_control:2;
+  uint32_t nib_ctrl:1;
+  uint32_t quarter_control:2;
+  uint32_t thread_control:2;
+  uint32_t predicate_control:4;
+  

[Beignet] [PATCH 2/4] support sends (split send) for untyped write

2016-11-21 Thread Guo, Yejun
sends is a new instruction, available starting from gen9, that splits the
address registers and the data registers of a write message. Register
pressure can be reduced since they no longer need to be contiguous.

more patches for sends will be sent out.

We can choose send or sends based on hasSends() in the selection stage; it is
only enabled by default for Skylake now.

Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen75_encoder.cpp  |  2 +-
 backend/src/backend/gen75_encoder.hpp  |  2 +-
 backend/src/backend/gen8_context.cpp   | 21 +++
 backend/src/backend/gen8_encoder.cpp   |  2 +-
 backend/src/backend/gen8_encoder.hpp   |  2 +-
 backend/src/backend/gen9_encoder.cpp   | 58 ++
 backend/src/backend/gen9_encoder.hpp   |  3 +-
 backend/src/backend/gen_context.cpp| 41 -
 backend/src/backend/gen_encoder.cpp| 12 ++-
 backend/src/backend/gen_encoder.hpp|  4 ++-
 backend/src/backend/gen_insn_selection.cpp | 22 ++--
 backend/src/backend/gen_insn_selection.hpp |  1 +
 12 files changed, 137 insertions(+), 33 deletions(-)

diff --git a/backend/src/backend/gen75_encoder.cpp 
b/backend/src/backend/gen75_encoder.cpp
index fc37991..9cafaa7 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -199,7 +199,7 @@ namespace gbe
 return insn->bits3.ud;
   }
 
-  void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, uint32_t 
elemNum) {
+  void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, 
GenRegister bti, uint32_t elemNum) {
 GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 assert(elemNum >= 1 || elemNum <= 4);
 this->setHeader(insn);
diff --git a/backend/src/backend/gen75_encoder.hpp 
b/backend/src/backend/gen75_encoder.hpp
index d06f393..517afff 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -44,7 +44,7 @@ namespace gbe
 virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
 virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
 virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
-virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t 
elemNum);
+virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister 
bti, uint32_t elemNum);
 virtual void setHeader(GenNativeInstruction *insn);
 virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, 
uint32_t rgba,
uint32_t msg_type, uint32_t msg_length, uint32_t 
response_length);
diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp
index 71c54fb..95b1013 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -968,6 +968,9 @@ namespace gbe
 GBE_ASSERT(elemNum == 1);
 const GenRegister addr = ra->genReg(insn.src(elemNum));
 const GenRegister bti = ra->genReg(insn.src(elemNum*2+1));
+GenRegister data = ra->genReg(insn.src(elemNum+1));
+if (!insn.extra.splitSend)
+  data = addr;
 
 /* Because BDW's store and load send instructions for 64 bits require the 
bti to be surfaceless,
which we can not accept. We just fallback to 2 DW untypewrite here. */
@@ -978,11 +981,15 @@ namespace gbe
 }
 
 if (bti.file == GEN_IMMEDIATE_VALUE) {
-  p->UNTYPED_WRITE(addr, bti, elemNum*2);
+  p->UNTYPED_WRITE(addr, data, bti, elemNum*2);
 } else {
   const GenRegister tmp = ra->genReg(insn.dst(elemNum));
   const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
-  unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
+  unsigned desc = 0;
+  if (insn.extra.splitSend)
+desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum*2);
+  else
+desc = p->generateUntypedWriteMessageDesc(0, elemNum*2);
 
   unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc);
 
@@ -990,7 +997,7 @@ namespace gbe
   p->push();
 p->curr.predicate = GEN_PREDICATE_NORMAL;
 p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-p->UNTYPED_WRITE(addr, GenRegister::addr1(0), elemNum*2);
+p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2);
   p->pop();
   afterMessage(insn, bti, tmp, btiTmp, jip0);
 }
@@ -1351,7 +1358,7 @@ namespace gbe
   nextDst = GenRegister::Qn(tempDst, 1);
   p->MOV(nextDst, nextSrc);
 p->pop();
-p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
 p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
 
 p->push();
@@ -1367,7 +1374,7 @@ namespace gbe
   nextDst = GenRegister::Qn(tempDst, 1);
   p->MOV(nextDst, nextSrc);
 p->pop();
-p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
+

[Beignet] [PATCH 4/4] add sends support for byte write

2016-11-21 Thread Guo, Yejun
Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen9_encoder.cpp   | 58 ++
 backend/src/backend/gen9_encoder.hpp   |  2 ++
 backend/src/backend/gen_context.cpp| 15 +---
 backend/src/backend/gen_encoder.cpp| 14 +++-
 backend/src/backend/gen_encoder.hpp|  4 ++-
 backend/src/backend/gen_insn_selection.cpp | 26 ++
 6 files changed, 107 insertions(+), 12 deletions(-)

diff --git a/backend/src/backend/gen9_encoder.cpp 
b/backend/src/backend/gen9_encoder.cpp
index 351788c..45e8551 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -123,4 +123,62 @@ namespace gbe
 gen9_insn->bits2.sends.sel_reg32_desc = 1;
 }
   }
+
+  unsigned Gen9Encoder::setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize)
+  {
+Gen9NativeInstruction *gen9_insn = >gen9_insn;
+gen9_insn->bits3.sends_byte_rw.header_present = 0;
+gen9_insn->bits3.sends_byte_rw.response_length = 0;
+gen9_insn->bits3.sends_byte_rw.end_of_thread = 0;
+gen9_insn->bits3.sends_byte_rw.msg_type = GEN7_BYTE_SCATTER;
+gen9_insn->bits3.sends_byte_rw.bti = bti;
+gen9_insn->bits3.sends_byte_rw.data_size = elemSize;
+
+if (this->curr.execWidth == 8) {
+  gen9_insn->bits3.sends_byte_rw.src0_length = 1;
+  gen9_insn->bits3.sends_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD8;
+} else if (this->curr.execWidth == 16) {
+  gen9_insn->bits3.sends_byte_rw.src0_length = 2;
+  gen9_insn->bits3.sends_byte_rw.simd_mode = GEN_BYTE_SCATTER_SIMD16;
+}
+
+return gen9_insn->bits3.ud;
+  }
+
+  void Gen9Encoder::BYTE_SCATTER(GenRegister addr, GenRegister data, 
GenRegister bti, uint32_t elemSize)
+  {
+if (addr.reg() == data.reg())
+  Gen8Encoder::BYTE_SCATTER(addr, data, bti, elemSize);
+else {
+  GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
+  Gen9NativeInstruction *gen9_insn = >gen9_insn;
+  this->setHeader(insn);
+  insn->header.destreg_or_condmod = GEN_SFID_DATAPORT_DATA;
+
+  gen9_insn->bits1.sends.dest_reg_file_0 = 1;//01 for GRF
+  gen9_insn->bits1.sends.src1_reg_file_0 = 1;
+
+  gen9_insn->bits1.sends.src1_reg_nr = data.nr;
+  gen9_insn->bits1.sends.dest_subreg_nr = 0;
+  gen9_insn->bits1.sends.dest_reg_nr = 0;
+  gen9_insn->bits1.sends.dest_address_mode = 0;  //direct mode
+
+  gen9_insn->bits2.sends.src0_subreg_nr = addr.subnr;
+  gen9_insn->bits2.sends.src0_reg_nr = addr.nr;
+  gen9_insn->bits2.sends.src0_address_mode = 0;
+
+  if (this->curr.execWidth == 8)
+gen9_insn->bits2.sends.src1_length = 1;
+  else if (this->curr.execWidth == 16)
+gen9_insn->bits2.sends.src1_length = 2;
+  else
+assert(!"unsupported");
+
+  if (bti.file == GEN_IMMEDIATE_VALUE) {
+gen9_insn->bits2.sends.sel_reg32_desc = 0;
+setByteScatterSendsMessageDesc(insn, bti.value.ud, elemSize);
+  } else
+gen9_insn->bits2.sends.sel_reg32_desc = 1;
+}
+  }
 } /* End of the name space. */
diff --git a/backend/src/backend/gen9_encoder.hpp 
b/backend/src/backend/gen9_encoder.hpp
index 7b9f0df..d78b029 100644
--- a/backend/src/backend/gen9_encoder.hpp
+++ b/backend/src/backend/gen9_encoder.hpp
@@ -49,6 +49,8 @@ namespace gbe
 bool isUniform);
 virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum);
 virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemNum);
+virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize);
+virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize);
   };
 }
 #endif /* __GBE_GEN9_ENCODER_HPP__ */
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 848933e..9505592 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2220,16 +2220,23 @@ namespace gbe
   }
 
   void GenContext::emitByteScatterInstruction(const SelectionInstruction 
) {
-const GenRegister src = ra->genReg(insn.src(0));
+const GenRegister addr = ra->genReg(insn.src(0));
+GenRegister data = ra->genReg(insn.src(1));
+if (!insn.extra.splitSend)
+  data = addr;
 const uint32_t elemSize = insn.extra.elem;
 const GenRegister bti = ra->genReg(insn.src(2));
 
 if (bti.file == GEN_IMMEDIATE_VALUE) {
-  p->BYTE_SCATTER(src, bti, elemSize);
+  p->BYTE_SCATTER(addr, data, bti, elemSize);
 } else {
   const GenRegister tmp = ra->genReg(insn.dst(0));
   const GenRegister btiTmp = ra->genReg(insn.dst(1));
-  unsigned desc = p->generateByteScatterMessageDesc(0, elemSize);
+  unsigned desc = 0;
+  if (insn.extra.splitSend)
+desc = p->generateByteScatterSendsMessageDesc(0, 

[Beignet] [PATCH 1/2] remove some redundant code for printf

2016-11-21 Thread Guo, Yejun
tmp0 is added to src in the selection stage and then simply ignored in the
context stage, so it is redundant.

Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen_context.cpp|  2 --
 backend/src/backend/gen_insn_selection.cpp | 54 +-
 2 files changed, 15 insertions(+), 41 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index c38b7af..186c8d9 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3474,8 +3474,6 @@ namespace gbe
 const GenRegister tmp1 = ra->genReg(insn.dst(2));
 GenRegister src;
 uint32_t srcNum = insn.srcNum;
-if (insn.extra.continueFlag)
-  srcNum--;
 
 GenRegister addr = GenRegister::retype(tmp0, GEN_TYPE_UD);
 GenRegister data = GenRegister::retype(tmp1, GEN_TYPE_UD);
diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index c14e0bc..1808c7b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2131,49 +2131,25 @@ namespace gbe
 
   void Selection::Opaque::PRINTF(GenRegister dst, uint8_t bti, GenRegister 
tmp0, GenRegister tmp1,
GenRegister src[8], int srcNum, uint16_t num, bool isContinue, 
uint32_t totalSize) {
-if (isContinue) {
-  SelectionInstruction *insn = this->appendInsn(SEL_OP_PRINTF, 3, srcNum + 
1);
-  SelectionVector *vector = this->appendVector();
-
-  for (int i = 0; i < srcNum; i++)
-insn->src(i) = src[i];
-
-  insn->src(srcNum) = tmp0;
-
-  insn->dst(0) = dst;
-  insn->dst(1) = tmp0;
-  insn->dst(2) = tmp1;
-
-  vector->regNum = 2;
-  vector->reg = >dst(1);
-  vector->offsetID = 0;
-  vector->isSrc = 0;
-
-  insn->extra.printfSize = static_cast(totalSize);
-  insn->extra.continueFlag = isContinue;
-  insn->extra.printfBTI = bti;
-  insn->extra.printfNum = num;
-} else {
-  SelectionInstruction *insn = this->appendInsn(SEL_OP_PRINTF, 3, srcNum);
-  SelectionVector *vector = this->appendVector();
+SelectionInstruction *insn = this->appendInsn(SEL_OP_PRINTF, 3, srcNum);
+SelectionVector *vector = this->appendVector();
 
-  for (int i = 0; i < srcNum; i++)
-insn->src(i) = src[i];
+for (int i = 0; i < srcNum; i++)
+  insn->src(i) = src[i];
 
-  insn->dst(0) = dst;
-  insn->dst(1) = tmp0;
-  insn->dst(2) = tmp1;
+insn->dst(0) = dst;
+insn->dst(1) = tmp0;
+insn->dst(2) = tmp1;
 
-  vector->regNum = 2;
-  vector->reg = >dst(1);
-  vector->offsetID = 0;
-  vector->isSrc = 0;
+vector->regNum = 2;
+vector->reg = >dst(1);
+vector->offsetID = 0;
+vector->isSrc = 0;
 
-  insn->extra.printfSize = static_cast(totalSize);
-  insn->extra.continueFlag = isContinue;
-  insn->extra.printfBTI = bti;
-  insn->extra.printfNum = num;
-}
+insn->extra.printfSize = static_cast(totalSize);
+insn->extra.continueFlag = isContinue;
+insn->extra.printfBTI = bti;
+insn->extra.printfNum = num;
   }
 
   void Selection::Opaque::WORKGROUP_OP(uint32_t wg_op,
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/2] do not care dst for printf

2016-11-21 Thread Guo, Yejun
Actually, the dst of printf means nothing, so there is no need to touch it.

Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen_context.cpp| 14 ++
 backend/src/backend/gen_insn_selection.cpp | 20 +---
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 186c8d9..a73ccb6 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3469,9 +3469,8 @@ namespace gbe
   }
 
   void GenContext::emitPrintfInstruction(const SelectionInstruction ) {
-const GenRegister dst = ra->genReg(insn.dst(0));
-const GenRegister tmp0 = ra->genReg(insn.dst(1));
-const GenRegister tmp1 = ra->genReg(insn.dst(2));
+const GenRegister tmp0 = ra->genReg(insn.dst(0));
+const GenRegister tmp1 = ra->genReg(insn.dst(1));
 GenRegister src;
 uint32_t srcNum = insn.srcNum;
 
@@ -3518,15 +3517,6 @@ namespace gbe
 emitPrintfLongInstruction(addr, data, src, insn.extra.printfBTI);
   }
 }
-
-if (dst.hstride == GEN_HORIZONTAL_STRIDE_0) {
-  p->push();
-  p->curr.execWidth = 1;
-}
-p->MOV(dst, GenRegister::immd(0));
-if (dst.hstride == GEN_HORIZONTAL_STRIDE_0) {
-  p->pop();
-}
   }
 
   void GenContext::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int 
sz) {
diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 1808c7b..88fe1a6 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -705,7 +705,7 @@ namespace gbe
 /*! Store the profiling info */
 void STORE_PROFILING(uint32_t profilingType, uint32_t bti, GenRegister 
tmp0, GenRegister tmp1, GenRegister ts[5], int tsNum);
 /*! Printf */
-void PRINTF(GenRegister dst, uint8_t bti, GenRegister tmp0, GenRegister 
tmp1, GenRegister src[8],
+void PRINTF(uint8_t bti, GenRegister tmp0, GenRegister tmp1, GenRegister 
src[8],
 int srcNum, uint16_t num, bool isContinue, uint32_t totalSize);
 /*! Multiply 64-bit integers */
 void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool 
native_long);
@@ -2129,20 +2129,19 @@ namespace gbe
 }
   }
 
-  void Selection::Opaque::PRINTF(GenRegister dst, uint8_t bti, GenRegister 
tmp0, GenRegister tmp1,
+  void Selection::Opaque::PRINTF(uint8_t bti, GenRegister tmp0, GenRegister 
tmp1,
GenRegister src[8], int srcNum, uint16_t num, bool isContinue, 
uint32_t totalSize) {
-SelectionInstruction *insn = this->appendInsn(SEL_OP_PRINTF, 3, srcNum);
+SelectionInstruction *insn = this->appendInsn(SEL_OP_PRINTF, 2, srcNum);
 SelectionVector *vector = this->appendVector();
 
 for (int i = 0; i < srcNum; i++)
   insn->src(i) = src[i];
 
-insn->dst(0) = dst;
-insn->dst(1) = tmp0;
-insn->dst(2) = tmp1;
+insn->dst(0) = tmp0;
+insn->dst(1) = tmp1;
 
 vector->regNum = 2;
-vector->reg = >dst(1);
+vector->reg = >dst(0);
 vector->offsetID = 0;
 vector->isSrc = 0;
 
@@ -7041,8 +7040,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   uint8_t BTI = insn.getBti();
   GenRegister tmp0, tmp1;
   uint32_t srcNum = insn.getSrcNum();
-  GenRegister dst = sel.selReg(insn.getDst(0), TYPE_S32);
-  //GBE_ASSERT(srcNum);
+
   uint32_t i = 0;
   uint32_t totalSize = 0;
   bool isContinue = false;
@@ -7063,14 +7061,14 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   i = 0;
   GenRegister regs[8];
   if (srcNum == 0) {
-  sel.PRINTF(dst, BTI, tmp0, tmp1, regs, srcNum, num, isContinue, 
totalSize);
+  sel.PRINTF(BTI, tmp0, tmp1, regs, srcNum, num, isContinue, 
totalSize);
   } else {
 do {
   uint32_t s = srcNum < 8 ? srcNum : 8;
   for (uint32_t j = 0; j < s; j++) {
 regs[j] = sel.selReg(insn.getSrc(i + j), insn.getType(i + j));
   }
-  sel.PRINTF(dst, BTI, tmp0, tmp1, regs, s, num, isContinue, 
totalSize);
+  sel.PRINTF(BTI, tmp0, tmp1, regs, s, num, isContinue, totalSize);
 
   if (srcNum > 8) {
 srcNum -= 8;
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet