From: Pan Xiuli <xiuli....@intel.com> For GEN8+ and OpenCL 2.0 we use stateless buffers, which require A64 buffer reads and writes. Add A64 encoders for OWord block read and write.
Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- backend/src/backend/gen8_encoder.cpp | 70 +++++++++++++++++++++ backend/src/backend/gen8_encoder.hpp | 4 ++ backend/src/backend/gen8_instruction.hpp | 13 ++++ backend/src/backend/gen_context.cpp | 103 ++++++++++++++++++++++++------- backend/src/backend/gen_defs.hpp | 3 + backend/src/backend/gen_encoder.cpp | 8 +++ backend/src/backend/gen_encoder.hpp | 4 ++ backend/src/ir/instruction.cpp | 8 +-- 8 files changed, 184 insertions(+), 29 deletions(-) diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 277260f..2f69116 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -637,4 +637,74 @@ namespace gbe gen8_insn->bits1.da3srcacc.src2_abs = src2.absolute; gen8_insn->bits1.da3srcacc.src2_negate = src2.negation; } + + static void setOBlockRWA64(GenEncoder *p, + GenNativeInstruction *insn, + uint32_t bti, + uint32_t size, + uint32_t msg_type, + uint32_t msg_length, + uint32_t response_length) + { + const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA; + p->setMessageDescriptor(insn, sfid, msg_length, response_length); + assert(size == 0 || size == 1 || size == 2 || size == 4 || size == 8); + Gen8NativeInstruction *gen8_insn = &insn->gen8_insn; + + gen8_insn->bits3.gen8_block_rw_a64.msg_type = msg_type; + gen8_insn->bits3.gen8_block_rw_a64.bti = bti; + // For OWord Block read, we use unaligned read + gen8_insn->bits3.gen8_block_rw_a64.msg_sub_type = msg_type == GEN8_P1_BLOCK_READ_A64 ? 1 : 0; + gen8_insn->bits3.gen8_block_rw_a64.block_size = size <= 2 ? size : (size == 4 ? 
3 : 4); + gen8_insn->bits3.gen8_block_rw_a64.header_present = 1; + } + + void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) { + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); + const uint32_t msg_length = 1; + uint32_t rsize = size / 2; + uint32_t msgsize = size; + // When size is 1 OWord, which means half a reg, we need to know which half to use + if (size == 1) { + if (dst.subnr == 0) + msgsize = 0; + else + msgsize = 1; + } + rsize = rsize == 0 ? 1 : rsize; + const uint32_t response_length = rsize; // Size is in regs + this->setHeader(insn); + this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); + this->setSrc1(insn, GenRegister::immud(0)); + setOBlockRWA64(this, + insn, + bti, + msgsize, + GEN8_P1_BLOCK_READ_A64, + msg_length, + response_length); + + } + + void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t size) { + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); + uint32_t rsize = size / 2; + rsize = rsize == 0 ? 1 : rsize; + const uint32_t msg_length = 1 + rsize; // Size is in owords + const uint32_t response_length = 0; + uint32_t msgsize = size; + msgsize = msgsize == 1 ? 0 : msgsize; + this->setHeader(insn); + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); + this->setSrc1(insn, GenRegister::immud(0)); + this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); + setOBlockRWA64(this, + insn, + bti, + msgsize, + GEN8_P1_BLOCK_WRITE_A64, + msg_length, + response_length); + } } /* End of the name space. 
*/ diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp index 12b3765..b0aec3a 100644 --- a/backend/src/backend/gen8_encoder.hpp +++ b/backend/src/backend/gen8_encoder.hpp @@ -71,6 +71,10 @@ namespace gbe uint32_t dstAcc, uint32_t src0Acc, uint32_t src1Acc); void MADM(GenRegister dst, GenRegister src0, GenRegister src1, GenRegister src2, uint32_t dstAcc, uint32_t src0Acc, uint32_t src1Acc, uint32_t src2Acc); + /*! A64 OBlock read */ + virtual void OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize); + /*! A64 OBlock write */ + virtual void OBWRITEA64(GenRegister header, uint32_t bti, uint32_t elemSize); }; } #endif /* __GBE_GEN8_ENCODER_HPP__ */ diff --git a/backend/src/backend/gen8_instruction.hpp b/backend/src/backend/gen8_instruction.hpp index 549948a..e76ecaa 100644 --- a/backend/src/backend/gen8_instruction.hpp +++ b/backend/src/backend/gen8_instruction.hpp @@ -604,6 +604,19 @@ union Gen8NativeInstruction uint32_t end_of_thread:1; } gen7_msg_gw; + struct { + uint32_t bti:8; + uint32_t block_size:3; // oword size + uint32_t msg_sub_type:2; // 00 OWord block R/W 01 Unaligned OWord block read 10 Oword Dual Block R/W 11 HWord Block R/W + uint32_t ignored:1; + uint32_t msg_type:5; // 10100 A64 block read, 10101 A64 block write + uint32_t header_present:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad2:2; + uint32_t end_of_thread:1; + } gen8_block_rw_a64; + struct { uint32_t jip:32; } gen8_branch; diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 6bb0f22..e10d89b 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -3502,14 +3502,20 @@ namespace gbe void GenContext::emitOBReadInstruction(const SelectionInstruction &insn) { const GenRegister dst= ra->genReg(insn.dst(1)); + const GenRegister addrreg = ra->genReg(insn.src(0)); uint32_t type = dst.type; uint32_t typesize = typeSize(type); - const 
GenRegister addr = GenRegister::toUniform(ra->genReg(insn.src(0)), GEN_TYPE_UD); - const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD); - const GenRegister headeraddr = GenRegister::offset(header, 0, 2*4); const uint32_t vec_size = insn.extra.elem; const GenRegister tmp = GenRegister::retype(ra->genReg(insn.dst(1 + vec_size)), type); const uint32_t simdWidth = p->curr.execWidth; + const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD); + const GenRegister addr = GenRegister::toUniform(addrreg, addrreg.type); + GenRegister headeraddr; + bool isA64 = insn.getbti() == 255; + if (isA64) + headeraddr = GenRegister::retype(GenRegister::offset(header, 0, 0), GEN_TYPE_UL); + else + headeraddr = GenRegister::offset(header, 0, 2*4); // Make header p->push(); @@ -3525,7 +3531,9 @@ namespace gbe p->MOV(headeraddr, addr); // Put zero in the general state base address - p->MOV(GenRegister::offset(header, 0, 5 * 4), GenRegister::immud(0)); + if (!isA64) + p->MOV(GenRegister::offset(header, 0, 5 * 4), GenRegister::immud(0)); + } p->pop(); // Now read the data, oword block read can only work with simd16 and no mask @@ -3534,7 +3542,12 @@ namespace gbe { p->curr.execWidth = 16; p->curr.noMask = 1; - p->OBREAD(dst, header, insn.getbti(), simdWidth * typesize / 16); + if (isA64) { + //p->curr.execWidth = 8; + p->OBREADA64(dst, header, insn.getbti(), simdWidth * typesize / 16); + } + else + p->OBREAD(dst, header, insn.getbti(), simdWidth * typesize / 16); } p->pop(); } else if (vec_size == 2) { @@ -3542,7 +3555,10 @@ namespace gbe { p->curr.execWidth = 16; p->curr.noMask = 1; - p->OBREAD(tmp, header, insn.getbti(), simdWidth * typesize / 8); + if (isA64) + p->OBREADA64(tmp, header, insn.getbti(), simdWidth * typesize / 8); + else + p->OBREAD(tmp, header, insn.getbti(), simdWidth * typesize / 8); } p->pop(); p->MOV(ra->genReg(insn.dst(1)), GenRegister::offset(tmp, 0)); @@ -3553,7 +3569,10 @@ namespace gbe { p->curr.execWidth 
= 16; p->curr.noMask = 1; - p->OBREAD(tmp, header, insn.getbti(), 2 * typesize); + if (isA64) + p->OBREADA64(tmp, header, insn.getbti(), 2 * typesize); + else + p->OBREAD(tmp, header, insn.getbti(), 2 * typesize); } p->pop(); for (uint32_t j = 0; j < 4; j++) @@ -3569,7 +3588,10 @@ namespace gbe } p->pop(); } - p->OBREAD(tmp, header, insn.getbti(), 8); + if (isA64) + p->OBREADA64(tmp, header, insn.getbti(), 8); + else + p->OBREAD(tmp, header, insn.getbti(), 8); for (uint32_t j = 0; j < 8 / typesize ; j++) p->MOV(ra->genReg(insn.dst(1 + j + i * 2)), GenRegister::offset(tmp, 0 ,j * simdWidth * typesize )); } @@ -3590,7 +3612,10 @@ namespace gbe { p->curr.execWidth = 16; p->curr.noMask = 1; - p->OBREAD(tmp, header, insn.getbti(), 8); + if (isA64) + p->OBREADA64(tmp, header, insn.getbti(), 8); + else + p->OBREAD(tmp, header, insn.getbti(), 8); } p->pop(); for (uint32_t j = 0; j < 16 / typesize; j++) @@ -3607,7 +3632,10 @@ namespace gbe } p->pop(); } - p->OBREAD(tmp, header, insn.getbti(), 8); + if (isA64) + p->OBREADA64(tmp, header, insn.getbti(), 8); + else + p->OBREAD(tmp, header, insn.getbti(), 8); for (uint32_t j = 0; j < 8 / typesize; j++) p->MOV(ra->genReg(insn.dst(1 + j + i * 8 / typesize)), GenRegister::offset(tmp, 0 ,j * simdWidth * typesize )); } @@ -3616,16 +3644,23 @@ namespace gbe } void GenContext::emitOBWriteInstruction(const SelectionInstruction &insn) { - const GenRegister addr = GenRegister::toUniform(ra->genReg(insn.src(0)), GEN_TYPE_UD); + const GenRegister addrreg = ra->genReg(insn.src(0)); const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD); - const GenRegister headeraddr = GenRegister::offset(header, 0, 2*4); uint32_t type = ra->genReg(insn.src(1)).type; uint32_t typesize = typeSize(type); const uint32_t vec_size = insn.extra.elem; const GenRegister tmp = GenRegister::offset(header, 1); + const GenRegister addr = GenRegister::toUniform(addrreg, addrreg.type); + GenRegister headeraddr; + bool isA64 = insn.getbti() 
== 255; + if (isA64) + headeraddr = GenRegister::retype(GenRegister::offset(header, 0, 0), GEN_TYPE_UL); + else + headeraddr = GenRegister::offset(header, 0, 2*4); const uint32_t simdWidth = p->curr.execWidth; uint32_t tmp_size = simdWidth * vec_size / 8; tmp_size = tmp_size > 4 ? 4 : tmp_size; + uint32_t offset_size = isA64 ? 128 : 8; p->push(); // Copy r0 into the header first @@ -3636,10 +3671,14 @@ namespace gbe // Update the header with the current address p->curr.execWidth = 1; - p->SHR(headeraddr, addr, GenRegister::immud(4)); + if (isA64) + p->MOV(headeraddr, addr); + else + p->SHR(headeraddr, addr, GenRegister::immud(4)); // Put zero in the general state base address - p->MOV(GenRegister::offset(header, 0, 5*4), GenRegister::immud(0)); + if (!isA64) + p->MOV(GenRegister::offset(header, 0, 5*4), GenRegister::immud(0)); p->pop(); // Now write the data, oword block write can only work with simd16 and no mask @@ -3649,7 +3688,10 @@ namespace gbe { p->curr.execWidth = 16; p->curr.noMask = 1; - p->OBWRITE(header, insn.getbti(), simdWidth * typesize / 16); + if (isA64) + p->OBWRITEA64(header, insn.getbti(), simdWidth * typesize / 16); + else + p->OBWRITE(header, insn.getbti(), simdWidth * typesize / 16); } p->pop(); } else if (vec_size == 2) { @@ -3659,7 +3701,10 @@ namespace gbe { p->curr.execWidth = 16; p->curr.noMask = 1; - p->OBWRITE(header, insn.getbti(), simdWidth * typesize / 8); + if (isA64) + p->OBWRITEA64(header, insn.getbti(), simdWidth * typesize / 8); + else + p->OBWRITE(header, insn.getbti(), simdWidth * typesize / 8); } p->pop(); } else if (vec_size == 4) { @@ -3670,7 +3715,10 @@ namespace gbe { p->curr.execWidth = 16; p->curr.noMask = 1; - p->OBWRITE(header, insn.getbti(), 2 * typesize); + if (isA64) + p->OBWRITEA64(header, insn.getbti(), 2 * typesize); + else + p->OBWRITE(header, insn.getbti(), 2 * typesize); } p->pop(); } else { @@ -3682,11 +3730,14 @@ namespace gbe { // Update the address in header p->curr.execWidth = 1; - p->ADD(headeraddr, 
headeraddr, GenRegister::immud(8)); + p->ADD(headeraddr, headeraddr, GenRegister::immud(offset_size)); } p->pop(); } - p->OBWRITE(header, insn.getbti(), 8); + if (isA64) + p->OBWRITEA64(header, insn.getbti(), 8); + else + p->OBWRITE(header, insn.getbti(), 8); } } } else if (vec_size == 8) { @@ -3699,7 +3750,7 @@ namespace gbe { // Update the address in header p->curr.execWidth = 1; - p->ADD(headeraddr, headeraddr, GenRegister::immud(8)); + p->ADD(headeraddr, headeraddr, GenRegister::immud(offset_size)); } p->pop(); } @@ -3707,7 +3758,10 @@ namespace gbe { p->curr.execWidth = 16; p->curr.noMask = 1; - p->OBWRITE(header, insn.getbti(), 8); + if (isA64) + p->OBWRITEA64(header, insn.getbti(), 8); + else + p->OBWRITE(header, insn.getbti(), 8); } p->pop(); } @@ -3720,11 +3774,14 @@ namespace gbe { // Update the address in header p->curr.execWidth = 1; - p->ADD(headeraddr, headeraddr, GenRegister::immud(8)); + p->ADD(headeraddr, headeraddr, GenRegister::immud(offset_size)); } p->pop(); } - p->OBWRITE(header, insn.getbti(), 8); + if (isA64) + p->OBWRITEA64(header, insn.getbti(), 8); + else + p->OBWRITE(header, insn.getbti(), 8); } } } else NOT_SUPPORTED; diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index bcbb23f..de88e11 100644 --- a/backend/src/backend/gen_defs.hpp +++ b/backend/src/backend/gen_defs.hpp @@ -357,6 +357,9 @@ enum GenMessageTarget { #define GEN75_P1_ATOMIC_COUNTER_4X2 12 //1100: Atomic Counter Operation 4X2 #define GEN75_P1_TYPED_SURFACE_WRITE 13 //1101: Typed Surface Write +#define GEN8_P1_BLOCK_READ_A64 20 //10100 +#define GEN8_P1_BLOCK_WRITE_A64 21 //10101 + /* Data port data cache scratch messages*/ #define GEN_SCRATCH_READ 0 #define GEN_SCRATCH_WRITE 1 diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index a6f8db8..5d5f564 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1338,6 +1338,14 @@ namespace gbe response_length); } + void 
GenEncoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize) { + NOT_SUPPORTED; + } + + void GenEncoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t elemSize) { + NOT_SUPPORTED; + } + void GenEncoder::EOT(uint32_t msg) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD)); diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 0f835ca..963c811 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -275,6 +275,10 @@ namespace gbe virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize); /*! MBlock write */ virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize); + /*! A64 OBlock read */ + virtual void OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize); + /*! A64 OBlock write */ + virtual void OBWRITEA64(GenRegister header, uint32_t bti, uint32_t elemSize); GBE_CLASS(GenEncoder); //!< Use custom allocators virtual void alu3(uint32_t opcode, GenRegister dst, diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 512055c..e722dbe 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1652,12 +1652,8 @@ namespace ir { whyNot = "Wrong number of source."; return false; } else { - const RegisterFamily fam = fn.getPointerFamily(); - for (uint32_t srcID = 1; srcID < this->srcNum; ++srcID) { - const Register regID = fn.getRegister(src, srcID); - if (UNLIKELY(checkRegisterData(fam, regID, fn, whyNot) == false)) - return false; - } + if (UNLIKELY(checkRegisterData(FAMILY_DWORD, fn.getRegister(src, 1), fn, whyNot) == false)) + return false; } break; default: -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet