Pushed.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Ruiling Song > Sent: Wednesday, November 4, 2015 15:50 > To: [email protected] > Cc: Song, Ruiling > Subject: [Beignet] [PATCH V3] GBE: Refine ir for memory operation like > atomic/load/store > > the legacyMode means what kind of address mode to choose. > when legacyMode is true, we need to do complex bti analysis. > > dynamicBti and staticBti are most for platforms before BDW. > And stateless is for platform BDW+ > > v2: > only do analyzePointerOrigin() under legacyMode. > > v3: > fix conflict with master, and some reorder warning. > > Signed-off-by: Ruiling Song <[email protected]> > --- > backend/src/backend/gen_insn_selection.cpp | 132 ++++------ > backend/src/ir/context.hpp | 19 -- > backend/src/ir/instruction.cpp | 410 > +++++++++++++++++------------ > backend/src/ir/instruction.hpp | 78 +++--- > backend/src/ir/lowering.cpp | 4 +- > backend/src/llvm/llvm_gen_backend.cpp | 393 +++++++++++++----------- > --- > 6 files changed, 531 insertions(+), 505 deletions(-) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 2452aea..5ec420e 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -1254,11 +1254,11 @@ namespace gbe > } > > void Selection::Opaque::ATOMIC(Reg dst, uint32_t function, > - uint32_t srcNum, Reg src0, > + uint32_t msgPayload, Reg src0, > Reg src1, Reg src2, GenRegister bti, > vector<GenRegister> temps) { > unsigned dstNum = 1 + temps.size(); > - SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum, > srcNum + 1); > + SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMIC, dstNum, > msgPayload + 1); > > if (bti.file != GEN_IMMEDIATE_VALUE) { > insn->state.flag = 0; > @@ -1272,14 +1272,15 @@ namespace gbe > } > > insn->src(0) = src0; > - if(srcNum > 1) insn->src(1) = src1; > - if(srcNum > 2) insn->src(2) = src2; > - 
insn->src(srcNum) = bti; > + if(msgPayload > 1) insn->src(1) = src1; > + if(msgPayload > 2) insn->src(2) = src2; > + insn->src(msgPayload) = bti; > + > insn->extra.function = function; > - insn->extra.elem = srcNum; > + insn->extra.elem = msgPayload; > > SelectionVector *vector = this->appendVector(); > - vector->regNum = srcNum; > + vector->regNum = msgPayload; //bti not included in SelectionVector > vector->offsetID = 0; > vector->reg = &insn->src(0); > vector->isSrc = 1; > @@ -3424,8 +3425,6 @@ namespace gbe > uint32_t valueNum, > ir::BTI bti) const > { > - //GenRegister temp = getRelativeAddress(sel, addr, > sel.selReg(bti.base, > ir::TYPE_U32)); > - > GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : > sel.selReg(bti.reg, ir::TYPE_U32); > sel.UNTYPED_READ(addr, dst.data(), valueNum, b, > sel.getBTITemps(bti)); > } > @@ -3726,28 +3725,12 @@ namespace gbe > return false; > } > > - INLINE ir::BTI getBTI(SelectionDAG &dag, const ir::LoadInstruction &insn) > const { > - using namespace ir; > - SelectionDAG *child0 = dag.child[0]; > - ir::BTI b; > - if (insn.isFixedBTI()) { > - const auto &immInsn = cast<LoadImmInstruction>(child0->insn); > - const auto imm = immInsn.getImmediate(); > - b.isConst = 1; > - b.imm = imm.getIntegerValue(); > - } else { > - b.isConst = 0; > - b.reg = insn.getBTI(); > - } > - return b; > - } > - > /*! 
Implements base class */ > virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const > { > using namespace ir; > const ir::LoadInstruction &insn = cast<ir::LoadInstruction>(dag.insn); > - GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32); > + GenRegister address = sel.selReg(insn.getAddressRegister(), > ir::TYPE_U32); > GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL || > insn.getAddressSpace() == MEM_CONSTANT || > insn.getAddressSpace() == MEM_PRIVATE || > @@ -3755,8 +3738,17 @@ namespace gbe > insn.getAddressSpace() == MEM_MIXED); > //GBE_ASSERT(sel.isScalarReg(insn.getValue(0)) == false); > > - BTI bti = getBTI(dag, insn); > - > + BTI bti; > + AddressMode am = insn.getAddressMode(); > + if (am == AM_StaticBti) { > + bti.isConst = 1; > + bti.imm = insn.getSurfaceIndex(); > + } else if (am == AM_DynamicBti) { > + bti.isConst = 0; > + bti.reg = insn.getBtiReg(); > + } else { > + assert(0 && "stateless not supported yet"); > + } > const Type type = insn.getValueType(); > const uint32_t elemSize = getByteScatterGatherSize(sel, type); > bool allConstant = isAllConstant(bti); > @@ -3784,12 +3776,7 @@ namespace gbe > this->emitUnalignedByteGather(sel, insn, elemSize, address, bti); > } > > - > - // for fixed bti, don't generate the useless loadi > - if (insn.isFixedBTI()) > - dag.child[0] = NULL; > markAllChildren(dag); > - > return true; > } > }; > @@ -3893,32 +3880,26 @@ namespace gbe > } > } > > - > - INLINE ir::BTI getBTI(SelectionDAG &dag, const ir::StoreInstruction > &insn) > const { > - using namespace ir; > - SelectionDAG *child0 = dag.child[0]; > - ir::BTI b; > - if (insn.isFixedBTI()) { > - const auto &immInsn = cast<LoadImmInstruction>(child0->insn); > - const auto imm = immInsn.getImmediate(); > - b.isConst = 1; > - b.imm = imm.getIntegerValue(); > - } else { > - b.isConst = 0; > - b.reg = insn.getBTI(); > - } > - return b; > - } > virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const > { > using namespace ir; > const 
ir::StoreInstruction &insn = > cast<ir::StoreInstruction>(dag.insn); > - GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32); > + GenRegister address = sel.selReg(insn.getAddressRegister(), > ir::TYPE_U32); > const Type type = insn.getValueType(); > const uint32_t elemSize = getByteScatterGatherSize(sel, type); > > - const bool isUniform = sel.isScalarReg(insn.getAddress()) && > sel.isScalarReg(insn.getValue(0)); > - BTI bti = getBTI(dag, insn); > + const bool isUniform = sel.isScalarReg(insn.getAddressRegister()) && > sel.isScalarReg(insn.getValue(0)); > + BTI bti; > + AddressMode am = insn.getAddressMode(); > + if (am == AM_StaticBti) { > + bti.isConst = 1; > + bti.imm = insn.getSurfaceIndex(); > + } else if (am == AM_DynamicBti) { > + bti.isConst = 0; > + bti.reg = insn.getBtiReg(); > + } else { > + assert(0 && "stateless not supported yet"); > + } > > if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD) > this->emitWrite64(sel, insn, address, bti); > @@ -3928,11 +3909,7 @@ namespace gbe > this->emitByteScatter(sel, insn, elemSize, address, bti, isUniform); > } > > - // for fixed bti, don't generate the useless loadi > - if (insn.isFixedBTI()) > - dag.child[0] = NULL; > markAllChildren(dag); > - > return true; > } > }; > @@ -4795,47 +4772,36 @@ namespace gbe > this->opcodes.push_back(ir::Opcode(op)); > } > > - INLINE ir::BTI getBTI(SelectionDAG &dag, const ir::AtomicInstruction > &insn) > const { > - using namespace ir; > - SelectionDAG *child0 = dag.child[0]; > - ir::BTI b; > - if (insn.isFixedBTI()) { > - const auto &immInsn = cast<LoadImmInstruction>(child0->insn); > - const auto imm = immInsn.getImmediate(); > - b.isConst = 1; > - b.imm = imm.getIntegerValue(); > - } else { > - b.isConst = 0; > - b.reg = insn.getBTI(); > - } > - return b; > - } > - > INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { > using namespace ir; > const ir::AtomicInstruction &insn = > cast<ir::AtomicInstruction>(dag.insn); > > - ir::BTI 
b = getBTI(dag, insn); > + ir::BTI b; > const AtomicOps atomicOp = insn.getAtomicOpcode(); > unsigned srcNum = insn.getSrcNum(); > - unsigned opNum = srcNum - 1; > + unsigned msgPayload; > + > + AddressMode AM = insn.getAddressMode(); > + if (AM == AM_DynamicBti) { > + b.reg = insn.getBtiReg(); > + msgPayload = srcNum - 1; > + } else { > + b.imm = insn.getSurfaceIndex(); > + b.isConst = 1; > + msgPayload = srcNum; > + } > > GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32); > GenRegister bti = b.isConst ? GenRegister::immud(b.imm) : > sel.selReg(b.reg, ir::TYPE_U32); > - GenRegister src0 = sel.selReg(insn.getSrc(1), TYPE_U32); //address > + GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32); > GenRegister src1 = src0, src2 = src0; > - if(srcNum > 2) src1 = sel.selReg(insn.getSrc(2), TYPE_U32); > - if(srcNum > 3) src2 = sel.selReg(insn.getSrc(3), TYPE_U32); > + if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32); > + if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32); > > GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp; > + sel.ATOMIC(dst, genAtomicOp, msgPayload, src0, src1, src2, bti, > sel.getBTITemps(b)); > > - sel.ATOMIC(dst, genAtomicOp, opNum, src0, src1, src2, bti, > sel.getBTITemps(b)); > - > - // for fixed bti, don't generate the useless loadi > - if (insn.isFixedBTI()) > - dag.child[0] = NULL; > markAllChildren(dag); > - > return true; > } > }; > diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp > index 0f7ded4..ab0d8b5 100644 > --- a/backend/src/ir/context.hpp > +++ b/backend/src/ir/context.hpp > @@ -189,25 +189,6 @@ namespace ir { > this->append(insn); > } > > - /*! LOAD with the destinations directly specified */ > - template <typename... 
Args> > - void LOAD(Type type, Register offset, AddressSpace space, bool > dwAligned, bool fixedBTI, Register bti, Args...values) > - { > - const Tuple index = this->tuple(values...); > - const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value; > - GBE_ASSERT(valueNum > 0); > - this->LOAD(type, index, offset, space, valueNum, dwAligned, fixedBTI, > bti); > - } > - > - /*! STORE with the sources directly specified */ > - template <typename... Args> > - void STORE(Type type, Register offset, AddressSpace space, bool > dwAligned, bool fixedBTI, Register bti, Args...values) > - { > - const Tuple index = this->tuple(values...); > - const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value; > - GBE_ASSERT(valueNum > 0); > - this->STORE(type, index, offset, space, valueNum, dwAligned, fixedBTI, > bti); > - } > void appendSurface(uint8_t bti, Register reg) { fn->appendSurface(bti, > reg); } > > protected: > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index f93c528..0246920 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -316,60 +316,104 @@ namespace ir { > Type srcType; //!< Type to convert from > }; > > + class ALIGNED_INSTRUCTION MemInstruction : > + public BasePolicy > + { > + public: > + MemInstruction(AddressMode _AM, > + AddressSpace _AS, > + bool _dwAligned, > + Type _type, > + Register _offset) > + : AM(_AM), > + AS(_AS), > + dwAligned(_dwAligned), > + type(_type), > + SurfaceIndex(0), > + offset(_offset) { > + } > + AddressMode getAddressMode() const { return AM; } > + AddressSpace getAddressSpace() const { return AS; } > + /*! 
MemInstruction may have one possible btiReg */ > + Register getBtiReg() const { assert(AM == AM_DynamicBti); > return > BtiReg; } > + unsigned getSurfaceIndex() const { assert(AM != AM_DynamicBti); > return SurfaceIndex; } > + Register getAddressRegister()const { return offset; } > + unsigned getAddressIndex() const { return 0; } > + Type getValueType() const { return type; } > + INLINE bool isAligned(void) const { return !!dwAligned; } > + > + void setSurfaceIndex (unsigned id) { SurfaceIndex = id; } > + void setBtiReg(Register reg) { BtiReg = reg; } > + protected: > + /*! including address reg + optional bti reg */ > + int getBaseSrcNum() const { return AM == AM_DynamicBti ? 2 > : 1; } > + bool hasExtraBtiReg() const { return AM == AM_DynamicBti; } > + AddressMode AM; > + AddressSpace AS; > + uint8_t dwAligned : 1; > + Type type; > + union { > + Register BtiReg; > + unsigned SurfaceIndex; > + }; > + Register offset; > + }; > + > class ALIGNED_INSTRUCTION AtomicInstruction : > - public BasePolicy, > + public MemInstruction, > public NDstPolicy<AtomicInstruction, 1> > { > public: > AtomicInstruction(AtomicOps atomicOp, > + Type type, > Register dst, > AddressSpace addrSpace, > - Register bti, > - bool fixedBTI, > - Tuple src) > + Register address, > + Tuple payload, > + AddressMode AM) > + : MemInstruction(AM, addrSpace, true, type, address) > { > this->opcode = OP_ATOMIC; > this->atomicOp = atomicOp; > this->dst[0] = dst; > - this->src = src; > - this->addrSpace = addrSpace; > - this->bti = bti; > - this->fixedBTI = fixedBTI ? 
1: 0; > - srcNum = 2; > + this->payload = payload; > + > + int payloadNum = 1; > if((atomicOp == ATOMIC_OP_INC) || > (atomicOp == ATOMIC_OP_DEC)) > - srcNum = 1; > + payloadNum = 0; > if(atomicOp == ATOMIC_OP_CMPXCHG) > - srcNum = 3; > - srcNum++; > + payloadNum = 2; > + > + srcNum = payloadNum + getBaseSrcNum(); > } > INLINE Register getSrc(const Function &fn, uint32_t ID) const { > - GBE_ASSERTM(ID < srcNum, "Out-of-bound source register for atomic"); > - if (ID == 0u) > - return bti; > - else > - return fn.getRegister(src, ID -1); > + GBE_ASSERTM((int)ID < (int)srcNum, "Out-of-bound source register for > atomic"); > + if (ID == 0) { > + return offset; > + } else if (hasExtraBtiReg() && (int)ID == (int)srcNum-1) { > + return getBtiReg(); > + } else { > + return fn.getRegister(payload, ID - 1); > + } > } > INLINE void setSrc(Function &fn, uint32_t ID, Register reg) { > - GBE_ASSERTM(ID < srcNum, "Out-of-bound source register for atomic"); > - if (ID == 0u) > - bti = reg; > - else > - fn.setRegister(src, ID - 1, reg); > + GBE_ASSERTM((int)ID < (int)srcNum, "Out-of-bound source register for > atomic"); > + if (ID == 0) { > + offset = reg; > + } else if (hasExtraBtiReg() && (int)ID == (int)srcNum - 1) { > + setBtiReg(reg); > + } else { > + fn.setRegister(payload, ID - 1, reg); > + } > } > INLINE uint32_t getSrcNum(void) const { return srcNum; } > > - INLINE AddressSpace getAddressSpace(void) const { return this- > >addrSpace; } > - INLINE Register getBTI(void) const { return bti; } > - INLINE bool isFixedBTI(void) const { return !!fixedBTI; } > INLINE AtomicOps getAtomicOpcode(void) const { return this- > >atomicOp; } > INLINE bool wellFormed(const Function &fn, std::string &whyNot) const; > INLINE void out(std::ostream &out, const Function &fn) const; > Register dst[1]; > - Tuple src; > - AddressSpace addrSpace; //!< Address space > - Register bti; //!< bti > - uint8_t fixedBTI:1; //!< fixed bti or not > + Tuple payload; > uint8_t srcNum:3; //!<Source Number > 
AtomicOps atomicOp:6; //!<Source Number > }; > @@ -428,119 +472,102 @@ namespace ir { > Register dst[0]; //!< No destination > }; > > + > class ALIGNED_INSTRUCTION LoadInstruction : > - public BasePolicy, > - public NSrcPolicy<LoadInstruction, 2> > + public MemInstruction > { > - public: > - LoadInstruction(Type type, > - Tuple dstValues, > - Register offset, > - AddressSpace addrSpace, > - uint32_t valueNum, > - bool dwAligned, > - bool fixedBTI, > - Register bti) > - { > - GBE_ASSERT(valueNum < 128); > - this->opcode = OP_LOAD; > - this->type = type; > - this->offset = offset; > - this->values = dstValues; > - this->addrSpace = addrSpace; > - this->valueNum = valueNum; > - this->dwAligned = dwAligned ? 1 : 0; > - this->fixedBTI = fixedBTI ? 1 : 0; > - this->bti = bti; > - } > - INLINE Register getDst(const Function &fn, uint32_t ID) const { > - GBE_ASSERTM(ID < valueNum, "Out-of-bound source register"); > - return fn.getRegister(values, ID); > - } > - INLINE void setDst(Function &fn, uint32_t ID, Register reg) { > - GBE_ASSERTM(ID < valueNum, "Out-of-bound source register"); > - fn.setRegister(values, ID, reg); > - } > - INLINE uint32_t getDstNum(void) const { return valueNum; } > - INLINE Type getValueType(void) const { return type; } > - INLINE uint32_t getValueNum(void) const { return valueNum; } > - INLINE AddressSpace getAddressSpace(void) const { return addrSpace; } > - INLINE Register getBTI(void) const { return bti; } > - INLINE bool wellFormed(const Function &fn, std::string &why) const; > - INLINE void out(std::ostream &out, const Function &fn) const; > - INLINE bool isAligned(void) const { return !!dwAligned; } > - INLINE bool isFixedBTI(void) const { return !!fixedBTI; } > - Type type; //!< Type to store > - Register src[0]; //!< Address where to load from > - Register bti; > - Register offset; //!< Alias to make it similar to store > - Tuple values; //!< Values to load > - AddressSpace addrSpace; //!< Where to load > - uint8_t fixedBTI:1; > - uint8_t 
valueNum:7; //!< Number of values to load > - uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN > - }; > + public: > + LoadInstruction(Type type, > + Tuple dstValues, > + Register offset, > + AddressSpace AS, > + uint32_t _valueNum, > + bool dwAligned, > + AddressMode AM) > + : MemInstruction(AM, AS, dwAligned, type, offset), > + valueNum(_valueNum), > + values(dstValues) > + { > + this->opcode = OP_LOAD; > + } > > + INLINE unsigned getSrcNum() const { return getBaseSrcNum(); } > + INLINE Register getSrc(const Function &fn, unsigned id) const { > + if (id == 0) return offset; > + if (hasExtraBtiReg() && id == 1) return BtiReg; > + assert(0 && "LoadInstruction::getSrc() out-of-range"); > + return ir::Register(0); > + } > + INLINE void setSrc(Function &fn, unsigned id, Register reg) { > + assert(id < getSrcNum()); > + if (id == 0) { offset = reg; return; } > + if (id == 1) { setBtiReg(reg); return; } > + } > + INLINE unsigned getDstNum() const { return valueNum; } > + INLINE Register getDst(const Function &fn, unsigned id) const { > + assert(id < valueNum); > + return fn.getRegister(values, id); > + } > + INLINE void setDst(Function &fn, unsigned id, Register reg) { > + assert(id < getDstNum()); > + fn.setRegister(values, id, reg); > + } > + INLINE uint32_t getValueNum(void) const { return valueNum; } > + INLINE Register getValue(const Function &fn, unsigned id) const { > + assert(id < valueNum); > + return fn.getRegister(values, id); > + } > + INLINE bool wellFormed(const Function &fn, std::string &why) const; > + INLINE void out(std::ostream &out, const Function &fn) const; > + > + uint8_t valueNum; > + Tuple values; > + }; > class ALIGNED_INSTRUCTION StoreInstruction : > - public BasePolicy, public NDstPolicy<StoreInstruction, 0> > + public MemInstruction, > + public NDstPolicy<StoreInstruction, 0> > { > - public: > - StoreInstruction(Type type, > - Tuple values, > - Register offset, > - AddressSpace addrSpace, > - uint32_t valueNum, > - bool 
dwAligned, > - bool fixedBTI, > - Register bti) > - { > - GBE_ASSERT(valueNum < 255); > - this->opcode = OP_STORE; > - this->type = type; > - this->offset = offset; > - this->values = values; > - this->addrSpace = addrSpace; > - this->valueNum = valueNum; > - this->dwAligned = dwAligned ? 1 : 0; > - this->fixedBTI = fixedBTI ? 1 : 0; > - this->bti = bti; > - } > - INLINE Register getSrc(const Function &fn, uint32_t ID) const { > - GBE_ASSERTM(ID < valueNum + 2u, "Out-of-bound source register for > store"); > - if (ID == 0u) > - return bti; > - else if (ID == 1u) > - return offset; > - else > - return fn.getRegister(values, ID - 2); > - } > - INLINE void setSrc(Function &fn, uint32_t ID, Register reg) { > - GBE_ASSERTM(ID < valueNum + 2u, "Out-of-bound source register for > store"); > - if (ID == 0u) > - bti = reg; > - else if (ID == 1u) > - offset = reg; > - else > - fn.setRegister(values, ID - 2, reg); > - } > - INLINE uint32_t getSrcNum(void) const { return valueNum + 2u; } > - INLINE uint32_t getValueNum(void) const { return valueNum; } > - INLINE Type getValueType(void) const { return type; } > - INLINE AddressSpace getAddressSpace(void) const { return addrSpace; } > - INLINE Register getBTI(void) const { return bti; } > - INLINE bool wellFormed(const Function &fn, std::string &why) const; > - INLINE void out(std::ostream &out, const Function &fn) const; > - INLINE bool isAligned(void) const { return !!dwAligned; } > - INLINE bool isFixedBTI(void) const { return !!fixedBTI; } > - Type type; //!< Type to store > - Register bti; > - Register offset; //!< First source is the offset where to store > - Tuple values; //!< Values to store > - AddressSpace addrSpace; //!< Where to store > - uint8_t fixedBTI:1; //!< Which btis need access > - uint8_t valueNum:7; //!< Number of values to store > - uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN > - Register dst[0]; //!< No destination > + public: > + StoreInstruction(Type type, > + Tuple values, > + 
Register offset, > + AddressSpace addrSpace, > + uint32_t valueNum, > + bool dwAligned, > + AddressMode AM) > + : MemInstruction(AM, addrSpace, dwAligned, type, offset) > + { > + this->opcode = OP_STORE; > + this->values = values; > + this->valueNum = valueNum; > + } > + INLINE unsigned getValueNum() const { return valueNum; } > + INLINE Register getValue(const Function &fn, unsigned id) const { > + return fn.getRegister(values, id); > + } > + INLINE unsigned getSrcNum() const { return getBaseSrcNum() + > valueNum; } > + INLINE Register getSrc(const Function &fn, unsigned id) const { > + if (id == 0) return offset; > + if (id <= valueNum) return fn.getRegister(values, id-1); > + if (hasExtraBtiReg() && (int)id == (int)valueNum+1) return > getBtiReg(); > + assert(0 && "StoreInstruction::getSrc() out-of-range"); > + return Register(0); > + } > + INLINE void setSrc(Function &fn, unsigned id, Register reg) { > + if (id == 0) { offset = reg; return; } > + if (id > 0 && id <= valueNum) { fn.setRegister(values, id-1, reg); > return; } > + if (hasExtraBtiReg() && > + (int)id == (int)valueNum + 1) { > + setBtiReg(reg); > + return; > + } > + assert(0 && "StoreInstruction::setSrc() index out-of-range"); > + } > + INLINE bool wellFormed(const Function &fn, std::string &why) const; > + INLINE void out(std::ostream &out, const Function &fn) const; > + Register dst[0]; > + uint8_t valueNum; > + Tuple values; > }; > > class ALIGNED_INSTRUCTION SampleInstruction : // TODO > @@ -1037,8 +1064,6 @@ namespace ir { > if (UNLIKELY(checkRegisterData(FAMILY_DWORD, getSrc(fn, srcID+1u), > fn, whyNot) == false)) > return false; > > - if (UNLIKELY(checkRegisterData(FAMILY_DWORD, bti, fn, whyNot) == > false)) > - return false; > return true; > } > > @@ -1065,7 +1090,7 @@ namespace ir { > template <typename T> > INLINE bool wellFormedLoadStore(const T &insn, const Function &fn, > std::string &whyNot) > { > - if (UNLIKELY(insn.offset >= fn.regNum())) { > + if (UNLIKELY(insn.getAddressRegister() 
>= fn.regNum())) { > whyNot = "Out-of-bound offset register index"; > return false; > } > @@ -1073,10 +1098,11 @@ namespace ir { > whyNot = "Out-of-bound tuple index"; > return false; > } > + > // Check all registers > - const RegisterFamily family = getFamily(insn.type); > - for (uint32_t valueID = 0; valueID < insn.valueNum; ++valueID) { > - const Register regID = fn.getRegister(insn.values, valueID); > + const RegisterFamily family = getFamily(insn.getValueType()); > + for (uint32_t valueID = 0; valueID < insn.getValueNum(); ++valueID) { > + const Register regID = insn.getValue(fn, valueID);; > if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false)) > return false; > } > @@ -1260,12 +1286,18 @@ namespace ir { > > INLINE void AtomicInstruction::out(std::ostream &out, const Function &fn) > const { > this->outOpcode(out); > - out << "." << addrSpace; > + out << "." << AS; > out << " %" << this->getDst(fn, 0); > - out << " {" << "%" << this->getSrc(fn, 1) << "}"; > - for (uint32_t i = 2; i < srcNum; ++i) > + out << " {" << "%" << this->getSrc(fn, 0) << "}"; > + for (uint32_t i = 1; i < srcNum; ++i) > out << " %" << this->getSrc(fn, i); > - out << (fixedBTI ? " bti" : " bti(mixed)") << " %" << this->getBTI(); > + AddressMode am = this->getAddressMode(); > + out << " bti:"; > + if ( am == AM_DynamicBti) { > + out << " %" << this->getBtiReg(); > + } else { > + out << this->getSurfaceIndex(); > + } > } > > > @@ -1294,23 +1326,35 @@ namespace ir { > > INLINE void LoadInstruction::out(std::ostream &out, const Function &fn) > const { > this->outOpcode(out); > - out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << > "aligned"; > + out << "." << type << "." << AS << (dwAligned ? "." : ".un") << > "aligned"; > out << " {"; > for (uint32_t i = 0; i < valueNum; ++i) > out << "%" << this->getDst(fn, i) << (i != (valueNum-1u) ? " " : ""); > out << "}"; > - out << " %" << this->getSrc(fn, 1); > - out << (fixedBTI ? 
" bti" : " bti(mixed)") << " %" << this->getBTI(); > + out << " %" << this->getSrc(fn, 0); > + AddressMode am = this->getAddressMode(); > + out << " bti:"; > + if ( am == AM_DynamicBti) { > + out << " %" << this->getBtiReg(); > + } else { > + out << this->getSurfaceIndex(); > + } > } > > INLINE void StoreInstruction::out(std::ostream &out, const Function &fn) > const { > this->outOpcode(out); > - out << "." << type << "." << addrSpace << (dwAligned ? "." : ".un") << > "aligned"; > - out << " %" << this->getSrc(fn, 1) << " {"; > + out << "." << type << "." << AS << (dwAligned ? "." : ".un") << > "aligned"; > + out << " %" << this->getSrc(fn, 0) << " {"; > for (uint32_t i = 0; i < valueNum; ++i) > - out << "%" << this->getSrc(fn, i+2) << (i != (valueNum-1u) ? " " : > ""); > + out << "%" << this->getSrc(fn, i+1) << (i != (valueNum-1u) ? " " : > ""); > out << "}"; > - out << (fixedBTI ? " bti" : " bti(mixed)") << " %" << this->getBTI(); > + AddressMode am = this->getAddressMode(); > + out << " bti:"; > + if ( am == AM_DynamicBti) { > + out << " %" << this->getBtiReg(); > + } else { > + out << this->getSurfaceIndex(); > + } > } > > INLINE void ReadARFInstruction::out(std::ostream &out, const Function > &fn) const { > @@ -1664,19 +1708,17 @@ DECL_MEM_FN(BitCastInstruction, Type, > getSrcType(void), getSrcType()) > DECL_MEM_FN(BitCastInstruction, Type, getDstType(void), getDstType()) > DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType()) > DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType()) > -DECL_MEM_FN(AtomicInstruction, AddressSpace, getAddressSpace(void), > getAddressSpace()) > +DECL_MEM_FN(MemInstruction, AddressSpace, getAddressSpace(void), > getAddressSpace()) > +DECL_MEM_FN(MemInstruction, AddressMode, getAddressMode(void), > getAddressMode()) > +DECL_MEM_FN(MemInstruction, Register, getAddressRegister(void), > getAddressRegister()) > +DECL_MEM_FN(MemInstruction, Register, getBtiReg(void), getBtiReg()) > 
+DECL_MEM_FN(MemInstruction, unsigned, getSurfaceIndex(void), > getSurfaceIndex()) > +DECL_MEM_FN(MemInstruction, Type, getValueType(void), > getValueType()) > +DECL_MEM_FN(MemInstruction, bool, isAligned(void), isAligned()) > +DECL_MEM_FN(MemInstruction, unsigned, getAddressIndex(void), > getAddressIndex()) > DECL_MEM_FN(AtomicInstruction, AtomicOps, getAtomicOpcode(void), > getAtomicOpcode()) > -DECL_MEM_FN(AtomicInstruction, bool, isFixedBTI(void), isFixedBTI()) > -DECL_MEM_FN(StoreInstruction, Type, getValueType(void), > getValueType()) > DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), > getValueNum()) > -DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), > getAddressSpace()) > -DECL_MEM_FN(StoreInstruction, bool, isAligned(void), isAligned()) > -DECL_MEM_FN(StoreInstruction, bool, isFixedBTI(void), isFixedBTI()) > -DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType()) > DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), > getValueNum()) > -DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), > getAddressSpace()) > -DECL_MEM_FN(LoadInstruction, bool, isAligned(void), isAligned()) > -DECL_MEM_FN(LoadInstruction, bool, isFixedBTI(void), isFixedBTI()) > DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType()) > DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), > getLabelIndex()) > DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated()) > @@ -1702,6 +1744,15 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, > getImageIndex(void), getImageIndex > > #undef DECL_MEM_FN > > +#define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \ > + RET CLASS::PROTOTYPE { \ > + return reinterpret_cast<internal::CLASS*>(this)->CALL; \ > + } > +DECL_MEM_FN(MemInstruction, void, setSurfaceIndex(unsigned id), > setSurfaceIndex(id)) > +DECL_MEM_FN(MemInstruction, void, setBtiReg(Register reg), > setBtiReg(reg)) > + > +#undef DECL_MEM_FN > + > Immediate LoadImmInstruction::getImmediate(void) const 
{ > const Function &fn = this->getFunction(); > return reinterpret_cast<const internal::LoadImmInstruction*>(this)- > >getImmediate(fn); > @@ -1843,8 +1894,16 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, > getImageIndex(void), getImageIndex > } > > // For all unary functions with given opcode > - Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace > space, Register bti, bool fixedBTI, Tuple src) { > - return internal::AtomicInstruction(atomicOp, dst, space, bti, fixedBTI, > src).convert(); > + Instruction ATOMIC(AtomicOps atomicOp, Type type, Register dst, > AddressSpace space, Register address, Tuple payload, AddressMode AM, > Register bti) { > + internal::AtomicInstruction insn = internal::AtomicInstruction(atomicOp, > type, dst, space, address, payload, AM); > + insn.setBtiReg(bti); > + return insn.convert(); > + } > + > + Instruction ATOMIC(AtomicOps atomicOp, Type type, Register dst, > AddressSpace space, Register address, Tuple payload, AddressMode AM, > unsigned SurfaceIndex) { > + internal::AtomicInstruction insn = internal::AtomicInstruction(atomicOp, > type, dst, space, address, payload, AM); > + insn.setSurfaceIndex(SurfaceIndex); > + return insn.convert(); > } > > // BRA > @@ -1892,10 +1951,25 @@ DECL_MEM_FN(GetImageInfoInstruction, > uint8_t, getImageIndex(void), getImageIndex > AddressSpace space, \ > uint32_t valueNum, \ > bool dwAligned, \ > - bool fixedBTI, \ > + AddressMode AM, \ > + unsigned SurfaceIndex) \ > + { \ > + internal::CLASS insn = > internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM); \ > + insn.setSurfaceIndex(SurfaceIndex);\ > + return insn.convert(); \ > + } \ > + Instruction NAME(Type type, \ > + Tuple tuple, \ > + Register offset, \ > + AddressSpace space, \ > + uint32_t valueNum, \ > + bool dwAligned, \ > + AddressMode AM, \ > Register bti) \ > { \ > - return > internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,fixedBTI,bti).c > onvert(); \ > + internal::CLASS insn = > 
internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,AM); \ > + insn.setBtiReg(bti); \ > + return insn.convert(); \ > } > > DECL_EMIT_FUNCTION(LOAD, LoadInstruction) > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index 3f3c655..b8f95ba 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -65,6 +65,13 @@ namespace ir { > MEM_INVALID > }; > > + enum AddressMode : uint8_t { > + AM_DynamicBti = 0, > + AM_Stateless, > + AM_StaticBti, > + AM_INVALID > + }; > + > enum AtomicOps { > ATOMIC_OP_AND = 1, > ATOMIC_OP_OR = 2, > @@ -288,20 +295,30 @@ namespace ir { > static bool isClassOf(const Instruction &insn); > }; > > + class MemInstruction : public Instruction { > + public: > + unsigned getSurfaceIndex() const; > + unsigned getAddressIndex() const; > + /*! Address space that is manipulated here */ > + AddressMode getAddressMode() const; > + Register getBtiReg() const; > + /*! Return the register that contains the addresses */ > + Register getAddressRegister() const; > + AddressSpace getAddressSpace() const; > + /*! Return the types of the values */ > + Type getValueType() const; > + bool isAligned(void) const; > + void setBtiReg(Register reg); > + void setSurfaceIndex(unsigned idx); > + }; > + > /*! Atomic instruction */ > - class AtomicInstruction : public Instruction { > + class AtomicInstruction : public MemInstruction { > public: > /*! Where the address register goes */ > - static const uint32_t btiIndex = 0; > - static const uint32_t addressIndex = 1; > - /*! Address space that is manipulated here */ > - AddressSpace getAddressSpace(void) const; > - Register getBTI(void) const { return this->getSrc(btiIndex); } > - bool isFixedBTI(void) const; > + static const uint32_t addressIndex = 0; > /*! Return the atomic function code */ > AtomicOps getAtomicOpcode(void) const; > - /*! 
Return the register that contains the addresses */ > - INLINE Register getAddress(void) const { return this- > >getSrc(addressIndex); } > /*! Return true if the given instruction is an instance of this class */ > static bool isClassOf(const Instruction &insn); > }; > @@ -309,27 +326,15 @@ namespace ir { > /*! Store instruction. First source is the address. Next sources are the > * values to store contiguously at the given address > */ > - class StoreInstruction : public Instruction { > + class StoreInstruction : public MemInstruction { > public: > /*! Where the address register goes */ > - static const uint32_t btiIndex = 0; > - static const uint32_t addressIndex = 1; > - /*! Return the types of the values to store */ > - Type getValueType(void) const; > - /*! Give the number of values the instruction is storing (srcNum-1) */ > + static const uint32_t addressIndex = 0; > uint32_t getValueNum(void) const; > - Register getBTI(void) const { return this->getSrc(btiIndex); } > - bool isFixedBTI(void) const; > - /*! Address space that is manipulated here */ > - AddressSpace getAddressSpace(void) const; > - /*! DWORD aligned means untyped read for Gen. That is what matters */ > - bool isAligned(void) const; > - /*! Return the register that contains the addresses */ > - INLINE Register getAddress(void) const { return this- > >getSrc(addressIndex); } > /*! Return the register that contain value valueID */ > INLINE Register getValue(uint32_t valueID) const { > GBE_ASSERT(valueID < this->getValueNum()); > - return this->getSrc(valueID + 2u); > + return this->getSrc(valueID + 1u); > } > /*! Return true if the given instruction is an instance of this class */ > static bool isClassOf(const Instruction &insn); > @@ -339,20 +344,10 @@ namespace ir { > * The multiple destinations are the contiguous values loaded at the given > * address > */ > - class LoadInstruction : public Instruction { > + class LoadInstruction : public MemInstruction { > public: > - /*! 
Type of the loaded values (ie type of all the destinations) */ > - Type getValueType(void) const; > /*! Number of values loaded (ie number of destinations) */ > uint32_t getValueNum(void) const; > - /*! Address space that is manipulated here */ > - AddressSpace getAddressSpace(void) const; > - /*! DWORD aligned means untyped read for Gen. That is what matters */ > - bool isAligned(void) const; > - /*! Return the register that contains the addresses */ > - INLINE Register getAddress(void) const { return this->getSrc(1u); } > - Register getBTI(void) const {return this->getSrc(0u);} > - bool isFixedBTI(void) const; > /*! Return the register that contain value valueID */ > INLINE Register getValue(uint32_t valueID) const { > return this->getDst(valueID); > @@ -725,7 +720,8 @@ namespace ir { > /*! F32TO16.{dstType <- srcType} dst src */ > Instruction F32TO16(Type dstType, Type srcType, Register dst, Register > src); > /*! atomic dst addr.space {src1 {src2}} */ > - Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, > Register bti, bool fixedBTI, Tuple src); > + Instruction ATOMIC(AtomicOps opcode, Type, Register dst, AddressSpace > space, Register ptr, Tuple payload, AddressMode, unsigned); > + Instruction ATOMIC(AtomicOps opcode, Type, Register dst, AddressSpace > space, Register ptr, Tuple src, AddressMode, Register); > /*! bra labelIndex */ > Instruction BRA(LabelIndex labelIndex); > /*! (pred) bra labelIndex */ > @@ -740,10 +736,12 @@ namespace ir { > Instruction WHILE(LabelIndex labelIndex, Register pred); > /*! ret */ > Instruction RET(void); > - /*! load.type.space {dst1,...,dst_valueNum} offset value */ > - Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace > space, uint32_t valueNum, bool dwAligned, bool fixedBTI, Register bti); > - /*! 
store.type.space offset {src1,...,src_valueNum} value */ > - Instruction STORE(Type type, Tuple src, Register offset, AddressSpace > space, uint32_t valueNum, bool dwAligned, bool fixedBTI, Register bti); > + /*! load.type.space {dst1,...,dst_valueNum} offset value, {bti} */ > + Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace > space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned > SurfaceIndex); > + Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace > space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti); > + /*! store.type.space offset {src1,...,src_valueNum} value {bti}*/ > + Instruction STORE(Type type, Tuple src, Register offset, AddressSpace > space, uint32_t valueNum, bool dwAligned, AddressMode, unsigned > SurfaceIndex); > + Instruction STORE(Type type, Tuple src, Register offset, AddressSpace > space, uint32_t valueNum, bool dwAligned, AddressMode, Register bti); > /*! loadi.type dst value */ > Instruction LOADI(Type type, Register dst, ImmediateIndex value); > /*! sync.params... 
(see Sync instruction) */ > diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp > index 9fcdf74..66ced8c 100644 > --- a/backend/src/ir/lowering.cpp > +++ b/backend/src/ir/lowering.cpp > @@ -320,7 +320,7 @@ namespace ir { > continue; > > IndirectLoad indirectLoad; > - Register addr = load->getAddress(); > + Register addr = load->getAddressRegister(); > indirectLoad.argID = argID; > indirectLoad.load = insn; > > @@ -368,7 +368,7 @@ namespace ir { > > const Register reg = load->getValue(valueID); > > - Instruction mov = ir::INDIRECT_MOV(type, reg, arg, > load->getAddress(), > offset); > + Instruction mov = ir::INDIRECT_MOV(type, reg, arg, load- > >getAddressRegister(), offset); > mov.insert(ins_after, &ins_after); > replaced = true; > } > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 7299d53..39665b8 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -386,6 +386,36 @@ namespace gbe > ir::Context &ctx; > }; > > + class GenWriter; > + class MemoryInstHelper { > + public: > + MemoryInstHelper(ir::Context &c, ir::Unit &u, GenWriter *w, bool l) > + : ctx(c), > + unit(u), > + writer(w), > + legacyMode(l) > + { } > + void emitUnalignedDQLoadStore(Value *llvmValues); > + ir::Tuple getValueTuple(llvm::Value *llvmValues, llvm::Type > *elemType, > unsigned start, unsigned elemNum); > + void emitBatchLoadOrStore(const ir::Type type, const uint32_t > elemNum, Value *llvmValues, Type * elemType); > + ir::Register getOffsetAddress(ir::Register basePtr, unsigned offset); > + void shootMessage(ir::Type type, ir::Register offset, > ir::Tuple value, > unsigned elemNum); > + template <bool isLoad, typename T> > + void emitLoadOrStore(T &I); > + private: > + ir::Context &ctx; > + ir::Unit &unit; > + GenWriter *writer; > + bool legacyMode; > + ir::AddressSpace addrSpace; > + ir::Register mBTI; > + ir::Register mPtr; > + ir::AddressMode mAddressMode; > + unsigned 
SurfaceIndex; > + bool isLoad; > + bool dwAligned; > + }; > + > /*! Translate LLVM IR code to Gen IR code */ > class GenWriter : public FunctionPass, public InstVisitor<GenWriter> > { > @@ -437,6 +467,9 @@ namespace gbe > Function *Func; > const Module *TheModule; > int btiBase; > + /*! legacyMode is for hardware before BDW, > + * which do not support stateless memory access */ > + bool legacyMode; > public: > static char ID; > explicit GenWriter(ir::Unit &unit) > @@ -446,7 +479,8 @@ namespace gbe > regTranslator(ctx), > LI(0), > TheModule(0), > - btiBase(BTI_RESERVED_NUM) > + btiBase(BTI_RESERVED_NUM), > + legacyMode(true) > { > #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 > initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); > @@ -491,7 +525,8 @@ namespace gbe > > Func = &F; > assignBti(F); > - analyzePointerOrigin(F); > + if (legacyMode) > + analyzePointerOrigin(F); > > #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=7 > LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); > @@ -643,6 +678,7 @@ namespace gbe > ir::ImmediateIndex processSeqConstant(ConstantDataSequential *seq, > int index, ConstTypeId tid); > ir::ImmediateIndex processConstantVector(ConstantVector *cv, int > index); > + friend class MemoryInstHelper; > }; > > char GenWriter::ID = 0; > @@ -3570,47 +3606,55 @@ namespace gbe > CallSite::arg_iterator AI = CS.arg_begin(); > CallSite::arg_iterator AE = CS.arg_end(); > GBE_ASSERT(AI != AE); > - > - ir::AddressSpace addrSpace; > - > Value *llvmPtr = *AI; > - Value *bti = getBtiRegister(llvmPtr); > - Value *ptrBase = getPointerBase(llvmPtr); > + ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmPtr- > >getType()->getPointerAddressSpace()); > ir::Register pointer = this->getRegister(llvmPtr); > - ir::Register baseReg = this->getRegister(ptrBase); > > + ir::Register ptr; > ir::Register btiReg; > - bool fixedBTI = false; > - if (isa<ConstantInt>(bti)) { > - fixedBTI = true; > - unsigned index = 
cast<ConstantInt>(bti)->getZExtValue(); > - addrSpace = btiToGen(index); > - ir::ImmediateIndex immIndex = ctx.newImmediate((uint32_t)index); > - btiReg = ctx.reg(ir::FAMILY_DWORD); > - ctx.LOADI(ir::TYPE_U32, btiReg, immIndex); > + unsigned SurfaceIndex = 0xff;; > + > + ir::AddressMode AM; > + if (legacyMode) { > + Value *bti = getBtiRegister(llvmPtr); > + Value *ptrBase = getPointerBase(llvmPtr); > + ir::Register baseReg = this->getRegister(ptrBase); > + if (isa<ConstantInt>(bti)) { > + AM = ir::AM_StaticBti; > + SurfaceIndex = cast<ConstantInt>(bti)->getZExtValue(); > + addrSpace = btiToGen(SurfaceIndex); > + } else { > + AM = ir::AM_DynamicBti; > + addrSpace = ir::MEM_MIXED; > + btiReg = this->getRegister(bti); > + } > + const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); > + ptr = ctx.reg(pointerFamily); > + ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg); > } else { > - addrSpace = ir::MEM_MIXED; > - btiReg = this->getRegister(bti); > + AM = ir::AM_Stateless; > + ptr = pointer; > } > > - const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); > - const ir::Register ptr = ctx.reg(pointerFamily); > - ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg); > - > const ir::Register dst = this->getRegister(&I); > > - uint32_t srcNum = 0; > - vector<ir::Register> src; > - src.push_back(ptr); > - srcNum++; > + uint32_t payloadNum = 0; > + vector<ir::Register> payload; > AI++; > > while(AI != AE) { > - src.push_back(this->getRegister(*(AI++))); > - srcNum++; > + payload.push_back(this->getRegister(*(AI++))); > + payloadNum++; > + } > + ir::Type type = getType(ctx, llvmPtr->getType()- > >getPointerElementType()); > + const ir::Tuple payloadTuple = payloadNum == 0 ? 
> + ir::Tuple(0) : > + ctx.arrayTuple(&payload[0], payloadNum); > + if (AM == ir::AM_DynamicBti) { > + ctx.ATOMIC(opcode, type, dst, addrSpace, ptr, payloadTuple, AM, > btiReg); > + } else { > + ctx.ATOMIC(opcode, type, dst, addrSpace, ptr, payloadTuple, AM, > SurfaceIndex); > } > - const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], srcNum); > - ctx.ATOMIC(opcode, dst, addrSpace, btiReg, fixedBTI, srcTuple); > } > > /* append a new sampler. should be called before any reference to > @@ -4323,65 +4367,82 @@ namespace gbe > this->newRegister(&I); > } > void GenWriter::regAllocateStoreInst(StoreInst &I) {} > + void GenWriter::emitLoadInst(LoadInst &I) { > + MemoryInstHelper *h = new MemoryInstHelper(ctx, unit, this, > legacyMode); > + h->emitLoadOrStore<true>(I); > + delete h; > + } > > - void GenWriter::emitBatchLoadOrStore(const ir::Type type, const uint32_t > elemNum, > - Value *llvmValues, const ir::Register > ptr, > - const ir::AddressSpace addrSpace, > - Type * elemType, bool isLoad, > ir::Register bti, > - bool dwAligned, bool fixedBTI) { > - const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); > - uint32_t totalSize = elemNum * getFamilySize(getFamily(type)); > - uint32_t msgNum = totalSize > 16 ? 
totalSize / 16 : 1; > - const uint32_t perMsgNum = elemNum / msgNum; > + void GenWriter::emitStoreInst(StoreInst &I) { > + MemoryInstHelper *h = new MemoryInstHelper(ctx, unit, this, > legacyMode); > + h->emitLoadOrStore<false>(I); > + delete h; > + } > > - for (uint32_t msg = 0; msg < msgNum; ++msg) { > - // Build the tuple data in the vector > + llvm::FunctionPass *createGenPass(ir::Unit &unit) { > + return new GenWriter(unit); > + } > + > + ir::Tuple MemoryInstHelper::getValueTuple(llvm::Value *llvmValues, > llvm::Type *elemType, unsigned start, unsigned elemNum) { > vector<ir::Register> tupleData; // put registers here > - for (uint32_t elemID = 0; elemID < perMsgNum; ++elemID) { > + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { > ir::Register reg; > - if(regTranslator.isUndefConst(llvmValues, elemID)) { > + if(writer->regTranslator.isUndefConst(llvmValues, elemID)) { > Value *v = Constant::getNullValue(elemType); > - reg = this->getRegister(v); > + reg = writer->getRegister(v); > } else > - reg = this->getRegister(llvmValues, perMsgNum*msg+elemID); > + reg = writer->getRegister(llvmValues, start + elemID); > > tupleData.push_back(reg); > } > - const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], perMsgNum); > - > - // We may need to update to offset the pointer > - ir::Register addr; > - if (msg == 0) > - addr = ptr; > - else { > - const ir::Register offset = ctx.reg(pointerFamily); > - ir::ImmediateIndex immIndex; > - ir::Type immType; > + const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum); > + return tuple; > + } > + > + void MemoryInstHelper::emitBatchLoadOrStore(const ir::Type type, const > uint32_t elemNum, > + Value *llvmValues, > + Type * elemType) { > + uint32_t totalSize = elemNum * getFamilySize(getFamily(type)); > + uint32_t msgNum = totalSize > 16 ? 
totalSize / 16 : 1; > + const uint32_t perMsgNum = elemNum / msgNum; > + > + for (uint32_t msg = 0; msg < msgNum; ++msg) { > + // Build the tuple data in the vector > + ir::Tuple tuple = getValueTuple(llvmValues, elemType, perMsgNum*msg, > perMsgNum); > // each message can read/write 16 byte > const int32_t stride = 16; > - if (pointerFamily == ir::FAMILY_DWORD) { > - immIndex = ctx.newImmediate(int32_t(msg*stride)); > - immType = ir::TYPE_S32; > - } else { > - immIndex = ctx.newImmediate(int64_t(msg*stride)); > - immType = ir::TYPE_S64; > - } > + ir::Register addr = getOffsetAddress(mPtr, msg*stride); > + shootMessage(type, addr, tuple, perMsgNum); > + } > + } > + > + ir::Register MemoryInstHelper::getOffsetAddress(ir::Register basePtr, > unsigned offset) { > + const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); > + ir::Register addr; > + if (offset == 0) > + addr = basePtr; > + else { > + const ir::Register offsetReg = ctx.reg(pointerFamily); > + ir::ImmediateIndex immIndex; > + ir::Type immType; > > - addr = ctx.reg(pointerFamily); > - ctx.LOADI(immType, offset, immIndex); > - ctx.ADD(immType, addr, ptr, offset); > + if (pointerFamily == ir::FAMILY_DWORD) { > + immIndex = ctx.newImmediate(int32_t(offset)); > + immType = ir::TYPE_S32; > + } else { > + immIndex = ctx.newImmediate(int64_t(offset)); > + immType = ir::TYPE_S64; > } > > - // Emit the instruction > - if (isLoad) > - ctx.LOAD(type, tuple, addr, addrSpace, perMsgNum, dwAligned, > fixedBTI, bti); > - else > - ctx.STORE(type, tuple, addr, addrSpace, perMsgNum, dwAligned, > fixedBTI, bti); > + addr = ctx.reg(pointerFamily); > + ctx.LOADI(immType, offsetReg, immIndex); > + ctx.ADD(immType, addr, basePtr, offsetReg); > } > + return addr; > } > > // handle load of dword/qword with unaligned address > - void GenWriter::emitUnalignedDQLoadStore(ir::Register ptr, Value > *llvmValues, ir::AddressSpace addrSpace, ir::Register bti, bool isLoad, bool > dwAligned, bool fixedBTI) > + void 
MemoryInstHelper::emitUnalignedDQLoadStore(Value *llvmValues) > { > Type *llvmType = llvmValues->getType(); > unsigned byteSize = getTypeByteSize(unit, llvmType); > @@ -4395,19 +4456,7 @@ namespace gbe > } > const ir::Type type = getType(ctx, elemType); > > - vector<ir::Register> tupleData; > - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { > - ir::Register reg; > - if(regTranslator.isUndefConst(llvmValues, elemID)) { > - Value *v = Constant::getNullValue(elemType); > - reg = this->getRegister(v); > - } else > - reg = this->getRegister(llvmValues, elemID); > - > - tupleData.push_back(reg); > - } > - const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum); > - > + ir::Tuple tuple = getValueTuple(llvmValues, elemType, 0, elemNum); > vector<ir::Register> byteTupleData; > for (uint32_t elemID = 0; elemID < byteSize; ++elemID) { > byteTupleData.push_back(ctx.reg(ir::FAMILY_BYTE)); > @@ -4415,97 +4464,83 @@ namespace gbe > const ir::Tuple byteTuple = ctx.arrayTuple(&byteTupleData[0], byteSize); > > if (isLoad) { > - ctx.LOAD(ir::TYPE_U8, byteTuple, ptr, addrSpace, byteSize, dwAligned, > fixedBTI, bti); > + shootMessage(ir::TYPE_U8, mPtr, byteTuple, byteSize); > ctx.BITCAST(type, ir::TYPE_U8, tuple, byteTuple, elemNum, byteSize); > } else { > ctx.BITCAST(ir::TYPE_U8, type, byteTuple, tuple, byteSize, elemNum); > // FIXME: byte scatter does not handle correctly vector store, after > fix > that, > // we can directly use on store instruction like: > // ctx.STORE(ir::TYPE_U8, byteTuple, ptr, addrSpace, byteSize, > dwAligned, fixedBTI, bti); > - const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); > for (uint32_t elemID = 0; elemID < byteSize; elemID++) { > - const ir::Register reg = byteTupleData[elemID]; > - ir::Register addr; > - if (elemID == 0) > - addr = ptr; > - else { > - const ir::Register offset = ctx.reg(pointerFamily); > - ir::ImmediateIndex immIndex; > - immIndex = ctx.newImmediate(int32_t(elemID)); > - addr = ctx.reg(pointerFamily); > 
- ctx.LOADI(ir::TYPE_S32, offset, immIndex); > - ctx.ADD(ir::TYPE_S32, addr, ptr, offset); > - } > - ctx.STORE(ir::TYPE_U8, addr, addrSpace, dwAligned, fixedBTI, bti, > reg); > + const ir::Register addr = getOffsetAddress(mPtr, elemID); > + const ir::Tuple value = ctx.arrayTuple(&byteTupleData[elemID], 1); > + shootMessage(ir::TYPE_U8, addr, value, 1); > } > } > } > > - extern int OCL_SIMD_WIDTH; > template <bool isLoad, typename T> > - INLINE void GenWriter::emitLoadOrStore(T &I) > - { > + void MemoryInstHelper::emitLoadOrStore(T &I) { > Value *llvmPtr = I.getPointerOperand(); > Value *llvmValues = getLoadOrStoreValue(I); > Type *llvmType = llvmValues->getType(); > - const bool dwAligned = (I.getAlignment() % 4) == 0; > - ir::AddressSpace addrSpace; > - const ir::Register pointer = this->getRegister(llvmPtr); > + dwAligned = (I.getAlignment() % 4) == 0; > + addrSpace = addressSpaceLLVMToGen(llvmPtr->getType()- > >getPointerAddressSpace()); > + const ir::Register pointer = writer->getRegister(llvmPtr); > const ir::RegisterFamily pointerFamily = ctx.getPointerFamily(); > > - Value *bti = getBtiRegister(llvmPtr); > - Value *ptrBase = getPointerBase(llvmPtr); > - ir::Register baseReg = this->getRegister(ptrBase); > - bool zeroBase = false; > - if (isa<ConstantPointerNull>(ptrBase)) { > - zeroBase = true; > - } > - > - ir::Register btiReg; > - bool fixedBTI = false; > - if (isa<ConstantInt>(bti)) { > - fixedBTI = true; > - unsigned index = cast<ConstantInt>(bti)->getZExtValue(); > - addrSpace = btiToGen(index); > - ir::ImmediateIndex immIndex = ctx.newImmediate((uint32_t)index); > - btiReg = ctx.reg(ir::FAMILY_DWORD); > - ctx.LOADI(ir::TYPE_U32, btiReg, immIndex); > - } else { > - addrSpace = ir::MEM_MIXED; > - btiReg = this->getRegister(bti); > - } > - > + this->isLoad = isLoad; > Type *scalarType = llvmType; > if (!isScalarType(llvmType)) { > VectorType *vectorType = cast<VectorType>(llvmType); > scalarType = vectorType->getElementType(); > } > > - ir::Register ptr = 
ctx.reg(pointerFamily); > - // FIXME: avoid subtraction zero at this stage is not a good idea, > - // but later ArgumentLower pass need to match exact load/addImm > pattern > - // so, I avoid subtracting zero base to satisfy ArgumentLower pass. > - if (!zeroBase) > - ctx.SUB(ir::TYPE_U32, ptr, pointer, baseReg); > - else > - ptr = pointer; > + // calculate bti and pointer operand > + if (legacyMode) { > + Value *bti = writer->getBtiRegister(llvmPtr); > + Value *ptrBase = writer->getPointerBase(llvmPtr); > + ir::Register baseReg = writer->getRegister(ptrBase); > + bool zeroBase = isa<ConstantPointerNull>(ptrBase) ? true : false; > + > + if (isa<ConstantInt>(bti)) { > + SurfaceIndex = cast<ConstantInt>(bti)->getZExtValue(); > + addrSpace = btiToGen(SurfaceIndex); > + mAddressMode = ir::AM_StaticBti; > + } else { > + addrSpace = ir::MEM_MIXED; > + mBTI = writer->getRegister(bti); > + mAddressMode = ir::AM_DynamicBti; > + } > + mPtr = ctx.reg(pointerFamily); > + > + // FIXME: avoid subtraction zero at this stage is not a good idea, > + // but later ArgumentLower pass need to match exact load/addImm > pattern > + // so, I avoid subtracting zero base to satisfy ArgumentLower pass. > + if (!zeroBase) > + ctx.SUB(ir::TYPE_U32, mPtr, pointer, baseReg); > + else > + mPtr = pointer; > + } else { > + mPtr = pointer; > + SurfaceIndex = 0xff; > + mAddressMode = ir::AM_Stateless; > + } > > unsigned primitiveBits = scalarType->getPrimitiveSizeInBits(); > if (!dwAligned > && (primitiveBits == 64 > || primitiveBits == 32) > ) { > - emitUnalignedDQLoadStore(ptr, llvmValues, addrSpace, btiReg, isLoad, > dwAligned, fixedBTI); > + emitUnalignedDQLoadStore(llvmValues); > return; > } > // Scalar is easy. 
We neednot build register tuples > if (isScalarType(llvmType) == true) { > const ir::Type type = getType(ctx, llvmType); > - const ir::Register values = this->getRegister(llvmValues); > - if (isLoad) > - ctx.LOAD(type, ptr, addrSpace, dwAligned, fixedBTI, btiReg, values); > - else > - ctx.STORE(type, ptr, addrSpace, dwAligned, fixedBTI, btiReg, values); > + const ir::Register values = writer->getRegister(llvmValues); > + const ir::Tuple tuple = ctx.arrayTuple(&values, 1); > + shootMessage(type, mPtr, tuple, 1); > } > // A vector type requires to build a tuple > else { > @@ -4521,7 +4556,7 @@ namespace gbe > // And the llvm does cast a type3 data to type4 for load/store > instruction, > // so a 4 elements vector may only have 3 valid elements. We need to > fix > it to correct element > // count here. > - if (elemNum == 4 && regTranslator.isUndefConst(llvmValues, 3)) > + if (elemNum == 4 && writer->regTranslator.isUndefConst(llvmValues, 3)) > elemNum = 3; > > // The code is going to be fairly different from types to types (based > on > @@ -4532,72 +4567,44 @@ namespace gbe > if(dataFamily == ir::FAMILY_DWORD && addrSpace != > ir::MEM_CONSTANT) { > // One message is enough here. 
Nothing special to do > if (elemNum <= 4) { > - // Build the tuple data in the vector > - vector<ir::Register> tupleData; // put registers here > - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { > - ir::Register reg; > - if(regTranslator.isUndefConst(llvmValues, elemID)) { > - Value *v = Constant::getNullValue(elemType); > - reg = this->getRegister(v); > - } else > - reg = this->getRegister(llvmValues, elemID); > - > - tupleData.push_back(reg); > - } > - const ir::Tuple tuple = ctx.arrayTuple(&tupleData[0], elemNum); > - > - // Emit the instruction > - if (isLoad) > - ctx.LOAD(type, tuple, ptr, addrSpace, elemNum, dwAligned, > fixedBTI, > btiReg); > - else > - ctx.STORE(type, tuple, ptr, addrSpace, elemNum, dwAligned, > fixedBTI, > btiReg); > + ir::Tuple tuple = getValueTuple(llvmValues, elemType, 0, elemNum); > + shootMessage(type, mPtr, tuple, elemNum); > } > - // Not supported by the hardware. So, we split the message and we use > - // strided loads and stores > else { > - emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, > elemType, isLoad, btiReg, dwAligned, fixedBTI); > + emitBatchLoadOrStore(type, elemNum, llvmValues, elemType); > } > } > else if((dataFamily == ir::FAMILY_WORD && (isLoad || elemNum % 2 == > 0)) || > (dataFamily == ir::FAMILY_BYTE && (isLoad || elemNum % 4 == > 0))) { > - emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, > elemType, isLoad, btiReg, dwAligned, fixedBTI); > + emitBatchLoadOrStore(type, elemNum, llvmValues, elemType); > } else { > for (uint32_t elemID = 0; elemID < elemNum; elemID++) { > - if(regTranslator.isUndefConst(llvmValues, elemID)) > + if(writer->regTranslator.isUndefConst(llvmValues, elemID)) > continue; > > - const ir::Register reg = this->getRegister(llvmValues, elemID); > - ir::Register addr; > - if (elemID == 0) > - addr = ptr; > - else { > - const ir::Register offset = ctx.reg(pointerFamily); > - ir::ImmediateIndex immIndex; > - int elemSize = getTypeByteSize(unit, elemType); > 
- immIndex = ctx.newImmediate(int32_t(elemID * elemSize)); > - addr = ctx.reg(pointerFamily); > - ctx.LOADI(ir::TYPE_S32, offset, immIndex); > - ctx.ADD(ir::TYPE_S32, addr, ptr, offset); > - } > - if (isLoad) > - ctx.LOAD(type, addr, addrSpace, dwAligned, fixedBTI, btiReg, reg); > - else > - ctx.STORE(type, addr, addrSpace, dwAligned, fixedBTI, btiReg, > reg); > + const ir::Register reg = writer->getRegister(llvmValues, elemID); > + int elemSize = getTypeByteSize(unit, elemType); > + > + ir::Register addr = getOffsetAddress(mPtr, elemID*elemSize); > + const ir::Tuple tuple = ctx.arrayTuple(&reg, 1); > + shootMessage(type, addr, tuple, 1); > } > } > } > } > > - void GenWriter::emitLoadInst(LoadInst &I) { > - this->emitLoadOrStore<true>(I); > - } > - > - void GenWriter::emitStoreInst(StoreInst &I) { > - this->emitLoadOrStore<false>(I); > - } > - > - llvm::FunctionPass *createGenPass(ir::Unit &unit) { > - return new GenWriter(unit); > + void MemoryInstHelper::shootMessage(ir::Type type, ir::Register offset, > ir::Tuple value, unsigned elemNum) { > + if (mAddressMode == ir::AM_DynamicBti) { > + if (isLoad) > + ctx.LOAD(type, value, offset, addrSpace, elemNum, dwAligned, > mAddressMode, mBTI); > + else > + ctx.STORE(type, value, offset, addrSpace, elemNum, dwAligned, > mAddressMode, mBTI); > + } else { > + if (isLoad) > + ctx.LOAD(type, value, offset, addrSpace, elemNum, dwAligned, > mAddressMode, SurfaceIndex); > + else > + ctx.STORE(type, value, offset, addrSpace, elemNum, dwAligned, > mAddressMode, SurfaceIndex); > + } > } > } /* namespace gbe */ > > -- > 2.3.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
