On 02/07/2016 12:23 AM, Ilia Mirkin wrote:
On Sat, Feb 6, 2016 at 5:38 PM, Samuel Pitoiset <[email protected]> wrote:
Signed-off-by: Samuel Pitoiset <[email protected]> --- .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 + .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 102 ++++++++++++++++++++- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + 3 files changed, 102 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index f6605eb..42b2a84 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -398,6 +398,7 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc) srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20); break; default: + srcId(i->src(s), 49);
Yeah.... no :) I'd want to see some asserts here to make sure that this is what you think it is. Also, as I recall this is related to SELP emission, nothing here.
Oh right, I forgot to clean up this part. :-)
// ignore here, can be predicate or flags, but must not be address break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index e7cb54b..243e23a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1033,6 +1033,99 @@ NVC0LoweringPass::handleSUQ(Instruction *suq) return true; } +void +NVC0LoweringPass::handleSharedATOM(Instruction *atom) +{ + assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); + + BasicBlock *currBB = atom->bb; + BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false); + BasicBlock *joinBB = atom->bb->splitAfter(atom); + + bld.setPosition(currBB, true); + assert(!currBB->joinAt); + currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); + + bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL); + currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE); + + bld.setPosition(tryLockAndSetBB, true); + + Instruction *ld = + bld.mkLoad(TYPE_U32, atom->getDef(0), + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL); + ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); + ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; + + Value *stVal; + if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { + // Read the old value, and write the new one. 
+ stVal = atom->getSrc(1); + } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { + CmpInstruction *set = + bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), + TYPE_U32, ld->getDef(0), atom->getSrc(1)); + set->setPredicate(CC_P, ld->getDef(1)); + + CmpInstruction *selp = + bld.mkCmp(OP_SELP, CC_NOT_P, TYPE_U32, bld.getSSA(4, FILE_ADDRESS), + TYPE_U32, ld->getDef(0), atom->getSrc(2), + set->getDef(0)); + selp->setPredicate(CC_P, ld->getDef(1)); + + stVal = selp->getDef(0); + } else { + operation op; + + switch (atom->subOp) { + case NV50_IR_SUBOP_ATOM_ADD: + op = OP_ADD; + break; + case NV50_IR_SUBOP_ATOM_AND: + op = OP_AND; + break; + case NV50_IR_SUBOP_ATOM_OR: + op = OP_OR; + break; + case NV50_IR_SUBOP_ATOM_XOR: + op = OP_XOR; + break; + case NV50_IR_SUBOP_ATOM_MIN: + op = OP_MIN; + break; + case NV50_IR_SUBOP_ATOM_MAX: + op = OP_MAX; + break; + default: + assert(0); + } + + Instruction *i = + bld.mkOp2(op, atom->dType, bld.getSSA(4, FILE_ADDRESS), ld->getDef(0), + atom->getSrc(1));
Why is this FILE_ADDRESS? This is just a regular operation, nothing to do with address registers. Just bld.getSSA() should be fine here.
Ok.
+ i->setPredicate(CC_P, ld->getDef(1)); + + stVal = i->getDef(0); + } + + Instruction *st = + bld.mkStore(OP_STORE, TYPE_U32, + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), + NULL, stVal); + st->setPredicate(CC_P, ld->getDef(1)); + st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; + + // Loop until the lock is acquired. + bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1)); + tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK); + bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL);
You need an edge to the joinBB as well, no? (a CROSS edge, I guess).
Mmmh... Yeah probably.
+ + bld.remove(atom); + + bld.setPosition(joinBB, false); + bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; +} + bool NVC0LoweringPass::handleATOM(Instruction *atom) { @@ -1044,8 +1137,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom) sv = SV_LBASE; break; case FILE_MEMORY_SHARED: - sv = SV_SBASE; - break; + handleSharedATOM(atom); + return true; default: assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); @@ -1072,6 +1165,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom) bool NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) { + if (cas->src(0).getFile() == FILE_MEMORY_SHARED) { + // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM(). + return false; + } + if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) return false; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 09ec7e6..6eb8aff 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -105,6 +105,7 @@ protected: bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); void handleSurfaceOpNVE4(TexInstruction *); + void handleSharedATOM(Instruction *); void checkPredicate(Instruction *); -- 2.6.4 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
