This patch doesn't touch NTID since it seems very difficult (or impossible) to generate. Seemingly because the state tracker or glsl compiler is turning gl_WorkGroupSize into a immediate. Such a transformation is not possible with GL_ARB_compute_variable_group_size but gl_WorkGroupSize is not available when that extension is enabled.
On Kepler+, it seems to end up being lowered into constant buffer loads anyway. On Fri, Jul 6, 2018 at 9:21 PM, Rhys Perry <[email protected]> wrote: > total instructions in shared programs : 5804448 -> 5804690 (0.00%) > total gprs used in shared programs : 670065 -> 670065 (0.00%) > total shared used in shared programs : 548832 -> 548832 (0.00%) > total local used in shared programs : 21068 -> 21068 (0.00%) > > local shared gpr inst bytes > helped 0 0 0 5 5 > hurt 0 0 0 191 191 > > Signed-off-by: Rhys Perry <[email protected]> > --- > Instead of combining SV_TID reads into a SV_COMBINED_TID read, this has > SV_TID reads lowered into a SV_COMBINED_TID read which can them be > combined by CSE and then turned into a SV_TID read by AlgebraicOpt if it > doesn't create another RDSV. > > src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + > .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 1 + > .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 1 + > .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 + > .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 3 ++ > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 12 +++++++ > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 40 > ++++++++++++++++++++++ > .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + > .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + > 9 files changed, 61 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > index f4f3c70888..0b220cc48d 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h > @@ -453,6 +453,7 @@ enum SVSemantic > SV_TESS_INNER, > SV_TESS_COORD, > SV_TID, > + SV_COMBINED_TID, > SV_CTAID, > SV_NTID, > SV_GRIDID, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > index 647d1a5d0e..2118c3153f 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp > @@ -2297,6 +2297,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) > case SV_INVOCATION_ID: return 0x11; > case SV_YDIR: return 0x12; > case SV_THREAD_KILL: return 0x13; > + case SV_COMBINED_TID: return 0x20; > case SV_TID: return 0x21 + SDATA(ref).sv.index; > case SV_CTAID: return 0x25 + SDATA(ref).sv.index; > case SV_NTID: return 0x29 + SDATA(ref).sv.index; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > index 26826d6360..694d1b10a3 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp > @@ -267,6 +267,7 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val) > case SV_INVOCATION_ID : id = 0x11; break; > case SV_THREAD_KILL : id = 0x13; break; > case SV_INVOCATION_INFO: id = 0x1d; break; > + case SV_COMBINED_TID : id = 0x20; break; > case SV_TID : id = 0x21 + val->reg.data.sv.index; break; > case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; > case SV_LANEMASK_EQ : id = 0x38; break; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > index d85fdda56f..b6e35dd0ee 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > @@ -1990,6 +1990,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref) > case SV_INVOCATION_ID: return 0x11; > case SV_YDIR: return 0x12; > case SV_THREAD_KILL: return 0x13; > + case SV_COMBINED_TID: return 0x20; > case SV_TID: return 0x21 + SDATA(ref).sv.index; > case SV_CTAID: return 0x25 + SDATA(ref).sv.index; > case SV_NTID: return 0x29 + SDATA(ref).sv.index; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > index 36ab837f6e..1f0fd466a9 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > @@ -1201,6 +1201,9 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) > bld.mkMov(def, bld.mkImm(0)); > } > break; > + case SV_COMBINED_TID: > + bld.mkMov(def, tid); > + break; > case SV_SAMPLE_POS: { > Value *off = new_LValue(func, FILE_ADDRESS); > bld.mkOp1(OP_RDSV, TYPE_U32, def, bld.mkSysVal(SV_SAMPLE_INDEX, 0)); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index 597dcdffbe..1410cf26c8 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -2576,6 +2576,18 @@ NVC0LoweringPass::handleRDSV(Instruction *i) > // TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID > i->op = OP_MOV; > i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); > + } else > + if (sv == SV_TID) { > + // Help CSE combine TID fetches > + Value *tid = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), > + bld.mkSysVal(SV_COMBINED_TID, 0)); > + i->op = OP_EXTBF; > + i->setSrc(0, tid); > + switch (sym->reg.data.sv.index) { > + case 0: i->setSrc(1, bld.mkImm(0x1000)); break; > + case 1: i->setSrc(1, bld.mkImm(0x0a10)); break; > + case 2: i->setSrc(1, bld.mkImm(0x061a)); break; > + } > } > if (sv == SV_VERTEX_COUNT) { > bld.setPosition(i, true); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 39177bd044..4dd3a6cdaa 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -1654,6 +1654,7 @@ ModifierFolding::visit(BasicBlock *bb) > // SLCT(a, b, const) -> cc(const) ? a : b > // RCP(RCP(a)) -> a > // MUL(MUL(a, b), const) -> MUL_Xconst(a, b) > +// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) > class AlgebraicOpt : public Pass > { > private: > @@ -1671,6 +1672,7 @@ private: > void handleCVT_EXTBF(Instruction *); > void handleSUCLAMP(Instruction *); > void handleNEG(Instruction *); > + void handleEXTBF_RDSV(Instruction *); > > BuildUtil bld; > }; > @@ -2175,6 +2177,41 @@ AlgebraicOpt::handleNEG(Instruction *i) { > } > } > > +// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) > +void > +AlgebraicOpt::handleEXTBF_RDSV(Instruction *i) > +{ > + Instruction *rdsv = i->getSrc(0)->getUniqueInsn(); > + if (rdsv->op != OP_RDSV || > + rdsv->getSrc(0)->asSym()->reg.data.sv.sv != SV_COMBINED_TID) > + return; > + // Avoid creating more RDSV instructions > + if (rdsv->getDef(0)->refCount() > 1) > + return; > + > + ImmediateValue imm; > + if (!i->src(1).getImmediate(imm)) > + return; > + > + int index; > + if (imm.isInteger(0x1000)) > + index = 0; > + else > + if (imm.isInteger(0x0a10)) > + index = 1; > + else > + if (imm.isInteger(0x061a)) > + index = 2; > + else > + return; > + > + bld.setPosition(i, false); > + > + i->op = OP_RDSV; > + i->setSrc(0, bld.mkSysVal(SV_TID, index)); > + i->setSrc(1, NULL); > +} > + > bool > AlgebraicOpt::visit(BasicBlock *bb) > { > @@ -2215,6 +2252,9 @@ AlgebraicOpt::visit(BasicBlock *bb) > case OP_NEG: > handleNEG(i); > break; > + case OP_EXTBF: > + handleEXTBF_RDSV(i); > + break; > default: > break; > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > index cbb21f5f72..ee3506fbae 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp > @@ -306,6 +306,7 @@ static const char *SemanticStr[SV_LAST + 1] = > "TESS_INNER", > "TESS_COORD", > "TID", > + "COMBINED_TID", > "CTAID", > "NTID", > "GRIDID", > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > index dc73231394..1ad3467337 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp > @@ -257,6 +257,7 @@ TargetNV50::getSVAddress(DataFile shaderFile, const > Symbol *sym) const > case SV_NTID: > return 0x2 + 2 * sym->reg.data.sv.index; > case SV_TID: > + case SV_COMBINED_TID: > return 0; > case SV_SAMPLE_POS: > return 0; /* sample position is handled differently */ > -- > 2.14.4 > _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
