On 04/28/2016 04:12 PM, Samuel Pitoiset wrote:
On 04/28/2016 04:05 PM, Hans de Goede wrote:Add support for SV_WORK_DIM for nvc0 and nve4. Signed-off-by: Hans de Goede <hdego...@redhat.com> --- Changes in v1 (first non RFC posting): -Adjust NVC0_CB_AUX_GRID_SIZE for the extra value in grid-info -Use NVC0_CB_AUX_GRID_SIZE instead of a hardcoded value when uploading the grid info -Also implement SV_WORK_DIM for nvc0 --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 1 + .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 ++ .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 24 ++++++++++++++++------ src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 +- src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 7 ++++--- 8 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 94e54bb..41804b6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -461,6 +461,7 @@ enum SVSemantic SV_BASEVERTEX, SV_BASEINSTANCE, SV_DRAWID, + SV_WORK_DIM, SV_UNDEFINED, SV_LAST }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3708f37..f75f480 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -408,6 +408,7 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval) case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX; case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE; case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID; + case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM; default: assert(0); return nv50_ir::SV_CLOCK; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 3bce962..1785623 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2178,6 +2178,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i) i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); return true; } + // Fallthrough + case SV_WORK_DIM: addr += prog->driver->prop.cp.gridInfoBase; bld.mkLoad(TYPE_U32, i->getDef(0), bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index 9e1e7bf..80cb9fd 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -293,6 +293,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0; case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0; case SV_GRIDID: return kepler ? 
0x18 : ~0; + case SV_WORK_DIM: return 0x1c; case SV_SAMPLE_INDEX: return 0; case SV_SAMPLE_POS: return 0; case SV_SAMPLE_MASK: return 0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index bbc8edb..9f85ead 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -284,7 +284,8 @@ nvc0_state_validate_cp(struct nvc0_context *nvc0, uint32_t mask) } static void -nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) +nvc0_compute_upload_input(struct nvc0_context *nvc0, + const struct pipe_grid_info *info) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; @@ -303,11 +304,22 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4); PUSH_DATA (push, 0); - PUSH_DATAp(push, input, cp->parm_size / 4); - - BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); - PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); + PUSH_DATAp(push, info->input, cp->parm_size / 4); } + + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, 2048); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (15 << 8) | 1); + BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1); + /* + 0x1c as we only upload work_dim on nvc0, the rest uses special regs */ + PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO + 0x1c); Well, I would prefer to see NVC0_CB_AUX_GRID_INFO(3) here to avoid this magic offset. You also need to change: #define NVC0_CB_AUX_GRID_INFO 0x0e0 /* CP */ into #define NVC0_CB_AUX_GRID_INFO(i) (0x0e0 + (i) * 0x4) /* CP */ and update the use of NVC0_CB_AUX_GRID_INFO in nvc0_program.c as well.
Err, the index should be 7 actually (0x1c / 4 = 7), i.e. NVC0_CB_AUX_GRID_INFO(7), but you get the idea. :-)
+ PUSH_DATA (push, info->work_dim); + + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); } void @@ -325,7 +337,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) return; } - nvc0_compute_upload_input(nvc0, info->input); + nvc0_compute_upload_input(nvc0, info); BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1); PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc)); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 7fcbf4a..7d25c46 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -113,9 +113,9 @@ /* 8 sets of 32-bits coordinate offsets */ #define NVC0_CB_AUX_MS_INFO 0x0a0 /* CP */ #define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4) -/* block/grid size, at 3 32-bits integers each and gridid */ +/* block/grid size, at 3 32-bits integers each, gridid and work_dim */ #define NVC0_CB_AUX_GRID_INFO 0x0e0 /* CP */ -#define NVC0_CB_AUX_GRID_SIZE (7 * 4) +#define NVC0_CB_AUX_GRID_SIZE (8 * 4) /* 8 user clip planes, at 4 32-bits floats each */ #define NVC0_CB_AUX_UCP_INFO 0x100 #define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index ca6349c..126a038 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -549,7 +549,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, if (chipset >= NVISA_GK104_CHIPSET) { info->io.auxCBSlot = 7; info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); - info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO; info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0); info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0); } else { @@ -558,6 +557,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->io.msInfoCBSlot = 0; info->io.msInfoBase = NVC0_CB_AUX_MS_INFO; 
info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0); + info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO; } else { if (chipset >= NVISA_GK104_CHIPSET) { info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 1fe6026..b6496d3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -496,7 +496,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO); PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO); BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); - PUSH_DATA (push, 7 * 4); + PUSH_DATA (push, NVC0_CB_AUX_GRID_SIZE); PUSH_DATA (push, 0x1); if (unlikely(info->indirect)) { @@ -506,18 +506,19 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, nouveau_pushbuf_space(push, 16, 0, 1); PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); - BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, info->block, 3); nouveau_pushbuf_data(push, res->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); } else { - BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, info->block, 3); PUSH_DATAp(push, info->grid, 3); } PUSH_DATA (push, 0); + PUSH_DATA (push, info->work_dim); BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
-- -Samuel _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev