Module: Mesa
Branch: main
Commit: 23795dc318a0df9e233123b76c1d61506a6289ce
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=23795dc318a0df9e233123b76c1d61506a6289ce

Author: Karol Herbst <[email protected]>
Date:   Thu Jul 20 15:38:13 2023 +0200

nvc0: fix num_gprs for Volta+

Overallocating by 2 gprs for ugprs is a wild guess by me. It does make sense
though as each subgroup shares 64 ugprs and that's 2 per thread.

Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: M Henning <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24261>

---

 src/gallium/drivers/nouveau/nvc0/nvc0_program.c |  4 ++--
 src/nouveau/codegen/nv50_ir_target_nvc0.cpp     | 11 ++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 63f781bb2ba..33edd13b051 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -686,9 +686,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    prog->relocs = info_out.bin.relocData;
    prog->fixups = info_out.bin.fixupData;
    if (info_out.target >= NVISA_GV100_CHIPSET)
-      prog->num_gprs = MIN2(info_out.bin.maxGPR + 5, 255); //XXX: why?
+      prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 3);
    else
-      prog->num_gprs = MAX2(4, (info_out.bin.maxGPR + 1));
+      prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 1);
    prog->cp.smem_size = info_out.bin.smemSize;
    prog->num_barriers = info_out.numBarriers;
 
diff --git a/src/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/nouveau/codegen/nv50_ir_target_nvc0.cpp
index da6c37c96c0..475539091eb 100644
--- a/src/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -265,9 +265,18 @@ void TargetNVC0::initOpInfo()
 unsigned int
 TargetNVC0::getFileSize(DataFile file) const
 {
-   const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
    const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
    const unsigned int bs = (chipset >= NVISA_GV100_CHIPSET) ? 16 : 0;
+   unsigned int gprs;
+
+   /* probably because of ugprs? */
+   if (chipset >= NVISA_GV100_CHIPSET)
+      gprs = 253;
+   else if (chipset >= NVISA_GK20A_CHIPSET)
+      gprs = 255;
+   else
+      gprs = 63;
+
    switch (file) {
    case FILE_NULL: return 0;
    case FILE_GPR: return MIN2(gprs, smregs / threads);
