Module: Mesa
Branch: main
Commit: 23795dc318a0df9e233123b76c1d61506a6289ce
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=23795dc318a0df9e233123b76c1d61506a6289ce

Author: Karol Herbst <[email protected]>
Date:   Thu Jul 20 15:38:13 2023 +0200

nvc0: fix num_gprs for Volta+

Overallocating by 2 gprs for ugprs is a wild guess on my part. It does make
sense though, as each subgroup shares 64 ugprs, which works out to 2 per
thread (assuming 32 threads per subgroup).

Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: M Henning <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24261>

---

 src/gallium/drivers/nouveau/nvc0/nvc0_program.c |  4 ++--
 src/nouveau/codegen/nv50_ir_target_nvc0.cpp     | 11 ++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 63f781bb2ba..33edd13b051 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -686,9 +686,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
    prog->relocs = info_out.bin.relocData;
    prog->fixups = info_out.bin.fixupData;
    if (info_out.target >= NVISA_GV100_CHIPSET)
-      prog->num_gprs = MIN2(info_out.bin.maxGPR + 5, 255); //XXX: why?
+      prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 3);
    else
-      prog->num_gprs = MAX2(4, (info_out.bin.maxGPR + 1));
+      prog->num_gprs = MAX2(4, info_out.bin.maxGPR + 1);
    prog->cp.smem_size = info_out.bin.smemSize;
    prog->num_barriers = info_out.numBarriers;
 
diff --git a/src/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/nouveau/codegen/nv50_ir_target_nvc0.cpp
index da6c37c96c0..475539091eb 100644
--- a/src/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -265,9 +265,18 @@ void TargetNVC0::initOpInfo()
 unsigned int
 TargetNVC0::getFileSize(DataFile file) const
 {
-   const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
    const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
    const unsigned int bs = (chipset >= NVISA_GV100_CHIPSET) ? 16 : 0;
+   unsigned int gprs;
+
+   /* probably because of ugprs? */
+   if (chipset >= NVISA_GV100_CHIPSET)
+      gprs = 253;
+   else if (chipset >= NVISA_GK20A_CHIPSET)
+      gprs = 255;
+   else
+      gprs = 63;
+
    switch (file) {
    case FILE_NULL:          return 0;
    case FILE_GPR:           return MIN2(gprs, smregs / threads);

Reply via email to