Module: Mesa
Branch: staging/23.0
Commit: 016fc3c449c2f7fd96c0a89b0f43d5f9ed5eb1a3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=016fc3c449c2f7fd96c0a89b0f43d5f9ed5eb1a3

Author: Karol Herbst <[email protected]>
Date:   Wed Mar 22 22:10:55 2023 +0100

nvc0: enable fp helper invocation memory loads on Turing+

Starting with GSP this will be the only way of doing so; for older gens
we'll just fix it up on the kernel side.

Cc: mesa-stable
Acked-by: M Henning <[email protected]>
Signed-off-by: Karol Herbst <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21989>
(cherry picked from commit 72b7e20bf77c805cbdb816d6cd02e87d43fddf20)

---

 .pick_status.json                                  |  2 +-
 src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h | 53 ++++++++++++++++++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_macros.h     |  6 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  8 ++++
 4 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index ee7c8b29c85..5f9cb20b438 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1669,7 +1669,7 @@
         "description": "nvc0: enable fp helper invocation memory loads on 
Turing+",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
index cfcc650bffd..7fec2519aeb 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
@@ -834,6 +834,59 @@ const uint32_t mmec597_conservative_raster_state[] = {
                  ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      
NONE),
 };
 
+const uint32_t mmec597_set_priv_reg[] = {
+// r0 = load();
+// mthd(WAIT_FOR_IDLE, 0);
+// send(0);
+// mthd(SET_MME_SHADOW_SCRATCH(0), 1);
+// send(0);
+   MME_INSN(0,   ADD,   R0,     LOAD0,      ZERO, (0<<12)|0x0110/4,     
IMMED0, IMMEDHIGH0,
+                 ADD, ZERO,      ZERO,      ZERO, (1<<12)|0x3400/4,     
IMMED1, IMMEDHIGH0),
+// send(load());
+   MME_INSN(0,   ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,      LOAD0,
+                 ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE),
+// alu0 = r0;
+// r0 = read(NVC597_SET_MME_SHADOW_SCRATCH(26));
+// send(load());
+// mthd(SET_FALCON04, 0);
+// send(alu0);
+   MME_INSN(0,   ADD, ZERO,        R0,      ZERO, (0<<12)|0x2310/4,       
NONE,      LOAD0,
+               STATE,   R0,     IMMED,      ZERO,         0x3468/4,     
IMMED0,       ALU0),
+// r0 &= 0xffff;
+   MME_INSN(0, MERGE,   R0,      ZERO,        R0, (0<<10)|(8<<5)|0,       
NONE,       NONE,
+                 ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE),
+// if (r0 == 2) {
+   MME_INSN(0,   BEQ, ZERO,        R0, IMMEDPAIR,   (2<<14)|0x0004,       
NONE,       NONE,
+                 ADD, ZERO,      ZERO,      ZERO,                2,       
NONE,       NONE),
+//    do {
+//       r0 = read(NVC597_SET_MME_SHADOW_SCRATCH(0));
+//       mthd(NO_OPERATION);
+//       send(0);
+   MME_INSN(0, STATE,   R0,     IMMED,      ZERO,         0x3400/4,     
IMMED1, IMMEDHIGH1,
+                 ADD, ZERO,      ZERO,      ZERO,         0x0100/4,       
NONE,       NONE),
+//    } while(r0 != 1);
+   MME_INSN(1,   BEQ, ZERO,        R0, IMMEDPAIR,   (1<<14)|0x1fff,       
NONE,       NONE,
+                 ADD, ZERO,      ZERO,      ZERO,                1,       
NONE,       NONE),
+// } else {
+   MME_INSN(0,   JAL, ZERO,      ZERO,      ZERO,   (1<<15)|0x0003,       
NONE,       NONE,
+                 ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE),
+//    while (HW_LOOP_COUNT < 10) {
+   MME_INSN(0,  LOOP, ZERO, IMMEDPAIR,      ZERO,                2,       
NONE,       NONE,
+                 ADD, ZERO,      ZERO,      ZERO,               10,       
NONE,       NONE),
+//       mthd(NO_OPERATION, 0);
+//       send(0);
+//    }
+// }
+   MME_INSN(0,   ADD, ZERO,      ZERO,      ZERO, (0<<12)|0x0100/4,     
IMMED0,     IMMED1,
+                 ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE),
+// nop
+   MME_INSN(1,   ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE,
+                 ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE),
+// nop
+   MME_INSN(0,   ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE,
+                 ADD, ZERO,      ZERO,      ZERO,                0,       
NONE,       NONE),
+};
+
 const uint32_t mmec597_compute_counter[] = {
 // r0 = load();
 // r1 = 1;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
index f4842fd6d68..566eaf3f68f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h
@@ -39,8 +39,10 @@
 
 #define NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE                        
0x00003868
 
-#define NVC0_3D_MACRO_COMPUTE_COUNTER                          0x00003870
+#define NVC0_3D_MACRO_SET_PRIV_REG                             0x00003870
 
-#define NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY                 0x00003878
+#define NVC0_3D_MACRO_COMPUTE_COUNTER                          0x00003878
+
+#define NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY                 0x00003880
 
 #endif /* __NVC0_MACROS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 9b7281e3de8..c1685332362 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1479,6 +1479,7 @@ nvc0_screen_create(struct nouveau_device *dev)
       MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, 
mmec597_draw_elts_indirect_count);
       MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write);
       MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, 
mmec597_conservative_raster_state);
+      MK_MACRO(NVC0_3D_MACRO_SET_PRIV_REG, mmec597_set_priv_reg);
       MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter);
       MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, 
mmec597_compute_counter_to_query);
    }
@@ -1555,6 +1556,13 @@ nvc0_screen_create(struct nouveau_device *dev)
    BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
    PUSH_DATA (push, 0);
 
+   if (screen->eng3d->oclass >= TU102_3D_CLASS) {
+      BEGIN_1IC0(push, NVC0_3D(MACRO_SET_PRIV_REG), 3);
+      PUSH_DATA (push, 0x419ba4);
+      PUSH_DATA (push, 0x00000000);
+      PUSH_DATA (push, 0x00000008);
+   }
+
    PUSH_KICK (push);
 
    screen->tic.entries = CALLOC(

Reply via email to