Module: Mesa
Branch: master
Commit: c52e92ec3a37c9ab3fb35132e62e1ddf6a770c27
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c52e92ec3a37c9ab3fb35132e62e1ddf6a770c27

Author: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Date:   Wed May 25 23:36:48 2016 +0200

nvc0: allow to monitor MP perf counters with compute shaders

To read out MP perf counters we use a compute shader and need to upload
input data like a 64-bits addr used to store the values and a sequence
ID for synchronization. Currently, this input data is uploaded as user
uniforms which means that it's sticked to c0[], but if a compute shader
from a real application is used, monitoring those performance counters
will just overwrite some data and miserably crash.

Instead, sticking the 64-bits addr and the sequence into the driver
constant buffer seems like much better and will allow to monitor
counters with GL 4.3 apps.

Tested on GF119 and GK110, but should not hurt anything on GK104.

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu>

---

 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  3 +
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c        | 71 ++++++++++++++++------
 2 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 5be78aa..c099758 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -133,6 +133,9 @@
 /* 8 surfaces, at 16 32-bits integers each */
 #define NVC0_CB_AUX_SU_INFO(i)      0x400 + (i) * 16 * 4
 #define NVC0_CB_AUX_SU_SIZE         (NVC0_MAX_IMAGES * 16 * 4)
+/* 1 64-bits address and 1 32-bits sequence */
+#define NVC0_CB_AUX_MP_INFO         0x600
+#define NVC0_CB_AUX_MP_SIZE         3 * 4
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 54e0140..27cbbc4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -372,9 +372,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
     * mov b32 $r6 $pm6
     * mov b32 $r7 $pm7
     * set $p0 0x1 eq u32 $r8 0x0
-    * mov b32 $r10 c0[0x0]
+    * mov b32 $r10 c7[0x600]
     * ext u32 $r8 $r12 0x414
-    * mov b32 $r11 c0[0x4]
+    * mov b32 $r11 c7[0x604]
     * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
     * ext u32 $r9 $r12 0x208
     * (not $p0) exit
@@ -392,7 +392,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
     * add b32 $r12 $c $r12 $r9
     * st b128 wt g[$r10d] $r0q
     * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
-    * mov b32 $r0 c0[0x8]
+    * mov b32 $r0 c7[0x608]
     * add b32 $r13 $r13 0x0 $c
     * $p1 st b128 wt g[$r12d+0x40] $r4q
     * st b32 wt g[$r12d+0x50] $r0
@@ -410,9 +410,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
    0x2c00000028019c04ULL,
    0x2c0000002c01dc04ULL,
    0x190e0000fc81dc03ULL,
-   0x2800400000029de4ULL,
+   0x28005c1800029de4ULL,
    0x7000c01050c21c03ULL,
-   0x280040001002dde4ULL,
+   0x28005c181002dde4ULL,
    0x204282020042e047ULL,
    0x7000c00820c25c03ULL,
    0x80000000000021e7ULL,
@@ -430,7 +430,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
    0x4801000024c31c03ULL,
    0x9400000000a01fc5ULL,
    0x200002e04202c047ULL,
-   0x2800400020001de4ULL,
+   0x28005c1820001de4ULL,
    0x0800000000d35c42ULL,
    0x9400000100c107c5ULL,
    0x9400000140c01f85ULL,
@@ -453,9 +453,9 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
    0x86400000051c001aULL,
    0x86400000059c001eULL,
    0xdb201c007f9c201eULL,
-   0x64c03c00001c002aULL,
+   0x64c03ce0c01c002aULL,
    0xc00000020a1c3021ULL,
-   0x64c03c00009c002eULL,
+   0x64c03ce0c09c002eULL,
    0x0810a0808010b810ULL,
    0xc0000001041c3025ULL,
    0x180000000020003cULL,
@@ -473,7 +473,7 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
    0xe0840000049c3032ULL,
    0xfe800000001c2800ULL,
    0x080000b81080b010ULL,
-   0x64c03c00011c0002ULL,
+   0x64c03ce0c11c0002ULL,
    0xe08040007f9c3436ULL,
    0xfe80000020043010ULL,
    0xfc800000281c3000ULL,
@@ -1105,14 +1105,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
     * mov b32 $r6 $pm6
     * mov b32 $r7 $pm7
     * set $p0 0x1 eq u32 $r8 0x0
-    * mov b32 $r10 c0[0x0]
-    * mov b32 $r11 c0[0x4]
+    * mov b32 $r10 c15[0x600]
+    * mov b32 $r11 c15[0x604]
     * ext u32 $r8 $r9 0x414
     * (not $p0) exit
     * mul $r8 u32 $r8 u32 48
     * add b32 $r10 $c $r10 $r8
     * add b32 $r11 $r11 0x0 $c
-    * mov b32 $r8 c0[0x8]
+    * mov b32 $r8 c15[0x608]
     * st b128 wt g[$r10d+0x00] $r0q
     * st b128 wt g[$r10d+0x10] $r4q
     * st b32 wt g[$r10d+0x20] $r8
@@ -1128,14 +1128,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
    0x2c00000028019c04ULL,
    0x2c0000002c01dc04ULL,
    0x190e0000fc81dc03ULL,
-   0x2800400000029de4ULL,
-   0x280040001002dde4ULL,
+   0x28007c1800029de4ULL,
+   0x28007c181002dde4ULL,
    0x7000c01050921c03ULL,
    0x80000000000021e7ULL,
    0x10000000c0821c02ULL,
    0x4801000020a29c03ULL,
    0x0800000000b2dc42ULL,
-   0x2800400020021de4ULL,
+   0x28007c1820021de4ULL,
    0x9400000000a01fc5ULL,
    0x9400000040a11fc5ULL,
    0x9400000080a21f85ULL,
@@ -1813,6 +1813,40 @@ nvc0_hw_sm_get_program(struct nvc0_screen *screen)
    return prog;
 }
 
+static inline void
+nvc0_hw_sm_upload_input(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   uint64_t address;
+   const int s = 5;
+
+   address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);
+
+   PUSH_SPACE(push, 11);
+
+   if (screen->base.class_3d >= NVE4_3D_CLASS) {
+      BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);
+      PUSH_DATAh(push, address + NVC0_CB_AUX_MP_INFO);
+      PUSH_DATA (push, address + NVC0_CB_AUX_MP_INFO);
+      BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
+      PUSH_DATA (push, 3 * 4);
+      PUSH_DATA (push, 0x1);
+      BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 3);
+      PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+   } else {
+      BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+      PUSH_DATA (push, 2048);
+      PUSH_DATAh(push, address);
+      PUSH_DATA (push, address);
+      BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 3);
+      PUSH_DATA (push, NVC0_CB_AUX_MP_INFO);
+   }
+   PUSH_DATA (push, (hq->bo->offset + hq->base_offset));
+   PUSH_DATAh(push, (hq->bo->offset + hq->base_offset));
+   PUSH_DATA (push, hq->sequence);
+}
+
 static void
 nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
 {
@@ -1857,11 +1891,10 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
    PUSH_SPACE(push, 1);
    IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0);
 
-   pipe->bind_compute_state(pipe, screen->pm.prog);
-   input[0] = (hq->bo->offset + hq->base_offset);
-   input[1] = (hq->bo->offset + hq->base_offset) >> 32;
-   input[2] = hq->sequence;
+   /* upload input data for the compute shader which reads MP counters */
+   nvc0_hw_sm_upload_input(nvc0, hq);
 
+   pipe->bind_compute_state(pipe, screen->pm.prog);
    for (i = 0; i < 3; i++) {
       info.block[i] = block[i];
       info.grid[i] = grid[i];

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to