Module: Mesa
Branch: master
Commit: 5d14562da86f1f3ee1a747183d8bac183ce75fd9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d14562da86f1f3ee1a747183d8bac183ce75fd9

Author: Indrajit Kumar Das <[email protected]>
Date:   Fri Oct 16 10:27:02 2020 +0530

radeonsi/gfx10: fix overflow and primitive queries

This aligns the offsets to match the memory layout of the query buffer
defined by gfx10_sh_query_buffer_mem and calls si_launch_grid_internal
to flush caches and wait for completion of shaders prior to retrieving
results.

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7181>

---

 src/gallium/drivers/radeonsi/gfx10_query.c       | 11 ++++++-----
 src/gallium/drivers/radeonsi/si_compute_blit.c   | 10 ++++------
 src/gallium/drivers/radeonsi/si_pipe.h           |  7 +++++++
 src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 23 +++++++++++------------
 4 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_query.c b/src/gallium/drivers/radeonsi/gfx10_query.c
index 18823a7c929..cb541f06bef 100644
--- a/src/gallium/drivers/radeonsi/gfx10_query.c
+++ b/src/gallium/drivers/radeonsi/gfx10_query.c
@@ -360,11 +360,11 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
    if (index >= 0) {
       switch (query->b.type) {
       case PIPE_QUERY_PRIMITIVES_GENERATED:
-         consts.offset = sizeof(uint32_t) * query->stream;
+         consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
          consts.config = 0;
          break;
       case PIPE_QUERY_PRIMITIVES_EMITTED:
-         consts.offset = sizeof(uint32_t) * (4 + query->stream);
+         consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
          consts.config = 0;
          break;
       case PIPE_QUERY_SO_STATISTICS:
@@ -372,7 +372,7 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
          consts.config = 0;
          break;
       case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-         consts.offset = sizeof(uint32_t) * query->stream;
+         consts.offset = 4 * sizeof(uint64_t) * query->stream;
          consts.config = 2;
          break;
       case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
@@ -454,8 +454,9 @@ static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct s
          si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
       }
 
-      sctx->b.launch_grid(&sctx->b, &grid);
-      sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+      void *saved_cs = sctx->cs_shader_state.program;
+      si_launch_grid_internal((struct si_context *)&sctx->b, &grid, saved_cs,
+                              SI_CS_WAIT_FOR_IDLE | SI_CS_PARTIAL_FLUSH_DISABLE);
 
       if (qbuf == query->last)
          break;
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index a66968c167d..62d022c394d 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -60,15 +60,13 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
    }
 }
 
-#define SI_CS_IMAGE_OP           (1 << 0)
-#define SI_CS_WAIT_FOR_IDLE      (1 << 1)
-#define SI_CS_RENDER_COND_ENABLE (1 << 2)
-
-static void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
+void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
                                     void *restore_cs, unsigned flags)
 {
    /* Wait for previous shaders to finish. */
-   sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
+   sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+   if (!(flags & SI_CS_PARTIAL_FLUSH_DISABLE))
+      sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
    /* Invalidate L0-L1 caches. */
    /* sL0 is never invalidated, because src resources don't use it. */
    sctx->flags |= SI_CONTEXT_INV_VCACHE;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e7d575308fd..20230556744 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1339,8 +1339,15 @@ bool vi_dcc_clear_level(struct si_context *sctx, struct si_texture *tex, unsigne
 void si_init_clear_functions(struct si_context *sctx);
 
 /* si_compute_blit.c */
+#define SI_CS_IMAGE_OP              (1 << 0)
+#define SI_CS_WAIT_FOR_IDLE         (1 << 1)
+#define SI_CS_RENDER_COND_ENABLE    (1 << 2)
+#define SI_CS_PARTIAL_FLUSH_DISABLE (1 << 3)
+
 unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
                             enum si_cache_policy cache_policy);
+void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
+                                    void *restore_cs, unsigned flags);
 void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset,
                      uint64_t size, uint32_t *clear_value, uint32_t clear_value_size,
                      enum si_coherency coher, bool force_cpdma);
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
index c1a150d6ab3..68e2e041acf 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
@@ -816,7 +816,7 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
                                    "DCL BUFFER[2]\n"
                                    "DCL CONST[0][0..0]\n"
                                    "DCL TEMP[0..5]\n"
-                                   "IMM[0] UINT32 {0, 7, 0, 4294967295}\n"
+                                   "IMM[0] UINT32 {0, 7, 256, 4294967295}\n"
                                    "IMM[1] UINT32 {1, 2, 4, 8}\n"
                                    "IMM[2] UINT32 {16, 32, 64, 128}\n"
 
@@ -855,13 +855,13 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
                                    "UADD TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww\n"
 
                                    /*
-                                   fence = buffer[0]@(base_offset + 32);
+                                   fence = buffer[0]@(base_offset + sizeof(gfx10_sh_query_buffer_mem.stream));
                                    if (!fence) {
                                            acc_missing = ~0u;
                                            break;
                                    }
                                    */
-                                   "UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].yyyy\n"
+                                   "UADD TEMP[5].x, TEMP[1].yyyy, IMM[2].wwww\n"
                                    "LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
                                    "USEQ TEMP[5], TEMP[5].xxxx, IMM[0].xxxx\n"
                                    "UIF TEMP[5]\n"
@@ -897,22 +897,21 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
 
                                    /*
                                    do {
-                                           generated = buffer[0]@stream_offset;
-                                           emitted = buffer[0]@(stream_offset + 16);
+                                           generated = buffer[0]@(stream_offset + 2 * sizeof(uint64_t));
+                                           emitted = buffer[0]@(stream_offset + 3 * sizeof(uint64_t));
                                            if (generated != emitted) {
                                                    acc_result = 1;
                                                    result_remaining = 0;
                                                    break;
                                            }
 
-                                           stream_offset += 4;
+                                           stream_offset += sizeof(gfx10_sh_query_buffer_mem.stream[0]);
                                    } while (--count);
                                    */
                                    "BGNLOOP\n"
                                    "UADD TEMP[5].x, TEMP[2].xxxx, IMM[2].xxxx\n"
-                                   "LOAD TEMP[4].x, BUFFER[0], TEMP[2].xxxx\n"
-                                   "LOAD TEMP[4].y, BUFFER[0], TEMP[5].xxxx\n"
-                                   "USNE TEMP[5], TEMP[4].xxxx, TEMP[4].yyyy\n"
+                                   "LOAD TEMP[4].xyzw, BUFFER[0], TEMP[5].xxxx\n"
+                                   "USNE TEMP[5], TEMP[4].xyxy, TEMP[4].zwzw\n"
                                    "UIF TEMP[5]\n"
                                    "MOV TEMP[0].x, IMM[1].xxxx\n"
                                    "MOV TEMP[1].y, IMM[0].xxxx\n"
@@ -924,15 +923,15 @@ void *gfx10_create_sh_query_result_cs(struct si_context *sctx)
                                    "UIF TEMP[5]\n"
                                    "BRK\n"
                                    "ENDIF\n"
-                                   "UADD TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz\n"
+                                   "UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy\n"
                                    "ENDLOOP\n"
                                    "ENDIF\n"
 
                                    /*
-                                           base_offset += 64;
+                                           base_offset += sizeof(gfx10_sh_query_buffer_mem);
                                    } // end outer loop
                                    */
-                                   "UADD TEMP[1].y, TEMP[1].yyyy, IMM[2].zzzz\n"
+                                   "UADD TEMP[1].y, TEMP[1].yyyy, IMM[0].zzzz\n"
                                    "ENDLOOP\n"
 
                                    /*

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to