Module: Mesa
Branch: main
Commit: f8d205c4009ca534bac39dcff307aaf6b42b520f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f8d205c4009ca534bac39dcff307aaf6b42b520f

Author: Pierre-Eric Pelloux-Prayer <[email protected]>
Date:   Mon May  2 16:10:35 2022 +0200

radeonsi: fix gs_invocation query with NGG

When NGG is active, the GS invocation counter is always incremented, even
if there's no explicit GS.

Implementing the counter manually fixes it:
  * in emit_gs_epilogue for the legacy path
  * in gfx10_ngg_gs_emit_prologue for the ngg path

This fixes piglit's arb_query_buffer_object-qbo test.

Reviewed-by: Marek Olšák <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15861>

---

 .../drivers/radeonsi/ci/gfx10-navi10-fail.csv      | 21 ---------------------
 .../radeonsi/ci/gfx10_3-sienna_cichlid-fail.csv    | 21 ---------------------
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c    | 22 ++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_query.c            |  2 +-
 src/gallium/drivers/radeonsi/si_shader_llvm_gs.c   |  6 ++++++
 5 files changed, 29 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv
index e6c33aac5d9..308a7745a51 100644
--- a/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv
+++ b/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv
@@ -79,27 +79,6 @@ spec@arb_gpu_shader_fp64@execution@conversion@vert-conversion-explicit-dvec3-vec
 spec@arb_gpu_shader_fp64@execution@conversion@vert-conversion-explicit-dvec4-vec4,Fail
 spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
 spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
-spec@arb_query_buffer_object@coherency,Fail
-spec@arb_query_buffer_object@coherency@index-buffer-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@coherency@indirect-dispatch-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@coherency@indirect-draw-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@qbo,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_AFTER-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_BEFORE-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT64_ARB,Fail
 spec@arb_shader_clock@execution@clock,Fail
 spec@arb_shader_clock@execution@clock2x32,Fail
 spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
diff --git a/src/gallium/drivers/radeonsi/ci/gfx10_3-sienna_cichlid-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx10_3-sienna_cichlid-fail.csv
index 9d88292a488..b9687f876a2 100644
--- a/src/gallium/drivers/radeonsi/ci/gfx10_3-sienna_cichlid-fail.csv
+++ b/src/gallium/drivers/radeonsi/ci/gfx10_3-sienna_cichlid-fail.csv
@@ -82,27 +82,6 @@ spec@arb_gpu_shader_fp64@execution@conversion@vert-conversion-explicit-dvec4-vec
 spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail
 spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
 spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail
-spec@arb_query_buffer_object@coherency,Fail
-spec@arb_query_buffer_object@coherency@index-buffer-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@coherency@indirect-dispatch-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@coherency@indirect-draw-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@coherency@indirect-draw-count-GL_GEOMETRY_SHADER_INVOCATIONS,Fail
-spec@arb_query_buffer_object@qbo,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_AFTER-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_AFTER-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_BEFORE-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-ASYNC_CPU_READ_BEFORE-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC-GL_UNSIGNED_INT64_ARB,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT,Fail
-spec@arb_query_buffer_object@qbo@query-GL_GEOMETRY_SHADER_INVOCATIONS-SYNC_CPU_READ_AFTER_CACHE_TEST-GL_UNSIGNED_INT64_ARB,Fail
 spec@arb_shader_clock@execution@clock,Fail
 spec@arb_shader_clock@execution@clock2x32,Fail
 spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index d4a96d71087..c7ae46fa402 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1839,6 +1839,28 @@ void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx)
    }
    ac_build_endif(&ctx->ac, 5090);
 
+   tmp = si_is_gs_thread(ctx);
+   ac_build_ifcc(&ctx->ac, tmp, 15090);
+      {
+         tmp = si_unpack_param(ctx, ctx->vs_state_bits, 31, 1);
+         tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+         ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (GS_PIPELINE_STATS_EMU) */
+         LLVMValueRef args[] = {
+            ctx->ac.i32_1,
+            ngg_get_emulated_counters_buf(ctx),
+            LLVMConstInt(ctx->ac.i32,
+                         
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
+                             SI_QUERY_STATS_END_OFFSET_DW) * 4,
+                         false),
+            ctx->ac.i32_0,                            /* soffset */
+            ctx->ac.i32_0,                            /* cachepolicy */
+         };
+
+         ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", 
ctx->ac.i32, args, 5, 0);
+         ac_build_endif(&ctx->ac, 5109);
+      }
+   ac_build_endif(&ctx->ac, 15090);
+
    ac_build_s_barrier(&ctx->ac);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 06f45270dc5..7977192e37e 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -730,7 +730,7 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned
       query->result_size += 8; /* for the fence + alignment */
       query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
       query->index = index;
-      if (index == PIPE_STAT_QUERY_GS_PRIMITIVES &&
+      if ((index == PIPE_STAT_QUERY_GS_PRIMITIVES || index == 
PIPE_STAT_QUERY_GS_INVOCATIONS) &&
           sscreen->use_ngg && (sscreen->info.chip_class >= GFX10 && 
sscreen->info.chip_class <= GFX10_3))
          query->flags |= SI_QUERY_EMULATE_GS_COUNTERS;
       break;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index a607f94fb31..c862e2850ae 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -253,7 +253,13 @@ static void emit_gs_epilogue(struct si_shader_context *ctx)
             ctx->ac.i32_0,                            /* soffset */
             ctx->ac.i32_0,                            /* cachepolicy */
          };
+         ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", 
ctx->ac.i32, args, 5, 0);
 
+         args[0] = ctx->ac.i32_1;
+         args[2] = LLVMConstInt(ctx->ac.i32,
+                                
(si_hw_query_dw_offset(PIPE_STAT_QUERY_GS_INVOCATIONS) +
+                                    SI_QUERY_STATS_END_OFFSET_DW) * 4,
+                                 false);
          ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", 
ctx->ac.i32, args, 5, 0);
       }
       ac_build_endif(&ctx->ac, 5229);

Reply via email to