Module: Mesa
Branch: main
Commit: daaa8ddb8ee1fd2a6ed70a98f8bfc04bcd31179d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=daaa8ddb8ee1fd2a6ed70a98f8bfc04bcd31179d

Author: Qiang Yu <[email protected]>
Date:   Thu Aug 11 10:19:47 2022 +0800

ac/llvm,radeonsi: lower nir primitive counter add intrinsics

Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18010>

---

 src/amd/llvm/ac_nir_to_llvm.c                     | 15 ------------
 src/amd/llvm/ac_shader_abi.h                      |  9 -------
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c   | 30 -----------------------
 src/gallium/drivers/radeonsi/si_nir_lower_abi.c   | 20 +++++++++++++++
 src/gallium/drivers/radeonsi/si_shader_internal.h |  2 --
 src/gallium/drivers/radeonsi/si_shader_llvm.c     |  1 -
 6 files changed, 20 insertions(+), 57 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index c092aa1f523..3f8aec622ca 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -4465,21 +4465,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       result = ac_build_gather_values(&ctx->ac, global_count, 
instr->num_components);
       break;
    }
-   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
-      ctx->abi->atomic_add_prim_count(ctx->abi, ~0U, get_src(ctx, 
instr->src[0]),
-                                      ac_prim_count_gs_emit);
-      break;
-   case nir_intrinsic_atomic_add_gen_prim_count_amd:
-   case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
-      LLVMValueRef prim_count = get_src(ctx, instr->src[0]);
-      unsigned stream = nir_intrinsic_stream_id(instr);
-      enum ac_prim_count count_type =
-         instr->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
-         ac_prim_count_gen : ac_prim_count_xfb;
-
-      ctx->abi->atomic_add_prim_count(ctx->abi, stream, prim_count, 
count_type);
-      break;
-   }
    default:
       fprintf(stderr, "Unknown intrinsic: ");
       nir_print_instr(&instr->instr, stderr);
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index 36519b995f0..83aadfba544 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -34,12 +34,6 @@
 
 #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
 
-enum ac_prim_count {
-   ac_prim_count_gs_emit,
-   ac_prim_count_gen,
-   ac_prim_count_xfb,
-};
-
 /* Document the shader ABI during compilation. This is what allows radeonsi and
  * radv to share a compiler backend.
  */
@@ -73,9 +67,6 @@ struct ac_shader_abi {
    void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
                                     LLVMValueRef vertexidx, LLVMValueRef 
*addrs);
 
-   void (*atomic_add_prim_count)(struct ac_shader_abi *abi, unsigned stream,
-                                 LLVMValueRef prim_count, enum ac_prim_count 
count_type);
-
    LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
                                unsigned driver_location, unsigned component,
                                unsigned num_components, unsigned vertex_index,
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index d38c9453f7c..17d0bea7441 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -124,36 +124,6 @@ void gfx10_ngg_export_vertex(struct ac_shader_abi *abi)
    si_llvm_build_vs_exports(ctx, outputs, num_outputs);
 }
 
-void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned 
stream,
-                                     LLVMValueRef prim_count, enum 
ac_prim_count count_type)
-{
-   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
-   unsigned offset;
-   LLVMValueRef query_buf;
-   if (count_type == ac_prim_count_gs_emit) {
-      offset = si_query_pipestat_end_dw_offset(ctx->screen, 
PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
-      query_buf = ngg_get_emulated_counters_buf(ctx);
-   } else {
-      offset = count_type == ac_prim_count_gen ?
-         offsetof(struct gfx10_sh_query_buffer_mem, 
stream[stream].generated_primitives) :
-         offsetof(struct gfx10_sh_query_buffer_mem, 
stream[stream].emitted_primitives);
-
-      query_buf = ngg_get_query_buf(ctx);
-   }
-
-   LLVMValueRef args[] = {
-      prim_count,
-      query_buf,
-      LLVMConstInt(ctx->ac.i32, offset, false),
-      ctx->ac.i32_0, /* soffset */
-      ctx->ac.i32_0, /* cachepolicy */
-   };
-
-   ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
-                      ctx->ac.i32, args, 5, 0);
-}
-
 void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
 {
    LLVMBuilderRef builder = ctx->ac.builder;
diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
index 4725c3a015e..4576317879f 100644
--- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
+++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
@@ -233,6 +233,26 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
       replacement = load_internal_binding(b, args, slot);
       break;
    }
+   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
+   case nir_intrinsic_atomic_add_gen_prim_count_amd:
+   case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
+      unsigned offset;
+      nir_ssa_def *buf;
+      if (intrin->intrinsic == 
nir_intrinsic_atomic_add_gs_emit_prim_count_amd) {
+         buf = load_internal_binding(b, args, 
SI_GS_QUERY_EMULATED_COUNTERS_BUF);
+         offset = si_query_pipestat_end_dw_offset(sel->screen, 
PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
+      } else {
+         unsigned stream = nir_intrinsic_stream_id(intrin);
+         buf = load_internal_binding(b, args, SI_GS_QUERY_BUF);
+         offset = intrin->intrinsic == 
nir_intrinsic_atomic_add_gen_prim_count_amd ?
+            offsetof(struct gfx10_sh_query_buffer_mem, 
stream[stream].generated_primitives) :
+            offsetof(struct gfx10_sh_query_buffer_mem, 
stream[stream].emitted_primitives);
+      }
+
+      nir_ssa_def *prim_count = intrin->src[0].ssa;
+      nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset);
+      break;
+   }
    default:
       return false;
    }
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 494937e1cde..93e3925ce25 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -183,8 +183,6 @@ LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx);
 unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
 bool gfx10_ngg_export_prim_early(struct si_shader *shader);
 void gfx10_ngg_export_vertex(struct ac_shader_abi *abi);
-void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned 
stream,
-                                     LLVMValueRef prim_count, enum 
ac_prim_count count_type);
 void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx);
 unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
 bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index c1d91d4198a..fc9ea32c161 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -778,7 +778,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
 
    ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
    ctx->abi.export_vertex = gfx10_ngg_export_vertex;
-   ctx->abi.atomic_add_prim_count = gfx10_ngg_atomic_add_prim_count;
 
    si_llvm_init_resource_callbacks(ctx);
    si_llvm_create_main_func(ctx);

Reply via email to