Module: Mesa
Branch: main
Commit: 3c1ebebeae2ad888b030f85acbdc4b86b30c414a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c1ebebeae2ad888b030f85acbdc4b86b30c414a

Author: Qiang Yu <[email protected]>
Date:   Wed Jun  8 11:09:35 2022 +0800

radeonsi: use nir_lower_gs_intrinsics

Replace some llvm code.

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17109>

---

 src/amd/llvm/ac_nir_to_llvm.c                     |  3 --
 src/amd/llvm/ac_shader_abi.h                      |  2 -
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c   | 28 +++++---------
 src/gallium/drivers/radeonsi/si_shader_internal.h |  3 +-
 src/gallium/drivers/radeonsi/si_shader_llvm_gs.c  | 47 ++++-------------------
 src/gallium/drivers/radeonsi/si_shader_nir.c      |  3 ++
 6 files changed, 22 insertions(+), 64 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 6431ae63a0e..8b3dff9782a 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -4053,9 +4053,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, 
nir_intrinsic_instr *ins
                                        false);
       break;
    }
-   case nir_intrinsic_emit_vertex:
-      ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), 
ctx->abi->outputs);
-      break;
    case nir_intrinsic_emit_vertex_with_counter: {
       unsigned stream = nir_intrinsic_stream_id(instr);
       LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index 00f32c941a2..1268669a5f5 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -68,8 +68,6 @@ struct ac_shader_abi {
 
    void (*export_vertex)(struct ac_shader_abi *abi);
 
-   void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, 
LLVMValueRef *addrs);
-
    void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
 
    void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 192cd0bd20f..b78957c0fe9 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1885,27 +1885,12 @@ static LLVMValueRef ngg_gs_get_emit_primflag_ptr(struct 
si_shader_context *ctx,
    return LLVMBuildGEP2(ctx->ac.builder, vertexptr.pointee_type, 
vertexptr.value, gep_idx, 3, "");
 }
 
-void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, 
LLVMValueRef *addrs)
+void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream,
+                              LLVMValueRef vertexidx, LLVMValueRef *addrs)
 {
    const struct si_shader_selector *sel = ctx->shader->selector;
    const struct si_shader_info *info = &sel->info;
    LLVMBuilderRef builder = ctx->ac.builder;
-   LLVMValueRef tmp;
-   const LLVMValueRef vertexidx = LLVMBuildLoad2(builder, ctx->ac.i32, 
ctx->gs_next_vertex[stream], "");
-
-   /* If this thread has already emitted the declared maximum number of
-    * vertices, skip the write: excessive vertex emissions are not
-    * supposed to have any effect.
-    */
-   const LLVMValueRef can_emit =
-      LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
-                    LLVMConstInt(ctx->ac.i32, sel->info.base.gs.vertices_out, 
false), "");
-
-   tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
-   tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
-   LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
-
-   ac_build_ifcc(&ctx->ac, can_emit, 9001);
 
    const struct ac_llvm_pointer vertexptr = ngg_gs_emit_vertex_ptr(ctx, 
gfx10_get_thread_id_in_tg(ctx), vertexidx);
    unsigned out_idx = 0;
@@ -1923,6 +1908,13 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context 
*ctx, unsigned stream, LL
    }
    assert(out_idx * 4 == info->gsvs_vertex_size);
 
+   /* Store the current number of emitted vertices to zero out remaining
+    * primitive flags in case the geometry shader doesn't emit the maximum
+    * number of vertices.
+    */
+   LLVMValueRef tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+   LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
    /* Determine and store whether this vertex completed a primitive. */
    const LLVMValueRef curverts = LLVMBuildLoad2(builder, ctx->ac.i32, 
ctx->gs_curprim_verts[stream], "");
 
@@ -1955,8 +1947,6 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context 
*ctx, unsigned stream, LL
    tmp = LLVMBuildLoad2(builder, ctx->ac.i32, ctx->gs_generated_prims[stream], 
"");
    tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, 
ctx->ac.i32, ""), "");
    LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
-
-   ac_build_endif(&ctx->ac, 9001);
 }
 
 void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 3ce257881b6..daa3766c69c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -188,7 +188,8 @@ void gfx10_ngg_culling_build_end(struct si_shader_context 
*ctx);
 void gfx10_ngg_build_end(struct si_shader_context *ctx);
 void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned 
stream,
                                      LLVMValueRef prim_count, enum 
ac_prim_count count_type);
-void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, 
LLVMValueRef *addrs);
+void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream,
+                              LLVMValueRef vertexidx, LLVMValueRef *addrs);
 void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx);
 void gfx10_ngg_gs_build_end(struct si_shader_context *ctx);
 unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index 363267119bc..c5b1c123231 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -162,48 +162,23 @@ void si_llvm_gs_build_end(struct si_shader_context *ctx)
 }
 
 /* Emit one vertex from the geometry shader */
-static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, 
LLVMValueRef *addrs)
+static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream,
+                                LLVMValueRef vertexidx, LLVMValueRef *addrs)
 {
    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
 
    if (ctx->shader->key.ge.as_ngg) {
-      gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
+      gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
       return;
    }
 
    struct si_shader_info *info = &ctx->shader->selector->info;
    struct si_shader *shader = ctx->shader;
    LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args.gs2vs_offset);
-   LLVMValueRef gs_next_vertex;
-   LLVMValueRef can_emit;
-   unsigned chan, offset;
-   int i;
-
-   /* Write vertex attribute values to GSVS ring */
-   gs_next_vertex = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i32, 
ctx->gs_next_vertex[stream], "");
 
-   /* If this thread has already emitted the declared maximum number of
-    * vertices, skip the write: excessive vertex emissions are not
-    * supposed to have any effect.
-    *
-    * If the shader has no writes to memory, kill it instead. This skips
-    * further memory loads and may allow LLVM to skip to the end
-    * altogether.
-    */
-   can_emit =
-      LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
-                    LLVMConstInt(ctx->ac.i32, 
shader->selector->info.base.gs.vertices_out, 0), "");
-
-   bool use_kill = !info->base.writes_memory;
-   if (use_kill) {
-      ac_build_kill_if_false(&ctx->ac, can_emit);
-   } else {
-      ac_build_ifcc(&ctx->ac, can_emit, 6505);
-   }
-
-   offset = 0;
-   for (i = 0; i < info->num_outputs; i++) {
-      for (chan = 0; chan < 4; chan++) {
+   unsigned offset = 0;
+   for (unsigned i = 0; i < info->num_outputs; i++) {
+      for (unsigned chan = 0; chan < 4; chan++) {
          if (!(info->output_usagemask[i] & (1 << chan)) ||
              ((info->output_streams[i] >> (2 * chan)) & 3) != stream)
             continue;
@@ -213,7 +188,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, 
unsigned stream, LLVM
             LLVMConstInt(ctx->ac.i32, offset * 
shader->selector->info.base.gs.vertices_out, 0);
          offset++;
 
-         voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
+         voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
          voffset = LLVMBuildMul(ctx->ac.builder, voffset, 
LLVMConstInt(ctx->ac.i32, 4, 0), "");
 
          out_val = ac_to_integer(&ctx->ac, out_val);
@@ -223,9 +198,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, 
unsigned stream, LLVM
       }
    }
 
-   gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, 
ctx->ac.i32_1, "");
-   LLVMBuildStore(ctx->ac.builder, gs_next_vertex, 
ctx->gs_next_vertex[stream]);
-
    /* Signal vertex emission if vertex data was written. */
    if (offset) {
       ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | 
(stream << 8),
@@ -234,9 +206,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, 
unsigned stream, LLVM
       ctx->gs_emitted_vertices = LLVMBuildAdd(ctx->ac.builder, 
ctx->gs_emitted_vertices,
                                               ctx->ac.i32_1, "vert");
    }
-
-   if (!use_kill)
-      ac_build_endif(&ctx->ac, 6505);
 }
 
 /* Cut one primitive from the geometry shader */
@@ -601,6 +570,6 @@ struct si_shader *si_generate_gs_copy_shader(struct 
si_screen *sscreen,
 
 void si_llvm_init_gs_callbacks(struct si_shader_context *ctx)
 {
-   ctx->abi.emit_vertex = si_llvm_emit_vertex;
+   ctx->abi.emit_vertex_with_counter = si_llvm_emit_vertex;
    ctx->abi.emit_primitive = si_llvm_emit_primitive;
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 6fac0ae9f5d..b0c2e4ff5b5 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -297,6 +297,9 @@ static void si_lower_nir(struct si_screen *sscreen, struct 
nir_shader *nir)
        nir->info.stage == MESA_SHADER_GEOMETRY)
       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out);
 
+   if (nir->info.stage == MESA_SHADER_GEOMETRY)
+      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
+
    if (nir->info.stage == MESA_SHADER_COMPUTE) {
       if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS) {
          /* If we are shuffling local_invocation_id for quad derivatives, we

Reply via email to