Module: Mesa
Branch: main
Commit: 7bc56911f812d11d757b9824e6dad87b319b4481
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7bc56911f812d11d757b9824e6dad87b319b4481

Author: Qiang Yu <[email protected]>
Date:   Sat Jun 11 15:29:50 2022 +0800

radeonsi: implement export_vertex abi

Used by the NIR NGG lowering pass.

Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17109>

---

 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c   | 29 +++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_shader.c          |  9 +++++++
 src/gallium/drivers/radeonsi/si_shader_internal.h |  1 +
 src/gallium/drivers/radeonsi/si_shader_llvm.c     |  9 ++++++-
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index b78957c0fe9..8ecfd74e24f 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1753,6 +1753,35 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx)
    ac_build_endif(&ctx->ac, 6002);
 }
 
+void gfx10_ngg_export_vertex(struct ac_shader_abi *abi)
+{
+   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+   struct si_shader_info *info = &ctx->shader->selector->info;
+   struct si_shader_output_values outputs[PIPE_MAX_SHADER_OUTPUTS];
+   LLVMValueRef *addrs = ctx->abi.outputs;
+
+   unsigned num_outputs = info->num_outputs;
+   /* If needed, the NIR NGG lowering appends the primitive ID export as the last output. */
+   if (ctx->shader->key.ge.mono.u.vs_export_prim_id)
+      num_outputs++;
+   /* Fill in semantic, stream mask and channel values for every output, then export them. */
+   for (unsigned i = 0; i < num_outputs; i++) {
+      if (i < info->num_outputs) {
+         outputs[i].semantic = info->output_semantic[i];
+         outputs[i].vertex_streams = info->output_streams[i];
+      } else {
+         outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
+         outputs[i].vertex_streams = 0;
+      }
+      /* Load the four channels of output i from addrs[4 * i .. 4 * i + 3] as f32. */
+      for (unsigned j = 0; j < 4; j++)
+         outputs[i].values[j] =
+            LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], "");
+   }
+
+   si_llvm_build_vs_exports(ctx, NULL, outputs, num_outputs);
+}
+
 void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
                                      LLVMValueRef prim_count, enum ac_prim_count count_type)
 {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index feda45dade7..dd21c9182de 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1682,6 +1682,15 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade
          assert(intr->num_components == 1); /* only scalar stores expected */
          nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
 
+         /* The primitive ID output is added by NGG lowering, so its output
+          * info is not pre-built in si_shader_info. It is handled at the end
+          * of this function.
+          */
+         if ((nir->info.stage == MESA_SHADER_VERTEX ||
+              nir->info.stage == MESA_SHADER_TESS_EVAL) &&
+             sem.location == VARYING_SLOT_PRIMITIVE_ID)
+            continue;
+
          /* Assign the param index if it's unassigned. */
          if (nir_slot_is_varying(sem.location) && !sem.no_varying &&
              (sem.gs_streams & 0x3) == 0 &&
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index daa3766c69c..405deed969b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -186,6 +186,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
                                  LLVMValueRef prim_passthrough);
 void gfx10_ngg_culling_build_end(struct si_shader_context *ctx);
 void gfx10_ngg_build_end(struct si_shader_context *ctx);
+void gfx10_ngg_export_vertex(struct ac_shader_abi *abi);
 void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
                                      LLVMValueRef prim_count, enum ac_prim_count count_type);
 void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream,
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 681a6af904d..ea61cdd1e2c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -965,6 +965,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
    ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
    ctx->abi.load_user_clip_plane = si_llvm_load_user_clip_plane;
    ctx->abi.load_streamout_buffer = si_llvm_load_streamout_buffer;
+   ctx->abi.export_vertex = gfx10_ngg_export_vertex;
    ctx->abi.atomic_add_prim_count = gfx10_ngg_atomic_add_prim_count;
 
    si_llvm_init_resource_callbacks(ctx);
@@ -1217,7 +1218,13 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
                                 info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
    ctx->abi.use_waterfall_for_divergent_tex_samplers = true;
 
-   for (unsigned i = 0; i < info->num_outputs; i++) {
+   unsigned num_outputs = info->num_outputs;
+   /* Need an extra output to hold the primitive ID added by the NIR NGG lowering. */
+   if (ctx->stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg &&
+       ctx->shader->key.ge.mono.u.vs_export_prim_id)
+      num_outputs++;
+
+   for (unsigned i = 0; i < num_outputs; i++) {
       LLVMTypeRef type = ctx->ac.f32;
 
       /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */

Reply via email to