Module: Mesa
Branch: main
Commit: 7bc56911f812d11d757b9824e6dad87b319b4481
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7bc56911f812d11d757b9824e6dad87b319b4481
Author: Qiang Yu <[email protected]> Date: Sat Jun 11 15:29:50 2022 +0800 radeonsi: implement export_vertex abi Used by ngg lower. Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]> Reviewed-by: Marek Olšák <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17109> --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 29 +++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_shader.c | 9 +++++++ src/gallium/drivers/radeonsi/si_shader_internal.h | 1 + src/gallium/drivers/radeonsi/si_shader_llvm.c | 9 ++++++- 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index b78957c0fe9..8ecfd74e24f 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1753,6 +1753,35 @@ void gfx10_ngg_build_end(struct si_shader_context *ctx) ac_build_endif(&ctx->ac, 6002); } +void gfx10_ngg_export_vertex(struct ac_shader_abi *abi) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + struct si_shader_info *info = &ctx->shader->selector->info; + struct si_shader_output_values outputs[PIPE_MAX_SHADER_OUTPUTS]; + LLVMValueRef *addrs = ctx->abi.outputs; + + unsigned num_outputs = info->num_outputs; + /* if needed, nir ngg lower will append primitive id export at last */ + if (ctx->shader->key.ge.mono.u.vs_export_prim_id) + num_outputs++; + + for (unsigned i = 0; i < num_outputs; i++) { + if (i < info->num_outputs) { + outputs[i].semantic = info->output_semantic[i]; + outputs[i].vertex_streams = info->output_streams[i]; + } else { + outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID; + outputs[i].vertex_streams = 0; + } + + for (unsigned j = 0; j < 4; j++) + outputs[i].values[j] = + LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], ""); + } + + si_llvm_build_vs_exports(ctx, NULL, outputs, num_outputs); +} + void 
gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef prim_count, enum ac_prim_count count_type) { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index feda45dade7..dd21c9182de 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1682,6 +1682,15 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade assert(intr->num_components == 1); /* only scalar stores expected */ nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + /* primitive id output is added by ngg lowering, so we don't have its + * output info pre-build in si_shader_info. It's handled at last of + * this function. + */ + if ((nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL) && + sem.location == VARYING_SLOT_PRIMITIVE_ID) + continue; + /* Assign the param index if it's unassigned. */ if (nir_slot_is_varying(sem.location) && !sem.no_varying && (sem.gs_streams & 0x3) == 0 && diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index daa3766c69c..405deed969b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -186,6 +186,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use LLVMValueRef prim_passthrough); void gfx10_ngg_culling_build_end(struct si_shader_context *ctx); void gfx10_ngg_build_end(struct si_shader_context *ctx); +void gfx10_ngg_export_vertex(struct ac_shader_abi *abi); void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef prim_count, enum ac_prim_count count_type); void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 681a6af904d..ea61cdd1e2c 100644 --- 
a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -965,6 +965,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad ctx->abi.intrinsic_load = si_llvm_load_intrinsic; ctx->abi.load_user_clip_plane = si_llvm_load_user_clip_plane; ctx->abi.load_streamout_buffer = si_llvm_load_streamout_buffer; + ctx->abi.export_vertex = gfx10_ngg_export_vertex; ctx->abi.atomic_add_prim_count = gfx10_ngg_atomic_add_prim_count; si_llvm_init_resource_callbacks(ctx); @@ -1217,7 +1218,13 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO; ctx->abi.use_waterfall_for_divergent_tex_samplers = true; - for (unsigned i = 0; i < info->num_outputs; i++) { + unsigned num_outputs = info->num_outputs; + /* need extra output to hold primitive id added by nir ngg lower */ + if (ctx->stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg && + ctx->shader->key.ge.mono.u.vs_export_prim_id) + num_outputs++; + + for (unsigned i = 0; i < num_outputs; i++) { LLVMTypeRef type = ctx->ac.f32; /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */
