Module: Mesa Branch: main Commit: da4f49d0ad7b4d81d4293206145743d04d261bf0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=da4f49d0ad7b4d81d4293206145743d04d261bf0
Author: Qiang Yu <[email protected]> Date: Sun Oct 9 10:30:24 2022 +0800 radeonsi: cleanup si_llvm_build_vs_exports gfx11 code It's now completely handled in ac_nir_lower_ngg.c export_vertex_params_gfx11. Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]> Reviewed-by: Marek Olšák <[email protected]> Signed-off-by: Qiang Yu <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17109> --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 2 +- src/gallium/drivers/radeonsi/si_shader_internal.h | 3 +- src/gallium/drivers/radeonsi/si_shader_llvm.c | 2 +- src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 2 +- src/gallium/drivers/radeonsi/si_shader_llvm_vs.c | 66 ++--------------------- 5 files changed, 9 insertions(+), 66 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 36b67ffc545..2f6489b3c89 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -119,7 +119,7 @@ void gfx10_ngg_export_vertex(struct ac_shader_abi *abi) LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], ""); } - si_llvm_build_vs_exports(ctx, NULL, outputs, num_outputs); + si_llvm_build_vs_exports(ctx, outputs, num_outputs); } void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream, diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 0017c5b90cf..1a9a49faf27 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -222,7 +222,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * struct si_shader *shader, const struct pipe_stream_output_info *so, struct util_debug_callback *debug, struct nir_shader *nir, bool free_nir); -LLVMValueRef si_llvm_build_attr_ring_desc(struct si_shader_context *ctx); /* si_shader_llvm_gs.c */ 
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx); @@ -263,7 +262,7 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef struct si_shader_output_values *shader_out); void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput, unsigned stream); -void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_export_threads, +void si_llvm_build_vs_exports(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput); void si_llvm_vs_build_end(struct si_shader_context *ctx); void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index dd5f5745c91..b255310198f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -732,7 +732,7 @@ static LLVMValueRef si_get_num_vertices_per_prim(struct si_shader_context *ctx) return LLVMConstInt(ctx->ac.i32, num_vertices, false); } -LLVMValueRef si_llvm_build_attr_ring_desc(struct si_shader_context *ctx) +static LLVMValueRef si_llvm_build_attr_ring_desc(struct si_shader_context *ctx) { struct si_shader *shader = ctx->shader; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index f5510c2bc6c..440a6d25dbb 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -526,7 +526,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, if (stream == 0) { si_vertex_color_clamping(&ctx, outputs, gsinfo->num_outputs); - si_llvm_build_vs_exports(&ctx, NULL, outputs, gsinfo->num_outputs); + si_llvm_build_vs_exports(&ctx, outputs, gsinfo->num_outputs); } LLVMBuildBr(builder, end_bb); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c 
b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 597ea18eb95..417073ee251 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -503,10 +503,8 @@ static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, const LLV /** * Generate export instructions for hardware VS shader stage or NGG GS stage * (position and parameter data only). - * - * \param num_export_threads The number of threads that are active for exports. Only used by gfx11. */ -void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_export_threads, +void si_llvm_build_vs_exports(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput) { struct si_shader *shader = ctx->shader; @@ -720,63 +718,9 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_ex &param_exports[offset]); } - if (ctx->screen->info.gfx_level >= GFX11) { - /* Store primitive exports to alloca variables, so that we can read them outside this branch. */ - for (unsigned i = 0; i < shader->info.nr_param_exports; i++) { - for (unsigned chan = 0; chan < 4; chan++) { - param_exports[i].out[chan] = - ac_build_alloca_init(&ctx->ac, param_exports[i].out[chan], ""); - } - } - ac_build_endif(&ctx->ac, 0); - - if (!num_export_threads) - num_export_threads = si_unpack_param(ctx, ctx->args.merged_wave_info, 0, 8); - - /* We should always store full vec4s in groups of 8 lanes for the best performance even if - * some of them are garbage or have unused components, so align the number of export threads - * to 8. 
- */ - num_export_threads = LLVMBuildAdd(ctx->ac.builder, num_export_threads, - LLVMConstInt(ctx->ac.i32, 7, 0), ""); - num_export_threads = LLVMBuildAnd(ctx->ac.builder, num_export_threads, - LLVMConstInt(ctx->ac.i32, ~7, 0), ""); - ac_build_ifcc(&ctx->ac, - LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, - ac_get_thread_id(&ctx->ac), num_export_threads, ""), 0); - - LLVMValueRef attr_rsrc = si_llvm_build_attr_ring_desc(ctx); - LLVMValueRef attr_offset = LLVMBuildShl(ctx->ac.builder, - si_unpack_param(ctx, ctx->args.gs_attr_offset, 0, 15), - LLVMConstInt(ctx->ac.i32, 9, 0), ""); /* 512B increments */ - LLVMValueRef vindex = gfx10_get_thread_id_in_tg(ctx); - - LLVMValueRef soffset[32]; - - /* Compute scalar offsets first. */ - for (unsigned i = 0; i < shader->info.nr_param_exports; i++) { - soffset[i] = LLVMBuildAdd(ctx->ac.builder, attr_offset, - LLVMConstInt(ctx->ac.i32, 32 * i * 16, 0), ""); - } - - /* Write attributes to the attribute ring buffer. */ - for (unsigned i = 0; i < shader->info.nr_param_exports; i++) { - for (unsigned chan = 0; chan < 4; chan++) { - param_exports[i].out[chan] = - LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, param_exports[i].out[chan], ""); - } - - LLVMValueRef vdata = ac_build_gather_values_extended(&ctx->ac, param_exports[i].out, - 4, 1, false); - - ac_build_buffer_store_dword(&ctx->ac, attr_rsrc, vdata, vindex, - ctx->ac.i32_0, soffset[i], ac_swizzled); - } - } else { - /* Export attributes using parameter exports. */ - for (unsigned i = 0; i < shader->info.nr_param_exports; i++) - ac_build_export(&ctx->ac, &param_exports[i]); - } + /* Export attributes using parameter exports. */ + for (unsigned i = 0; i < shader->info.nr_param_exports; i++) + ac_build_export(&ctx->ac, &param_exports[i]); } void si_llvm_vs_build_end(struct si_shader_context *ctx) @@ -813,7 +757,7 @@ void si_llvm_vs_build_end(struct si_shader_context *ctx) i++; } - si_llvm_build_vs_exports(ctx, NULL, outputs, i); + si_llvm_build_vs_exports(ctx, outputs, i); FREE(outputs); }
