Null exports should only be needed when no other exports are emitted. This removes a bunch of redundant 'exp null off, off, off, off done vm' instructions.
Affected games are Dota 2 and Wolfenstein 2, not sure if that really helps, but code size is decreasing there. Polaris10: Totals from affected shaders: SGPRS: 8216 -> 8216 (0.00 %) VGPRS: 7072 -> 7072 (0.00 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Code Size: 454968 -> 453896 (-0.24 %) bytes Max Waves: 772 -> 772 (0.00 %) Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/amd/common/ac_nir_to_llvm.c | 98 ++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ea51c3a54a..c370c80ec7 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -6484,67 +6484,73 @@ handle_tcs_outputs_post(struct radv_shader_context *ctx) write_tess_factors(ctx); } -static bool -si_export_mrt_color(struct radv_shader_context *ctx, - LLVMValueRef *color, unsigned index, bool is_last, - struct ac_export_args *args) -{ - /* Export */ - si_llvm_init_export_args(ctx, color, 0xf, - V_008DFC_SQ_EXP_MRT + index, args); - - if (is_last) { - args->valid_mask = 1; /* whether the EXEC mask is valid */ - args->done = 1; /* DONE bit */ - } else if (!args->enabled_channels) - return false; /* unnecessary NULL export */ - - return true; -} +struct radv_ps_exports { + unsigned num; + struct ac_export_args args[10]; +}; static void radv_export_mrt_z(struct radv_shader_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, - LLVMValueRef samplemask) + LLVMValueRef samplemask, struct radv_ps_exports *exp) { struct ac_export_args args; ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args); - ac_build_export(&ctx->ac, &args); + memcpy(&exp->args[exp->num++], &args, sizeof(args)); } static void handle_fs_outputs_post(struct radv_shader_context *ctx) { - unsigned index = 0; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; - struct ac_export_args color_args[8]; + struct radv_ps_exports exp = {}; + struct 
ac_export_args args[8]; + int last_color_export = -1; + int last_non_null = -1; + unsigned colors_written; + + /* Find the last written color export. */ + colors_written = ctx->output_mask >> FRAG_RESULT_DATA0; + if (!ctx->shader_info->info.ps.writes_z && + !ctx->shader_info->info.ps.writes_stencil && + !ctx->shader_info->info.ps.writes_sample_mask) { + last_color_export = util_last_bit(colors_written) - 1; + } + + /* Get the export arguments, and find the last non-null color export. */ + for (unsigned mrt = 0; mrt < 8; mrt++) { + unsigned index = mrt + FRAG_RESULT_DATA0; + LLVMValueRef color[4]; + + if (!(colors_written & (1 << mrt))) + continue; - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef values[4]; - bool last = false; + for (unsigned chan = 0; chan < 4; chan++) { + color[chan] = + ac_to_float(&ctx->ac, + radv_load_output(ctx, index, chan)); + } - if (!(ctx->output_mask & (1ull << i))) - continue; + si_llvm_init_export_args(ctx, color, 0xf, + V_008DFC_SQ_EXP_MRT + mrt, &args[mrt]); + if (args[mrt].enabled_channels) + last_non_null = mrt; + } - if (i < FRAG_RESULT_DATA0) + /* Emit all exports. 
*/ + for (unsigned mrt = 0; mrt < 8; mrt++) { + if (!(colors_written & (1 << mrt))) continue; - for (unsigned j = 0; j < 4; j++) - values[j] = ac_to_float(&ctx->ac, - radv_load_output(ctx, i, j)); + if (last_color_export != -1 && last_non_null == mrt) { + args[mrt].valid_mask = 1; /* whether the EXEC mask is valid */ + args[mrt].done = 1; /* DONE bit */ + } else if (!args[mrt].enabled_channels) + continue; /* unnecessary NULL export */ - if (!ctx->shader_info->info.ps.writes_z && - !ctx->shader_info->info.ps.writes_stencil && - !ctx->shader_info->info.ps.writes_sample_mask) - last = ctx->output_mask <= ((1ull << (i + 1)) - 1); - - bool ret = si_export_mrt_color(ctx, values, - i - FRAG_RESULT_DATA0, - last, &color_args[index]); - if (ret) - index++; + memcpy(&exp.args[exp.num++], &args[mrt], sizeof(args[mrt])); } /* Process depth, stencil, samplemask. */ @@ -6561,14 +6567,14 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0)); } - /* Export PS outputs. */ - for (unsigned i = 0; i < index; i++) - ac_build_export(&ctx->ac, &color_args[i]); - if (depth || stencil || samplemask) - radv_export_mrt_z(ctx, depth, stencil, samplemask); - else if (!index) + radv_export_mrt_z(ctx, depth, stencil, samplemask, &exp); + else if (last_color_export == -1 || last_non_null == -1) ac_build_export_null(&ctx->ac); + + /* Export PS outputs. */ + for (unsigned i = 0; i < exp.num; i++) + ac_build_export(&ctx->ac, &exp.args[i]); } static void -- 2.16.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev