Module: Mesa
Branch: main
Commit: ee9ccd7ac523243408b8bcde9c7896a2aee1d7c2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee9ccd7ac523243408b8bcde9c7896a2aee1d7c2

Author: Qiang Yu <[email protected]>
Date:   Mon Dec 26 10:03:59 2022 +0800

radeonsi: only init llvm output when needed.

These are the cases which still use nir_store_output.

Reviewed-by: Marek Olšák <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21435>

---

 src/gallium/drivers/radeonsi/si_shader_llvm.c | 33 +++++++++++++++------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c 
b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 8e40c0cacf0..425cc281b21 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -1000,23 +1000,28 @@ static bool si_llvm_translate_nir(struct 
si_shader_context *ctx, struct si_shade
    ctx->abi.disable_aniso_single_level = true;
    ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord;
 
-   unsigned num_outputs = info->num_outputs;
-   /* need extra output to hold primitive id added by nir lower */
-   if (ctx->stage <= MESA_SHADER_GEOMETRY &&
-       ctx->shader->key.ge.mono.u.vs_export_prim_id)
-      num_outputs++;
+   bool ls_need_output =
+      ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls &&
+      shader->key.ge.opt.same_patch_vertices;
+
+   bool tcs_need_output =
+      ctx->stage == MESA_SHADER_TESS_CTRL && 
info->tessfactors_are_def_in_all_invocs;
 
-   for (unsigned i = 0; i < num_outputs; i++) {
-      LLVMTypeRef type = ctx->ac.f32;
+   bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT;
 
-      /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low 
and high bits of f32. */
-      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
-          
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
-         type = ctx->ac.f16;
+   if (ls_need_output || tcs_need_output || ps_need_output) {
+      for (unsigned i = 0; i < info->num_outputs; i++) {
+         LLVMTypeRef type = ctx->ac.f32;
 
-      for (unsigned j = 0; j < 4; j++) {
-         ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, 
"");
-         ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16;
+         /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into 
low and high bits of f32. */
+         if (nir->info.stage == MESA_SHADER_FRAGMENT &&
+             
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
+            type = ctx->ac.f16;
+
+         for (unsigned j = 0; j < 4; j++) {
+            ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, 
type, "");
+            ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16;
+         }
       }
    }
 

Reply via email to