I just noticed that we can reduce the number of input VGPRs when dimensions y (and/or z) are unused, similar to what is done for work groups.
Signed-off-by: Samuel Pitoiset <[email protected]> --- src/amd/common/ac_nir_to_llvm.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 391a4c2a60..5ba0d937c8 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -95,7 +95,7 @@ struct nir_to_llvm_context { LLVMValueRef view_index; LLVMValueRef num_work_groups; LLVMValueRef workgroup_ids[3]; - LLVMValueRef local_invocation_ids; + LLVMValueRef local_invocation_ids[3]; LLVMValueRef tg_size; LLVMValueRef vertex_buffers; @@ -829,8 +829,14 @@ static void create_function(struct nir_to_llvm_context *ctx, if (ctx->shader_info->info.cs.uses_local_invocation_idx) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tg_size); - add_arg(&args, ARG_VGPR, ctx->ac.v3i32, - &ctx->local_invocation_ids); + + for (int i = 0; i < 3; i++) { + ctx->local_invocation_ids[i] = NULL; + if (ctx->shader_info->info.cs.uses_thread_id[i]) { + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->local_invocation_ids[i]); + } + } break; case MESA_SHADER_VERTEX: declare_global_input_sgprs(ctx, stage, has_previous_stage, @@ -4370,7 +4376,14 @@ static void visit_intrinsic(struct ac_nir_context *ctx, break; } case nir_intrinsic_load_local_invocation_id: { - result = ctx->nctx->local_invocation_ids; + LLVMValueRef values[3]; + + for (int i = 0; i < 3; i++) { + values[i] = ctx->nctx->local_invocation_ids[i] ? + ctx->nctx->local_invocation_ids[i] : ctx->ac.i32_0; + } + + result = ac_build_gather_values(&ctx->ac, values, 3); break; } case nir_intrinsic_load_base_instance: -- 2.16.1 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
