Module: Mesa
Branch: main
Commit: 524a894ba482d6cf98de6df9745d6611694c9bd2
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=524a894ba482d6cf98de6df9745d6611694c9bd2

Author: Georg Lehmann <[email protected]>
Date: Sat Aug 5 19:02:53 2023 +0200

aco/gfx11: don't use bfe for local_invocation_id if the others are always 0

Foz-DB GFX1100:
Totals from 4469 (3.37% of 132657) affected shaders:
Instrs: 3895053 -> 3893529 (-0.04%); split: -0.04%, +0.00%
CodeSize: 20244128 -> 20220952 (-0.11%); split: -0.11%, +0.00%
Latency: 37864147 -> 37862227 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 5578100 -> 5576469 (-0.03%); split: -0.03%, +0.00%
SClause: 108336 -> 108343 (+0.01%); split: -0.00%, +0.01%
Copies: 275897 -> 275900 (+0.00%); split: -0.00%, +0.00%

Reviewed-by: Timur Kristóf <[email protected]>
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24514>

---

 src/amd/compiler/aco_instruction_selection.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index daea9e675af..07c387a8ea4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8212,9 +8212,20 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
          /* Thread IDs are packed in VGPR0, 10 bits per component. */
          for (uint32_t i = 0; i < 3; i++) {
-            local_ids[i] = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
-                                    get_arg(ctx, ctx->args->local_invocation_ids),
-                                    Operand::c32(i * 10u), Operand::c32(10u));
+            if (i == 0 && ctx->shader->info.workgroup_size[1] == 1 &&
+                ctx->shader->info.workgroup_size[2] == 1 &&
+                !ctx->shader->info.workgroup_size_variable) {
+               local_ids[i] = get_arg(ctx, ctx->args->local_invocation_ids);
+            } else if (i == 2 || (i == 1 && ctx->shader->info.workgroup_size[2] == 1 &&
+                                  !ctx->shader->info.workgroup_size_variable)) {
+               local_ids[i] =
+                  bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(i * 10u),
+                           get_arg(ctx, ctx->args->local_invocation_ids));
+            } else {
+               local_ids[i] = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
+                                       get_arg(ctx, ctx->args->local_invocation_ids),
+                                       Operand::c32(i * 10u), Operand::c32(10u));
+            }
          }
 
          bld.pseudo(aco_opcode::p_create_vector, Definition(dst), local_ids[0], local_ids[1],
