Module: Mesa
Branch: main
Commit: 524a894ba482d6cf98de6df9745d6611694c9bd2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=524a894ba482d6cf98de6df9745d6611694c9bd2

Author: Georg Lehmann <[email protected]>
Date:   Sat Aug  5 19:02:53 2023 +0200

aco/gfx11: don't use bfe for local_invocation_id if the others are always 0

Foz-DB GFX1100:
Totals from 4469 (3.37% of 132657) affected shaders:
Instrs: 3895053 -> 3893529 (-0.04%); split: -0.04%, +0.00%
CodeSize: 20244128 -> 20220952 (-0.11%); split: -0.11%, +0.00%
Latency: 37864147 -> 37862227 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 5578100 -> 5576469 (-0.03%); split: -0.03%, +0.00%
SClause: 108336 -> 108343 (+0.01%); split: -0.00%, +0.01%
Copies: 275897 -> 275900 (+0.00%); split: -0.00%, +0.00%

Reviewed-by: Timur Kristóf <[email protected]>
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24514>

---

 src/amd/compiler/aco_instruction_selection.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index daea9e675af..07c387a8ea4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8212,9 +8212,20 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
          /* Thread IDs are packed in VGPR0, 10 bits per component. */
          for (uint32_t i = 0; i < 3; i++) {
-            local_ids[i] = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
-                                    get_arg(ctx, 
ctx->args->local_invocation_ids),
-                                    Operand::c32(i * 10u), Operand::c32(10u));
+            if (i == 0 && ctx->shader->info.workgroup_size[1] == 1 &&
+                ctx->shader->info.workgroup_size[2] == 1 &&
+                !ctx->shader->info.workgroup_size_variable) {
+               local_ids[i] = get_arg(ctx, ctx->args->local_invocation_ids);
+            } else if (i == 2 || (i == 1 && 
ctx->shader->info.workgroup_size[2] == 1 &&
+                                  !ctx->shader->info.workgroup_size_variable)) 
{
+               local_ids[i] =
+                  bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), 
Operand::c32(i * 10u),
+                           get_arg(ctx, ctx->args->local_invocation_ids));
+            } else {
+               local_ids[i] = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
+                                       get_arg(ctx, 
ctx->args->local_invocation_ids),
+                                       Operand::c32(i * 10u), 
Operand::c32(10u));
+            }
          }
 
          bld.pseudo(aco_opcode::p_create_vector, Definition(dst), 
local_ids[0], local_ids[1],

Reply via email to