Module: Mesa
Branch: main
Commit: 58f8e0e2a0cb033a12ec68065f49e97aca383243
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=58f8e0e2a0cb033a12ec68065f49e97aca383243

Author: Rhys Perry <[email protected]>
Date:   Tue Jun 13 15:27:26 2023 +0100

nir,aco: add INCLUDE_HELPERS index to reduce intrinsic

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23621>

---

 src/amd/compiler/aco_instruction_selection.cpp | 8 +++++---
 src/compiler/nir/nir_intrinsics.py             | 5 ++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index d34c955818d..f34fd756e85 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7892,7 +7892,7 @@ emit_uniform_reduce(isel_context* ctx, nir_intrinsic_instr* instr)
 
       Temp thread_count =
          bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm));
-      thread_count = emit_wqm(bld, thread_count);
+      thread_count = emit_wqm(bld, thread_count, Temp(0, s1), nir_intrinsic_include_helpers(instr));
 
       emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count);
    } else {
@@ -8523,6 +8523,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
          instr->intrinsic == nir_intrinsic_reduce ? nir_intrinsic_cluster_size(instr) : 0;
       cluster_size = util_next_power_of_two(
          MIN2(cluster_size ? cluster_size : ctx->program->wave_size, ctx->program->wave_size));
+      bool create_helpers =
+         instr->intrinsic == nir_intrinsic_reduce && nir_intrinsic_include_helpers(instr);
 
       if (!nir_src_is_divergent(instr->src[0]) && cluster_size == ctx->program->wave_size &&
           instr->dest.ssa.bit_size != 1) {
@@ -8552,7 +8554,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
          switch (instr->intrinsic) {
          case nir_intrinsic_reduce:
-            emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst);
+            emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst, create_helpers);
             break;
          case nir_intrinsic_exclusive_scan:
             emit_wqm(bld, emit_boolean_exclusive_scan(ctx, op, src), dst);
@@ -8581,7 +8583,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
          Temp tmp_dst = emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size,
                                              bld.def(dst.regClass()), src);
-         emit_wqm(bld, tmp_dst, dst);
+         emit_wqm(bld, tmp_dst, dst, create_helpers);
       }
       break;
    }
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 68dcfa937c7..70dc50370e2 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -172,6 +172,9 @@ index("unsigned", "reduction_op")
 # Cluster size for reduction operations
 index("unsigned", "cluster_size")
 
+# Requires that the operation creates and includes helper invocations
+index("bool", "include_helpers")
+
 # Parameter index for a load_param intrinsic
 index("unsigned", "param_idx")
 
@@ -445,7 +448,7 @@ intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0,
           indices=[EXECUTION_SCOPE, CLUSTER_SIZE], flags=[CAN_ELIMINATE]);
 
 intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0,
-          indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE])
+          indices=[REDUCTION_OP, CLUSTER_SIZE, INCLUDE_HELPERS], flags=[CAN_ELIMINATE])
 intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
           indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
 intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,

Reply via email to