Module: Mesa
Branch: main
Commit: 58f8e0e2a0cb033a12ec68065f49e97aca383243
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=58f8e0e2a0cb033a12ec68065f49e97aca383243

Author: Rhys Perry <[email protected]>
Date:   Tue Jun 13 15:27:26 2023 +0100

nir,aco: add INCLUDE_HELPERS index to reduce intrinsic

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23621>

---

 src/amd/compiler/aco_instruction_selection.cpp | 8 +++++---
 src/compiler/nir/nir_intrinsics.py             | 5 ++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index d34c955818d..f34fd756e85 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7892,7 +7892,7 @@ emit_uniform_reduce(isel_context* ctx, nir_intrinsic_instr* instr)
 
       Temp thread_count =
          bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm));
-      thread_count = emit_wqm(bld, thread_count);
+      thread_count = emit_wqm(bld, thread_count, Temp(0, s1), nir_intrinsic_include_helpers(instr));
 
       emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count);
    } else {
@@ -8523,6 +8523,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
          instr->intrinsic == nir_intrinsic_reduce ? nir_intrinsic_cluster_size(instr) : 0;
       cluster_size = util_next_power_of_two(
          MIN2(cluster_size ? cluster_size : ctx->program->wave_size, ctx->program->wave_size));
+      bool create_helpers =
+         instr->intrinsic == nir_intrinsic_reduce && nir_intrinsic_include_helpers(instr);
 
       if (!nir_src_is_divergent(instr->src[0]) && cluster_size == ctx->program->wave_size &&
           instr->dest.ssa.bit_size != 1) {
@@ -8552,7 +8554,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
          switch (instr->intrinsic) {
          case nir_intrinsic_reduce:
-            emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst);
+            emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst, create_helpers);
             break;
          case nir_intrinsic_exclusive_scan:
             emit_wqm(bld, emit_boolean_exclusive_scan(ctx, op, src), dst);
@@ -8581,7 +8583,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
          Temp tmp_dst = emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size,
                                              bld.def(dst.regClass()), src);
-         emit_wqm(bld, tmp_dst, dst);
+         emit_wqm(bld, tmp_dst, dst, create_helpers);
       }
       break;
    }
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 68dcfa937c7..70dc50370e2 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -172,6 +172,9 @@ index("unsigned", "reduction_op")
 # Cluster size for reduction operations
 index("unsigned", "cluster_size")
 
+# Requires that the operation creates and includes helper invocations
+index("bool", "include_helpers")
+
 # Parameter index for a load_param intrinsic
 index("unsigned", "param_idx")
 
@@ -445,7 +448,7 @@ intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0,
     indices=[EXECUTION_SCOPE, CLUSTER_SIZE], flags=[CAN_ELIMINATE]);
 intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0,
-          indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE])
+          indices=[REDUCTION_OP, CLUSTER_SIZE, INCLUDE_HELPERS], flags=[CAN_ELIMINATE])
 intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
           indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
 intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
