Module: Mesa
Branch: main
Commit: 5311d8713d84b388fea41ca0efa5dfd4d774e136
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5311d8713d84b388fea41ca0efa5dfd4d774e136

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date: Wed Nov 29 20:20:26 2023 -0600

nak: Implement scan/reduce on booleans

We could use the lowering in nir_lower_subgroups for this but it's a lot
more complicated than we need and uses quad_any/all which we don't have.

Fixes: cca40086c6a4 ("nak: Lower scan/reduce in NIR")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26411>

---

 src/nouveau/compiler/nak_nir_lower_scan_reduce.c | 81 +++++++++++++++++++++---
 1 file changed, 71 insertions(+), 10 deletions(-)

diff --git a/src/nouveau/compiler/nak_nir_lower_scan_reduce.c b/src/nouveau/compiler/nak_nir_lower_scan_reduce.c
index 2d5923190d6..507cac62d5c 100644
--- a/src/nouveau/compiler/nak_nir_lower_scan_reduce.c
+++ b/src/nouveau/compiler/nak_nir_lower_scan_reduce.c
@@ -6,6 +6,73 @@
 #include "nak_private.h"
 #include "nir_builder.h"
 
+static nir_def *
+cluster_mask(nir_builder *b, unsigned cluster_size)
+{
+   nir_def *mask = nir_ballot(b, 1, 32, nir_imm_true(b));
+
+   if (cluster_size < 32) {
+      nir_def *idx = nir_load_subgroup_invocation(b);
+      nir_def *cluster = nir_iand_imm(b, idx, ~(uint64_t)(cluster_size - 1));
+
+      nir_def *cluster_mask = nir_imm_int(b, BITFIELD_MASK(cluster_size));
+      cluster_mask = nir_ishl(b, cluster_mask, cluster);
+
+      mask = nir_iand(b, mask, cluster_mask);
+   }
+
+   return mask;
+}
+
+static nir_def *
+build_scan_bool(nir_builder *b, nir_intrinsic_op op, nir_op red_op,
+                nir_def *data, unsigned cluster_size)
+{
+   /* Handle a couple of special cases first */
+   if (op == nir_intrinsic_reduce && cluster_size == 32) {
+      switch (red_op) {
+      case nir_op_iand:
+         return nir_vote_all(b, 1, data);
+      case nir_op_ior:
+         return nir_vote_any(b, 1, data);
+      case nir_op_ixor:
+         /* The generic path is fine */
+         break;
+      default:
+         unreachable("Unsupported boolean reduction op");
+      }
+   }
+
+   nir_def *mask = cluster_mask(b, cluster_size);
+   switch (op) {
+   case nir_intrinsic_exclusive_scan:
+      mask = nir_iand(b, mask, nir_load_subgroup_lt_mask(b, 1, 32));
+      break;
+   case nir_intrinsic_inclusive_scan:
+      mask = nir_iand(b, mask, nir_load_subgroup_le_mask(b, 1, 32));
+      break;
+   case nir_intrinsic_reduce:
+      break;
+   default:
+      unreachable("Unsupported scan/reduce op");
+   }
+
+   data = nir_ballot(b, 1, 32, data);
+
+   switch (red_op) {
+   case nir_op_iand:
+      return nir_ieq_imm(b, nir_iand(b, nir_inot(b, data), mask), 0);
+   case nir_op_ior:
+      return nir_ine_imm(b, nir_iand(b, data, mask), 0);
+   case nir_op_ixor: {
+      nir_def *count = nir_bit_count(b, nir_iand(b, data, mask));
+      return nir_ine_imm(b, nir_iand_imm(b, count, 1), 0);
+   }
+   default:
+      unreachable("Unsupported boolean reduction op");
+   }
+}
+
 static nir_def *
 build_identity(nir_builder *b, nir_op op)
 {
@@ -152,21 +219,15 @@ nak_nir_lower_scan_reduce_intrin(nir_builder *b,
       /* Simple case where we're not actually doing any reducing at all. */
       assert(intrin->intrinsic == nir_intrinsic_reduce);
       data = intrin->src[0].ssa;
+   } else if (intrin->src[0].ssa->bit_size == 1) {
+      data = build_scan_bool(b, intrin->intrinsic, red_op,
+                             intrin->src[0].ssa, cluster_size);
    } else {
       /* First, we need a mask of all invocations to be included in the
        * reduction or scan. For trivial cluster sizes, that's just the mask
        * of enabled channels.
        */
-      nir_def *mask = nir_ballot(b, 1, 32, nir_imm_true(b));
-      if (cluster_size < 32) {
-         nir_def *idx = nir_load_subgroup_invocation(b);
-         nir_def *cluster = nir_iand_imm(b, idx, ~(uint64_t)(cluster_size - 1));
-
-         nir_def *cluster_mask = nir_imm_int(b, BITFIELD_MASK(cluster_size));
-         cluster_mask = nir_ishl(b, cluster_mask, cluster);
-
-         mask = nir_iand(b, mask, cluster_mask);
-      }
+      nir_def *mask = cluster_mask(b, cluster_size);
 
       nir_def *full, *partial;
       nir_push_if(b, nir_ieq_imm(b, mask, -1));