Module: Mesa Branch: main Commit: daeab51a628a61dade7bac0c6d06361714f57e1b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=daeab51a628a61dade7bac0c6d06361714f57e1b
Author: Caio Oliveira <caio.olive...@intel.com> Date: Wed Oct 25 21:07:26 2023 -0700 intel/compiler: Re-enable opt_zero_samples() for Gfx7+ Inadvertently, because of a sequence of changes elsewhere, this pass ended up not having any effect: - Before Gfx5 the optimization is not applicable. - On Gfx5-6 it doesn't apply because sampler operations don't currently use LOAD_PAYLOAD, but write the MOVs directly. Not clear to me whether they ever did. - On Gfx7+ it doesn't apply anymore because the logical sampler operations are now lowered directly to SENDs, and the is_tex() check would skip them. Since the LOAD_PAYLOAD implementation applies for Gfx7+ only, rework the pass to work again by handling SEND instructions. To make the pass easier, the optimization will happen before opt_split_sends() so only one LOAD_PAYLOAD needs to be cared for. Update the code to accept BAD_FILE sources in addition to zeros; these are added in some cases as padding and are effectively don't-care values, so we can assume they are zeros. Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25742> --- src/intel/compiler/brw_fs.cpp | 63 ++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a794ed452a2..ec9d3a9b15d 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3096,7 +3096,7 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read) /** * Optimize sample messages that have constant zero values for the trailing - * texture coordinates. We can just reduce the message length for these + * parameters. We can just reduce the message length for these * instructions instead of reserving a register for it. Trailing parameters * that aren't sent default to zero anyway. 
This will cause the dead code * eliminator to remove the MOV instruction that would otherwise be emitted to @@ -3105,26 +3105,36 @@ load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read) bool fs_visitor::opt_zero_samples() { - /* Gfx4 infers the texturing opcode based on the message length so we can't - * change it. Gfx12.5 has restrictions on the number of coordinate + /* Implementation supports only SENDs, so applicable to Gfx7+ only. */ + assert(devinfo->ver >= 7); + + /* Gfx12.5 has restrictions on the number of coordinate * parameters that have to be provided for some texture types * (Wa_14012688258). */ - if (devinfo->ver < 5 || intel_needs_workaround(devinfo, 14012688258)) + if (intel_needs_workaround(devinfo, 14012688258)) return false; bool progress = false; - foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (!inst->is_tex()) + foreach_block_and_inst(block, fs_inst, send, cfg) { + if (send->opcode != SHADER_OPCODE_SEND || + send->sfid != BRW_SFID_SAMPLER) + continue; + + /* This pass works on SENDs before splitting. */ + if (send->ex_mlen > 0) continue; - fs_inst *load_payload = (fs_inst *) inst->prev; + fs_inst *lp = (fs_inst *) send->prev; - if (load_payload->is_head_sentinel() || - load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) + if (lp->is_head_sentinel() || lp->opcode != SHADER_OPCODE_LOAD_PAYLOAD) continue; + /* How much of the payload are actually read by this SEND. */ + const unsigned params = + load_payload_sources_read_for_size(lp, send->mlen * REG_SIZE); + /* We don't want to remove the message header or the first parameter. 
* Removing the first parameter is not allowed, see the Haswell PRM * volume 7, page 149: @@ -3132,11 +3142,17 @@ fs_visitor::opt_zero_samples() * "Parameter 0 is required except for the sampleinfo message, which * has no parameter 0" */ - while (inst->mlen > inst->header_size + inst->exec_size / 8 && - load_payload->src[(inst->mlen - inst->header_size) / - (inst->exec_size / 8) + - inst->header_size - 1].is_zero()) { - inst->mlen -= inst->exec_size / 8; + const unsigned first_param_idx = lp->header_size; + unsigned zero_size = 0; + for (unsigned i = params - 1; i > first_param_idx; i--) { + if (lp->src[i].file != BAD_FILE && !lp->src[i].is_zero()) + break; + zero_size += lp->exec_size * type_sz(lp->src[i].type) * lp->dst.stride; + } + + const unsigned zero_len = zero_size / (reg_unit(devinfo) * REG_SIZE); + if (zero_len > 0) { + send->mlen -= zero_len; progress = true; } } @@ -6382,7 +6398,18 @@ fs_visitor::optimize() OPT(lower_logical_sends); /* After logical SEND lowering. */ - OPT(opt_copy_propagation); + + if (OPT(opt_copy_propagation)) + OPT(opt_algebraic); + + /* Identify trailing zeros LOAD_PAYLOAD of sampler messages. + * Do this before splitting SENDs. + */ + if (devinfo->ver >= 7) { + if (OPT(opt_zero_samples) && OPT(opt_copy_propagation)) + OPT(opt_algebraic); + } + OPT(opt_split_sends); OPT(fixup_nomask_control_flow); @@ -6390,12 +6417,6 @@ fs_visitor::optimize() if (OPT(opt_copy_propagation)) OPT(opt_algebraic); - /* Only run after logical send lowering because it's easier to implement - * in terms of physical sends. - */ - if (OPT(opt_zero_samples) && OPT(opt_copy_propagation)) - OPT(opt_algebraic); - /* Run after logical send lowering to give it a chance to CSE the * LOAD_PAYLOAD instructions created to construct the payloads of * e.g. texturing messages in cases where it wasn't possible to CSE the