Module: Mesa Branch: main Commit: ef8553082eec11c0148c3decec139285e1527c5f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef8553082eec11c0148c3decec139285e1527c5f
Author: Caio Oliveira <caio.olive...@intel.com> Date: Wed Oct 25 18:43:40 2023 -0700 intel/compiler: Rework opt_split_sends to not rely/modify LOAD_PAYLOAD This is a preparation to (re-)enable opt_zero_samples(), which will reduce a SEND mlen before we split it. When that happens, opt_split_sends() won't be able to rely on the fact that mlen covers the entire LOAD_PAYLOAD. Since we are changing that, take the opportunity to also not modify the existing LOAD_PAYLOAD, just create two new ones with the exact set of sources. This allows the pass to be further simplified by iterating forward and not require live_variables analysis. The helper function was added so it can be used later for opt_zero_samples(). Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25742> --- src/intel/compiler/brw_fs.cpp | 80 ++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 32 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 80f844fa9c3..a794ed452a2 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3078,6 +3078,22 @@ fs_visitor::opt_algebraic() return progress; } +static unsigned +load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read) +{ + assert(lp->opcode == SHADER_OPCODE_LOAD_PAYLOAD); + assert(size_read >= lp->header_size * REG_SIZE); + + unsigned i; + unsigned size = lp->header_size * REG_SIZE; + for (i = lp->header_size; size < size_read && i < lp->sources; i++) + size += lp->exec_size * type_sz(lp->src[i].type); + + /* Size read must cover exactly a subset of sources. */ + assert(size == size_read); + return i; +} + /** * Optimize sample messages that have constant zero values for the trailing * texture coordinates. 
We can just reduce the message length for these @@ -3155,23 +3171,14 @@ fs_visitor::opt_split_sends() bool progress = false; - const fs_live_variables &live = live_analysis.require(); - - int next_ip = 0; - - foreach_block_and_inst_safe(block, fs_inst, send, cfg) { - int ip = next_ip; - next_ip++; - + foreach_block_and_inst(block, fs_inst, send, cfg) { if (send->opcode != SHADER_OPCODE_SEND || send->mlen <= reg_unit(devinfo) || send->ex_mlen > 0) continue; - /* Don't split payloads which are also read later. */ assert(send->src[2].file == VGRF); - if (live.vgrf_end[send->src[2].nr] > ip) - continue; + /* Currently don't split sends that reuse a previously used payload. */ fs_inst *lp = (fs_inst *) send->prev; if (lp->is_head_sentinel() || lp->opcode != SHADER_OPCODE_LOAD_PAYLOAD) @@ -3183,37 +3190,46 @@ fs_visitor::opt_split_sends() /* Split either after the header (if present), or when consecutive * sources switch from one VGRF to a different one. */ - unsigned i = lp->header_size; - if (lp->header_size == 0) { - for (i = 1; i < lp->sources; i++) { - if (lp->src[i].file == BAD_FILE) + unsigned mid = lp->header_size; + if (mid == 0) { + for (mid = 1; mid < lp->sources; mid++) { + if (lp->src[mid].file == BAD_FILE) continue; - if (lp->src[0].file != lp->src[i].file || - lp->src[0].nr != lp->src[i].nr) + if (lp->src[0].file != lp->src[mid].file || + lp->src[0].nr != lp->src[mid].nr) break; } } - if (i != lp->sources) { - const fs_builder ibld(this, block, lp); - fs_inst *lp2 = - ibld.LOAD_PAYLOAD(lp->dst, &lp->src[i], lp->sources - i, 0); + /* SEND mlen might be smaller than what LOAD_PAYLOAD provides, so + * find out how many sources from the payload does it really need. + */ + const unsigned end = + load_payload_sources_read_for_size(lp, send->mlen * REG_SIZE); + + /* Nothing to split. 
*/ + if (end <= mid) + continue; - lp->resize_sources(i); - lp->size_written -= lp2->size_written; + const fs_builder ibld(this, block, lp); + fs_inst *lp1 = ibld.LOAD_PAYLOAD(lp->dst, &lp->src[0], mid, lp->header_size); + fs_inst *lp2 = ibld.LOAD_PAYLOAD(lp->dst, &lp->src[mid], end - mid, 0); - lp->dst = fs_reg(VGRF, alloc.allocate(lp->size_written / REG_SIZE), lp->dst.type); - lp2->dst = fs_reg(VGRF, alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type); + assert(lp1->size_written % REG_SIZE == 0); + assert(lp2->size_written % REG_SIZE == 0); + assert((lp1->size_written + lp2->size_written) / REG_SIZE == send->mlen); - send->resize_sources(4); - send->src[2] = lp->dst; - send->src[3] = lp2->dst; - send->ex_mlen = lp2->size_written / REG_SIZE; - send->mlen -= send->ex_mlen; + lp1->dst = fs_reg(VGRF, alloc.allocate(lp1->size_written / REG_SIZE), lp1->dst.type); + lp2->dst = fs_reg(VGRF, alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type); - progress = true; - } + send->resize_sources(4); + send->src[2] = lp1->dst; + send->src[3] = lp2->dst; + send->ex_mlen = lp2->size_written / REG_SIZE; + send->mlen -= send->ex_mlen; + + progress = true; } if (progress)