Module: Mesa Branch: main Commit: 49b8ccbcdc2f893c220a2cbab84457b37f736085 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=49b8ccbcdc2f893c220a2cbab84457b37f736085
Author: Kenneth Graunke <[email protected]> Date: Wed Nov 22 15:53:47 2023 -0800 intel/fs: Drop opt_register_renaming() In the past, multiple writes to a single register were pretty common, but since we've transitioned to NIR, and leave the IR in SSA form for everything not captured in a phi-web, the pattern of generating new temporary registers at each step is a lot more common. This pass isn't nearly as useful now. Across fossil-db on Alchemist, this affects only 0.55% of shaders, which fall into two cases: - Coarse pixel shading pixel-X/Y setup. There are a few cases where we write a partial calculation into a register, then have a second instruction read that as a source and overwrite it as a destination. While we could use a temporary here, it doesn't actually help with register pressure at all, since there's the same amount of values live at both instructions regardless. So while this pass kicks in, it doesn't do anything useful. - Geometry shader control data bits (5 shaders total). We track masks for handling EndPrimitive in a single register across the program, and apparently in some cases can split the live range. However, it's a single register...only in geometry shaders...which use EndPrimitive. None of them appear to be in danger of spilling, either. So this tiny benefit doesn't seem to justify the cost of running the pass. So, just throw it out. It's not worth keeping. Reviewed-by: Lionel Landwerlin <[email protected]> Reviewed-by: Ian Romanick <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26343> --- src/gallium/drivers/zink/ci/traces-zink.yml | 2 +- src/intel/compiler/brw_fs.cpp | 64 ----------------------------- src/intel/compiler/brw_fs.h | 1 - 3 files changed, 1 insertion(+), 66 deletions(-) diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml index a94101a6230..3113cd1134c 100644 --- a/src/gallium/drivers/zink/ci/traces-zink.yml +++ b/src/gallium/drivers/zink/ci/traces-zink.yml @@ -30,7 +30,7 @@ traces: checksum: 433b69bea68cfe81914b857bbdc60ea5 gputest/pixmark-piano-v2.trace: gl-zink-anv-tgl: - checksum: dcedec0979e2317e7c8277e463fb8f63 + checksum: efa7853c9fb984a6e69108d668a61b1e gputest/triangle-v2.trace: gl-zink-anv-tgl: checksum: 5f694874b15bcd7a3689b387c143590b diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 7c8276250c2..9dd660a1a81 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2911,69 +2911,6 @@ fs_visitor::opt_split_sends() return progress; } - -bool -fs_visitor::opt_register_renaming() -{ - bool progress = false; - int depth = 0; - - unsigned remap[alloc.count]; - memset(remap, ~0u, sizeof(unsigned) * alloc.count); - - foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) { - depth++; - } else if (inst->opcode == BRW_OPCODE_ENDIF || - inst->opcode == BRW_OPCODE_WHILE) { - depth--; - } - - /* Rewrite instruction sources. */ - for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == VGRF && - remap[inst->src[i].nr] != ~0u && - remap[inst->src[i].nr] != inst->src[i].nr) { - inst->src[i].nr = remap[inst->src[i].nr]; - progress = true; - } - } - - const unsigned dst = inst->dst.nr; - - if (depth == 0 && - inst->dst.file == VGRF && - alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written && - !inst->is_partial_write()) { - if (remap[dst] == ~0u) { - remap[dst] = dst; - } else { - remap[dst] = alloc.allocate(regs_written(inst)); - inst->dst.nr = remap[dst]; - progress = true; - } - } else if (inst->dst.file == VGRF && - remap[dst] != ~0u && - remap[dst] != dst) { - inst->dst.nr = remap[dst]; - progress = true; - } - } - - if (progress) { - invalidate_analysis(DEPENDENCY_INSTRUCTION_DETAIL | - DEPENDENCY_VARIABLES); - - for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { - if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != ~0u) { - delta_xy[i].nr = remap[delta_xy[i].nr]; - } - } - } - - return progress; -} - /** * Remove redundant or useless halts. * @@ -5999,7 +5936,6 @@ fs_visitor::optimize() OPT(dead_code_eliminate); OPT(opt_peephole_sel); OPT(dead_control_flow_eliminate, this); - OPT(opt_register_renaming); OPT(opt_saturate_propagation); OPT(register_coalesce); OPT(compute_to_mrf); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index b766986588a..4b9c293671d 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -260,7 +260,6 @@ public: bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip); bool opt_copy_propagation(); - bool opt_register_renaming(); bool opt_bank_conflicts(); bool opt_split_sends(); bool register_coalesce();
