Module: Mesa Branch: main Commit: 056e8ec8c3c166d6dc16a796760caa725eae252f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=056e8ec8c3c166d6dc16a796760caa725eae252f
Author: Alyssa Rosenzweig <[email protected]> Date: Wed Jun 21 08:51:22 2023 -0400 pan/mdg: Lower special reads better If we have multiple reads of the same SSA def in the same block, we don't need to emit multiple copies for it, we can just reuse a copy (OR'ing in the mask, knowing the source is already fully written since it's SSA). This will prevent some regressions in moves from the copyprop patch. There is a bit of a tradeoff here between increased pressure and reduced instruction count but I'm not too worried. The effect on pressure seems all over the place -- register use decreases overall, threads increase (great!) but a few shaders that were *already spilling*, spill a bit worse. I'm not terribly worried there. total instructions in shared programs: 1518289 -> 1514856 (-0.23%) instructions in affected programs: 292854 -> 289421 (-1.17%) helped: 1557 HURT: 232 Instructions are helped. total bundles in shared programs: 646903 -> 645327 (-0.24%) bundles in affected programs: 91872 -> 90296 (-1.72%) helped: 910 HURT: 256 Bundles are helped. total quadwords in shared programs: 1133728 -> 1130554 (-0.28%) quadwords in affected programs: 187170 -> 183996 (-1.70%) helped: 1399 HURT: 44 Quadwords are helped. total registers in shared programs: 90640 -> 90559 (-0.09%) registers in affected programs: 2676 -> 2595 (-3.03%) helped: 202 HURT: 124 Inconclusive result (%-change mean confidence interval includes 0). total threads in shared programs: 55561 -> 55590 (0.05%) threads in affected programs: 50 -> 79 (58.00%) helped: 23 HURT: 6 Threads are helped. 
total spills in shared programs: 1386 -> 1402 (1.15%) spills in affected programs: 231 -> 247 (6.93%) helped: 2 HURT: 13 total fills in shared programs: 5159 -> 5285 (2.44%) fills in affected programs: 1282 -> 1408 (9.83%) helped: 11 HURT: 16 Signed-off-by: Alyssa Rosenzweig <[email protected]> Reviewed-by: Italo Nicola <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23769> --- src/panfrost/midgard/midgard_ra.c | 59 +++++++++++++++++++-------------- src/panfrost/midgard/midgard_schedule.c | 2 +- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 1d3252627e3..85edd311fe3 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -173,6 +173,7 @@ mark_node_class(unsigned *bitfield, unsigned node) void mir_lower_special_reads(compiler_context *ctx) { + mir_compute_temp_count(ctx); size_t sz = BITSET_WORDS(ctx->temp_count) * sizeof(BITSET_WORD); /* Bitfields for the various types of registers we could have. 
aluw can @@ -276,36 +277,46 @@ mir_lower_special_reads(compiler_context *ctx) /* Insert move before each read/write, depending on the * hazard we're trying to account for */ - mir_foreach_instr_global_safe(ctx, pre_use) { - if (pre_use->type != classes[j]) - continue; + mir_foreach_block(ctx, block_) { + midgard_block *block = (midgard_block *)block_; + midgard_instruction *mov = NULL; - if (hazard_write) { - if (pre_use->dest != i) + mir_foreach_instr_in_block_safe(block, pre_use) { + if (pre_use->type != classes[j]) continue; - midgard_instruction m = v_mov(idx, i); - m.dest_type = pre_use->dest_type; - m.src_types[1] = m.dest_type; - m.mask = pre_use->mask; + if (hazard_write) { + if (pre_use->dest != i) + continue; - midgard_instruction *use = mir_next_op(pre_use); - assert(use); - mir_insert_instruction_before(ctx, use, m); - mir_rewrite_index_dst_single(pre_use, i, idx); - } else { - if (!mir_has_arg(pre_use, i)) - continue; + midgard_instruction m = v_mov(idx, i); + m.dest_type = pre_use->dest_type; + m.src_types[1] = m.dest_type; + m.mask = pre_use->mask; - idx = spill_idx++; + midgard_instruction *use = mir_next_op(pre_use); + assert(use); + mir_insert_instruction_before(ctx, use, m); + mir_rewrite_index_dst_single(pre_use, i, idx); + } else { + if (!mir_has_arg(pre_use, i)) + continue; + + unsigned mask = mir_from_bytemask( + mir_round_bytemask_up( + mir_bytemask_of_read_components(pre_use, i), 32), + 32); - midgard_instruction m = v_mov(i, idx); - m.mask = mir_from_bytemask( - mir_round_bytemask_up( - mir_bytemask_of_read_components(pre_use, i), 32), - 32); - mir_insert_instruction_before(ctx, pre_use, m); - mir_rewrite_index_src_single(pre_use, i, idx); + if (mov == NULL || !mir_is_ssa(i)) { + midgard_instruction m = v_mov(i, spill_idx++); + m.mask = mask; + mov = mir_insert_instruction_before(ctx, pre_use, m); + } else { + mov->mask |= mask; + } + + mir_rewrite_index_src_single(pre_use, i, mov->dest); + } } } } diff --git 
a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index 8dd258fa222..c6a51b90465 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -1602,8 +1602,8 @@ midgard_schedule_program(compiler_context *ctx) midgard_promote_uniforms(ctx); /* Must be lowered right before scheduling */ - mir_squeeze_index(ctx); mir_lower_special_reads(ctx); + mir_squeeze_index(ctx); if (ctx->stage == MESA_SHADER_FRAGMENT) { mir_invalidate_liveness(ctx);
