Module: Mesa
Branch: main
Commit: 056e8ec8c3c166d6dc16a796760caa725eae252f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=056e8ec8c3c166d6dc16a796760caa725eae252f

Author: Alyssa Rosenzweig <[email protected]>
Date:   Wed Jun 21 08:51:22 2023 -0400

pan/mdg: Lower special reads better

If we have multiple reads of the same SSA def in the same block, we don't need
to emit multiple copies for it; we can just reuse a copy (OR'ing in the mask,
knowing the source is already fully written since it's SSA). This will prevent
some regressions in moves from the copyprop patch.

There is a bit of a tradeoff here between increased pressure and reduced
instruction count but I'm not too worried. The effect on pressure seems all over
the place -- register use decreases overall, threads increase (great!), but a few
shaders that were *already spilling* spill a bit worse. I'm not terribly
worried there.

   total instructions in shared programs: 1518289 -> 1514856 (-0.23%)
   instructions in affected programs: 292854 -> 289421 (-1.17%)
   helped: 1557
   HURT: 232
   Instructions are helped.

   total bundles in shared programs: 646903 -> 645327 (-0.24%)
   bundles in affected programs: 91872 -> 90296 (-1.72%)
   helped: 910
   HURT: 256
   Bundles are helped.

   total quadwords in shared programs: 1133728 -> 1130554 (-0.28%)
   quadwords in affected programs: 187170 -> 183996 (-1.70%)
   helped: 1399
   HURT: 44
   Quadwords are helped.

   total registers in shared programs: 90640 -> 90559 (-0.09%)
   registers in affected programs: 2676 -> 2595 (-3.03%)
   helped: 202
   HURT: 124
   Inconclusive result (%-change mean confidence interval includes 0).

   total threads in shared programs: 55561 -> 55590 (0.05%)
   threads in affected programs: 50 -> 79 (58.00%)
   helped: 23
   HURT: 6
   Threads are helped.

   total spills in shared programs: 1386 -> 1402 (1.15%)
   spills in affected programs: 231 -> 247 (6.93%)
   helped: 2
   HURT: 13

   total fills in shared programs: 5159 -> 5285 (2.44%)
   fills in affected programs: 1282 -> 1408 (9.83%)
   helped: 11
   HURT: 16

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Reviewed-by: Italo Nicola <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23769>

---

 src/panfrost/midgard/midgard_ra.c       | 59 +++++++++++++++++++--------------
 src/panfrost/midgard/midgard_schedule.c |  2 +-
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c
index 1d3252627e3..85edd311fe3 100644
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@@ -173,6 +173,7 @@ mark_node_class(unsigned *bitfield, unsigned node)
 void
 mir_lower_special_reads(compiler_context *ctx)
 {
+   mir_compute_temp_count(ctx);
    size_t sz = BITSET_WORDS(ctx->temp_count) * sizeof(BITSET_WORD);
 
    /* Bitfields for the various types of registers we could have. aluw can
@@ -276,36 +277,46 @@ mir_lower_special_reads(compiler_context *ctx)
          /* Insert move before each read/write, depending on the
           * hazard we're trying to account for */
 
-         mir_foreach_instr_global_safe(ctx, pre_use) {
-            if (pre_use->type != classes[j])
-               continue;
+         mir_foreach_block(ctx, block_) {
+            midgard_block *block = (midgard_block *)block_;
+            midgard_instruction *mov = NULL;
 
-            if (hazard_write) {
-               if (pre_use->dest != i)
+            mir_foreach_instr_in_block_safe(block, pre_use) {
+               if (pre_use->type != classes[j])
                   continue;
 
-               midgard_instruction m = v_mov(idx, i);
-               m.dest_type = pre_use->dest_type;
-               m.src_types[1] = m.dest_type;
-               m.mask = pre_use->mask;
+               if (hazard_write) {
+                  if (pre_use->dest != i)
+                     continue;
 
-               midgard_instruction *use = mir_next_op(pre_use);
-               assert(use);
-               mir_insert_instruction_before(ctx, use, m);
-               mir_rewrite_index_dst_single(pre_use, i, idx);
-            } else {
-               if (!mir_has_arg(pre_use, i))
-                  continue;
+                  midgard_instruction m = v_mov(idx, i);
+                  m.dest_type = pre_use->dest_type;
+                  m.src_types[1] = m.dest_type;
+                  m.mask = pre_use->mask;
 
-               idx = spill_idx++;
+                  midgard_instruction *use = mir_next_op(pre_use);
+                  assert(use);
+                  mir_insert_instruction_before(ctx, use, m);
+                  mir_rewrite_index_dst_single(pre_use, i, idx);
+               } else {
+                  if (!mir_has_arg(pre_use, i))
+                     continue;
+
+                  unsigned mask = mir_from_bytemask(
+                     mir_round_bytemask_up(
+                        mir_bytemask_of_read_components(pre_use, i), 32),
+                     32);
 
-               midgard_instruction m = v_mov(i, idx);
-               m.mask = mir_from_bytemask(
-                  mir_round_bytemask_up(
-                     mir_bytemask_of_read_components(pre_use, i), 32),
-                  32);
-               mir_insert_instruction_before(ctx, pre_use, m);
-               mir_rewrite_index_src_single(pre_use, i, idx);
+                  if (mov == NULL || !mir_is_ssa(i)) {
+                     midgard_instruction m = v_mov(i, spill_idx++);
+                     m.mask = mask;
+                     mov = mir_insert_instruction_before(ctx, pre_use, m);
+                  } else {
+                     mov->mask |= mask;
+                  }
+
+                  mir_rewrite_index_src_single(pre_use, i, mov->dest);
+               }
             }
          }
       }
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 8dd258fa222..c6a51b90465 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -1602,8 +1602,8 @@ midgard_schedule_program(compiler_context *ctx)
    midgard_promote_uniforms(ctx);
 
    /* Must be lowered right before scheduling */
-   mir_squeeze_index(ctx);
    mir_lower_special_reads(ctx);
+   mir_squeeze_index(ctx);
 
    if (ctx->stage == MESA_SHADER_FRAGMENT) {
       mir_invalidate_liveness(ctx);

Reply via email to