Module: Mesa
Branch: main
Commit: 4e7a77709379f2d89518f61ceec661bd96c3e3df
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e7a77709379f2d89518f61ceec661bd96c3e3df

Author: Daniel Schürmann <[email protected]>
Date:   Wed May 19 09:23:20 2021 +0200

aco: try forming clauses even if reg_pressure exceeds the limit

This patch allows forming clauses even if the register pressure
is at the limit, with the effect that VMEM instructions are less
scattered after the first clause in a block.
It respects the previous clause size to avoid excessive moving
of VMEM instructions.
VMEM_CLAUSE_MAX_GRAB_DIST is further reduced to compensate
for some of these effects.

Totals from 28922 (19.26% of 150170) affected shaders: (GFX10.3)
VGPRs: 1546568 -> 1523072 (-1.52%); split: -1.52%, +0.00%
CodeSize: 117524892 -> 117510288 (-0.01%); split: -0.08%, +0.07%
MaxWaves: 605554 -> 611120 (+0.92%)
Instrs: 22292568 -> 22291927 (-0.00%); split: -0.10%, +0.09%
Latency: 488975399 -> 490230904 (+0.26%); split: -0.06%, +0.32%
InvThroughput: 117842300 -> 116521653 (-1.12%); split: -1.15%, +0.03%
VClause: 541550 -> 522464 (-3.52%); split: -9.73%, +6.20%
SClause: 718185 -> 718298 (+0.02%); split: -0.00%, +0.02%
Copies: 1420603 -> 1386949 (-2.37%); split: -2.64%, +0.27%
Branches: 559559 -> 559278 (-0.05%); split: -0.06%, +0.01%

Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10896>

---

 src/amd/compiler/aco_scheduler.cpp | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_scheduler.cpp 
b/src/amd/compiler/aco_scheduler.cpp
index 42b830826a4..bfa08f5b725 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -37,7 +37,7 @@
 #define SMEM_MAX_MOVES      (64 - ctx.num_waves * 4)
 #define VMEM_MAX_MOVES      (256 - ctx.num_waves * 16)
 /* creating clauses decreases def-use distances, so make it less aggressive 
the lower num_waves is */
-#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
+#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
 #define POS_EXP_MAX_MOVES         512
 
 namespace aco {
@@ -788,6 +788,7 @@ schedule_VMEM(sched_ctx& ctx, Block* block, 
std::vector<RegisterDemand>& registe
    int window_size = VMEM_WINDOW_SIZE;
    int max_moves = VMEM_MAX_MOVES;
    int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
+   bool only_clauses = false;
    int16_t k = 0;
 
    /* first, check if we have instructions before current to move down */
@@ -827,7 +828,23 @@ schedule_VMEM(sched_ctx& ctx, Block* block, 
std::vector<RegisterDemand>& registe
 
       /* if current depends on candidate, add additional dependencies and 
continue */
       bool can_move_down = !is_vmem || part_of_clause || 
candidate->definitions.empty();
-
+      if (only_clauses) {
+         /* In case of high register pressure, only try to form clauses,
+          * and only if the previous clause is not larger
+          * than the current one will be.
+          */
+         if (part_of_clause) {
+            int clause_size = cursor.insert_idx - cursor.insert_idx_clause;
+            int prev_clause_size = 1;
+            while (should_form_clause(current,
+                                      block->instructions[candidate_idx - 
prev_clause_size].get()))
+               prev_clause_size++;
+            if (prev_clause_size > clause_size + 1)
+               break;
+         } else {
+            can_move_down = false;
+         }
+      }
       HazardResult haz =
          perform_hazard_query(part_of_clause ? &clause_hq : &indep_hq, 
candidate.get(), false);
       if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
@@ -856,7 +873,13 @@ schedule_VMEM(sched_ctx& ctx, Block* block, 
std::vector<RegisterDemand>& registe
          ctx.mv.downwards_skip(cursor);
          continue;
       } else if (res == move_fail_pressure) {
-         break;
+         only_clauses = true;
+         if (part_of_clause)
+            break;
+         add_to_hazard_query(&indep_hq, candidate.get());
+         add_to_hazard_query(&clause_hq, candidate.get());
+         ctx.mv.downwards_skip(cursor);
+         continue;
       }
       if (part_of_clause)
          add_to_hazard_query(&indep_hq, candidate_ptr);

Reply via email to