Module: Mesa
Branch: staging/22.2
Commit: 40da2cee3dd34e74d7a992285193fe97c8fec494
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=40da2cee3dd34e74d7a992285193fe97c8fec494

Author: Pavel Ondračka <[email protected]>
Date:   Wed Aug 10 09:17:56 2022 +0200

r300: fix variables detection for paired ALU and TEX instructions in different 
branches

TEX instrutions can't write xyz and w to separate registers so we
need to create variables from them first, otherwise we can create
two variables from ALU writing the same register xyz and w in other
branch (this usually works when TEX is not present as the xyz and
w can read/write from different registers).

This fixes regalloc because the variables are later used as a
graph nodes.

The variable order should not matter but it slightly does (leading
to approx 0.3% shader-db temps increase as compared to previous
state), so just sort the variables list afterwards to be as close
to the previous behavior as possible and prevent the regression.

CC: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6936
Signed-off-by: Pavel Ondračka <[email protected]>
Reviewed-by: Filip Gawin <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17987>
(cherry picked from commit 88fd397c741c0e1fe0d851fbc566925078df6013)

---

 .pick_status.json                                  |  2 +-
 .../drivers/r300/compiler/radeon_variable.c        | 91 ++++++++++++++++++++--
 2 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index ab3ee104dd3..902e10a47f3 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -895,7 +895,7 @@
         "description": "r300: fix variables detection for paired ALU and TEX 
instructions in different branches",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c 
b/src/gallium/drivers/r300/compiler/radeon_variable.c
index c021f5cc1f5..4c276a4c12f 100644
--- a/src/gallium/drivers/r300/compiler/radeon_variable.c
+++ b/src/gallium/drivers/r300/compiler/radeon_variable.c
@@ -26,6 +26,7 @@
  */
 
 #include <stdio.h>
+#include <stdlib.h>
 #include "radeon_variable.h"
 
 #include "memory_pool.h"
@@ -325,6 +326,31 @@ static void get_variable_pair_helper(
        get_variable_helper(variable_list, new_var);
 }
 
+/**
+ * Compare function for sorting variable pointers by the lowest instruction
+ * IP from it and its friends.
+ */
+static int cmpfunc_variable_by_ip (const void * a, const void * b) {
+       struct rc_variable * var_a = *(struct rc_variable **)a;
+       struct rc_variable * var_b = *(struct rc_variable **)b;
+       unsigned int min_ip_a = var_a->Inst->IP;
+       unsigned int min_ip_b = var_b->Inst->IP;
+
+       /* Find the minimal IP of a variable and its friends */
+       while (var_a->Friend) {
+               var_a = var_a->Friend;
+               if (var_a->Inst->IP < min_ip_a)
+                       min_ip_a = var_a->Inst->IP;
+       }
+       while (var_b->Friend) {
+               var_b = var_b->Friend;
+               if (var_b->Inst->IP < min_ip_b)
+                       min_ip_b = var_b->Inst->IP;
+       }
+
+       return (int)min_ip_a - (int)min_ip_b;
+}
+
 /**
  * Generate a list of variables used by the shader program.  Each instruction
  * that writes to a register is considered a variable.  The struct rc_variable
@@ -337,14 +363,42 @@ struct rc_list * rc_get_variables(struct radeon_compiler 
* c)
        struct rc_instruction * inst;
        struct rc_list * variable_list = NULL;
 
+       /* We search for the variables in two loops in order to get it right in
+        * the following specific case
+        *
+        * IF aluresult.x___;
+        *   ...
+        *   MAD temp[0].xyz, src0.000, src0.111, src0.000
+        *   MAD temp[0].w, src0.0, src0.1, src0.0
+        * ELSE;
+        *   ...
+        *   TXB temp[0], temp[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE;
+        * ENDIF;
+        * src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = 
temp[0] SEM_WAIT
+        * MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000
+        * MAD temp[1].w, src0.w, src1.w, src0.0
+        *
+        * If we go just in one loop, we will first create two variables for the
+        * temp[0].xyz and temp[0].w. This happens because they don't share a 
reader
+        * as the src1.xyz and src1.w of the instruction where the value is 
used are
+        * in theory independent. They are not because the same register is 
written
+        * also by the texture instruction in the other branch and TEX can't 
write xyz
+        * and w separatelly.
+        *
+        * Therefore first search for RC_INSTRUCTION_NORMAL to create variables 
from
+        * the texture instruction and than the pair instructions will be 
properly
+        * marked as friends. So we will end with only one variable here as we 
should.
+        *
+        * This doesn't matter before the pair translation, because everything 
is
+        * RC_INSTRUCTION_NORMAL.
+        */
        for (inst = c->Program.Instructions.Next;
                                        inst != &c->Program.Instructions;
                                        inst = inst->Next) {
-               struct rc_reader_data reader_data;
-               struct rc_variable * new_var;
-               memset(&reader_data, 0, sizeof(reader_data));
-
                if (inst->Type == RC_INSTRUCTION_NORMAL) {
+                       struct rc_reader_data reader_data;
+                       struct rc_variable * new_var;
+                       memset(&reader_data, 0, sizeof(reader_data));
                        rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
                        if (reader_data.ReaderCount == 0) {
                                continue;
@@ -353,7 +407,15 @@ struct rc_list * rc_get_variables(struct radeon_compiler * 
c)
                                inst->U.I.DstReg.Index,
                                inst->U.I.DstReg.WriteMask, &reader_data);
                        get_variable_helper(&variable_list, new_var);
-               } else {
+               }
+       }
+
+       bool needs_sorting = false;
+       for (inst = c->Program.Instructions.Next;
+                                       inst != &c->Program.Instructions;
+                                       inst = inst->Next) {
+               if (inst->Type != RC_INSTRUCTION_NORMAL) {
+                       needs_sorting = true;
                        get_variable_pair_helper(&variable_list, c, inst,
                                                        &inst->U.P.RGB);
                        get_variable_pair_helper(&variable_list, c, inst,
@@ -361,6 +423,25 @@ struct rc_list * rc_get_variables(struct radeon_compiler * 
c)
                }
        }
 
+       if (variable_list && needs_sorting) {
+               unsigned int count = rc_list_count(variable_list);
+               struct rc_variable **variables = memory_pool_malloc(&c->Pool,
+                               sizeof(struct rc_variable *) * count);
+
+               struct rc_list * current = variable_list;
+               for(unsigned int i = 0; current; i++, current = current->Next) {
+                       struct rc_variable * var = current->Item;
+                       variables[i] = var;
+               }
+
+               qsort(variables, count, sizeof(struct rc_variable *), 
cmpfunc_variable_by_ip);
+
+               current = variable_list;
+               for(unsigned int i = 0; current; i++, current = current->Next) {
+                       current->Item = variables[i];
+               }
+       }
+
        return variable_list;
 }
 

Reply via email to