From: "Juan A. Suarez Romero" <jasua...@igalia.com>

Our current data flow analysis does not take into account that channels
on 64-bit operands are 64-bit. This is a problem when the same register
is accessed using both 64-bit and 32-bit channels. This is very common
in operations where we need to access 64-bit data in 32-bit chunks,
such as the double packing and packing operations.

This patch changes the analysis by checking the bits that each source
or destination datatype needs. Actually, rather than bits, we use
blocks of 32bits, which is the minimum channel size.

Because a vgrf can contain a dvec4 (256 bits), we reserve 8
32-bit blocks to map the channels.

v2 (Curro):
  - Simplify code by making the var_from_reg helpers take an extra
    argument with the register component we want.
  - Fix a couple of cases where we had to update the code to the new
    way of representing live variables.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp             |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp         |  2 +-
 .../dri/i965/brw_vec4_dead_code_eliminate.cpp      | 25 +++++++++--------
 .../drivers/dri/i965/brw_vec4_live_variables.cpp   | 32 +++++++++++-----------
 .../drivers/dri/i965/brw_vec4_live_variables.h     | 15 ++++++----
 5 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 3191eab..34cab04 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1140,7 +1140,7 @@ vec4_visitor::opt_register_coalesce()
       /* Can't coalesce this GRF if someone else was going to
        * read it later.
        */
-      if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 4) > ip)
+      if (var_range_end(var_from_reg(alloc, dst_reg(inst->src[0])), 8) > ip)
         continue;
 
       /* We need to check interference with the final destination between this
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 1b91db9..bef897a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -246,7 +246,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
              * more -- a sure sign they'll fail operands_match().
              */
             if (src->file == VGRF) {
-               if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 4) < ip) {
+               if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) {
                   entry->remove();
                   ralloc_free(entry);
                   break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 950c6c8..6a80810 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -57,12 +57,13 @@ vec4_visitor::dead_code_eliminate()
          if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
              (inst->dst.is_null() && inst->writes_flag())){
             bool result_live[4] = { false };
-
             if (inst->dst.file == VGRF) {
-               for (unsigned i = 0; i < regs_written(inst); i++) {
-                  for (int c = 0; c < 4; c++)
-                     result_live[c] |= BITSET_TEST(
-                        live, var_from_reg(alloc, offset(inst->dst, i), c));
+               for (unsigned i = 0; i < 2 * regs_written(inst); i++) {
+                  for (int c = 0; c < 4; c++) {
+                     const unsigned v =
+                        var_from_reg(alloc, inst->dst, c, i);
+                     result_live[c] |= BITSET_TEST(live, v);
+                  }
                }
             } else {
                for (unsigned c = 0; c < 4; c++)
@@ -111,11 +112,12 @@ vec4_visitor::dead_code_eliminate()
 
          if (inst->dst.file == VGRF && !inst->predicate &&
              !inst->is_align1_partial_write()) {
-            for (unsigned i = 0; i < regs_written(inst); i++) {
+            for (unsigned i = 0; i < 2 * regs_written(inst); i++) {
                for (int c = 0; c < 4; c++) {
                   if (inst->dst.writemask & (1 << c)) {
-                     BITSET_CLEAR(live, var_from_reg(alloc,
-                                                     offset(inst->dst, i), c));
+                     const unsigned v =
+                        var_from_reg(alloc, inst->dst, c, i);
+                     BITSET_CLEAR(live, v);
                   }
                }
             }
@@ -133,10 +135,11 @@ vec4_visitor::dead_code_eliminate()
 
          for (int i = 0; i < 3; i++) {
             if (inst->src[i].file == VGRF) {
-               for (unsigned j = 0; j < regs_read(inst, i); j++) {
+               for (unsigned j = 0; j < 2 * regs_read(inst, i); j++) {
                   for (int c = 0; c < 4; c++) {
-                     BITSET_SET(live, var_from_reg(alloc,
-                                                   offset(inst->src[i], j), 
c));
+                     const unsigned v =
+                        var_from_reg(alloc, inst->src[i], c, j);
+                     BITSET_SET(live, v);
                   }
                }
             }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 20344ed..4827798 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -76,10 +76,10 @@ vec4_live_variables::setup_def_use()
         /* Set use[] for this instruction */
         for (unsigned int i = 0; i < 3; i++) {
            if (inst->src[i].file == VGRF) {
-               for (unsigned j = 0; j < regs_read(inst, i); j++) {
+               for (unsigned j = 0; j < 2 * regs_read(inst, i); j++) {
                   for (int c = 0; c < 4; c++) {
                      const unsigned v =
-                        var_from_reg(alloc, offset(inst->src[i], j), c);
+                        var_from_reg(alloc, inst->src[i], c, j);
                      if (!BITSET_TEST(bd->def, v))
                         BITSET_SET(bd->use, v);
                   }
@@ -99,11 +99,11 @@ vec4_live_variables::setup_def_use()
          */
         if (inst->dst.file == VGRF &&
             (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
-            for (unsigned i = 0; i < regs_written(inst); i++) {
+            for (unsigned i = 0; i < 2 * regs_written(inst); i++) {
                for (int c = 0; c < 4; c++) {
                   if (inst->dst.writemask & (1 << c)) {
                      const unsigned v =
-                        var_from_reg(alloc, offset(inst->dst, i), c);
+                        var_from_reg(alloc, inst->dst, c, i);
                      if (!BITSET_TEST(bd->use, v))
                         BITSET_SET(bd->def, v);
                   }
@@ -188,7 +188,7 @@ vec4_live_variables::vec4_live_variables(const 
simple_allocator &alloc,
 {
    mem_ctx = ralloc_context(NULL);
 
-   num_vars = alloc.total_size * 4;
+   num_vars = alloc.total_size * 8;
    block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 
    bitset_words = BITSET_WORDS(num_vars);
@@ -238,14 +238,14 @@ vec4_visitor::calculate_live_intervals()
    if (this->live_intervals)
       return;
 
-   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
-   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
+   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
+   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 8);
    ralloc_free(this->virtual_grf_start);
    ralloc_free(this->virtual_grf_end);
    this->virtual_grf_start = start;
    this->virtual_grf_end = end;
 
-   for (unsigned i = 0; i < this->alloc.total_size * 4; i++) {
+   for (unsigned i = 0; i < this->alloc.total_size * 8; i++) {
       start[i] = MAX_INSTRUCTION;
       end[i] = -1;
    }
@@ -257,10 +257,10 @@ vec4_visitor::calculate_live_intervals()
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == VGRF) {
-            for (unsigned j = 0; j < regs_read(inst, i); j++) {
+            for (unsigned j = 0; j < 2 * regs_read(inst, i); j++) {
                for (int c = 0; c < 4; c++) {
                   const unsigned v =
-                     var_from_reg(alloc, offset(inst->src[i], j), c);
+                     var_from_reg(alloc, inst->src[i], c, j);
                   start[v] = MIN2(start[v], ip);
                   end[v] = ip;
                }
@@ -269,11 +269,11 @@ vec4_visitor::calculate_live_intervals()
       }
 
       if (inst->dst.file == VGRF) {
-         for (unsigned i = 0; i < regs_written(inst); i++) {
+         for (unsigned i = 0; i < 2 * regs_written(inst); i++) {
             for (int c = 0; c < 4; c++) {
                if (inst->dst.writemask & (1 << c)) {
                   const unsigned v =
-                     var_from_reg(alloc, offset(inst->dst, i), c);
+                     var_from_reg(alloc, inst->dst, c, i);
                   start[v] = MIN2(start[v], ip);
                   end[v] = ip;
                }
@@ -340,8 +340,8 @@ vec4_visitor::var_range_end(unsigned v, unsigned n) const
 bool
 vec4_visitor::virtual_grf_interferes(int a, int b)
 {
-   return !((var_range_end(4 * alloc.offsets[a], 4 * alloc.sizes[a]) <=
-             var_range_start(4 * alloc.offsets[b], 4 * alloc.sizes[b])) ||
-            (var_range_end(4 * alloc.offsets[b], 4 * alloc.sizes[b]) <=
-             var_range_start(4 * alloc.offsets[a], 4 * alloc.sizes[a])));
+   return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
+             var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
+            (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
+             var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h 
b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
index 2fbcaa1..26a074f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
@@ -78,23 +78,28 @@ protected:
    void *mem_ctx;
 };
 
+/* Returns the variable index for the k-th dword of the c-th component of
+ * register reg */
 inline unsigned
 var_from_reg(const simple_allocator &alloc, const src_reg &reg,
-             unsigned c = 0)
+              unsigned c = 0, unsigned k = 0)
 {
    assert(reg.file == VGRF && reg.nr < alloc.count &&
           reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4);
-   return (4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
-           BRW_GET_SWZ(reg.swizzle, c));
+   const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
+   return 8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
+           (BRW_GET_SWZ(reg.swizzle, c) + k / csize * 4) * csize + k % csize;
 }
 
 inline unsigned
 var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
-             unsigned c = 0)
+              unsigned c = 0, unsigned k = 0)
 {
    assert(reg.file == VGRF && reg.nr < alloc.count &&
           reg.offset / REG_SIZE < alloc.sizes[reg.nr] && c < 4);
-   return 4 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) + c;
+   const unsigned csize = DIV_ROUND_UP(type_sz(reg.type), 4);
+   return 8 * (alloc.offsets[reg.nr] + reg.offset / REG_SIZE) +
+      (c + k / csize * 4) * csize + k % csize;
 }
 
 } /* namespace brw */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to