On Broadwell, the negation source modifier on src registers has changed
meaning when used with logical operations (AND, OR, XOR, NOT): it now
performs a bitwise NOT of the source instead of an arithmetic negation.
Don't copy propagate a source when its negate src modifier is set and
the destination instruction is a logical op.

Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4.h               |  4 +-
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 68 +++++++++++++++-------
 2 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index fd58b3c..51da46c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -228,6 +228,8 @@ writemask(dst_reg reg, unsigned mask)
    return reg;
 }
 
+struct copy_entry;
+
 class vec4_instruction : public backend_instruction {
 public:
    DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
@@ -498,7 +500,7 @@ public:
                               vec4_instruction *last_rhs_inst);
 
    bool try_copy_propagation(vec4_instruction *inst, int arg,
-                             src_reg *values[4]);
+                             struct copy_entry *entry);
 
    /** Walks an exec_list of ir_instruction and sends it through this visitor. */
    void visit_instructions(const exec_list *list);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 83cf191..e537895 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -36,6 +36,11 @@ extern "C" {
 
 namespace brw {
 
+struct copy_entry {
+   src_reg *value[4];
+   enum opcode opcode;
+};
+
 static bool
 is_direct_copy(vec4_instruction *inst)
 {
@@ -195,24 +200,33 @@ try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
    return false;
 }
 
+static bool
+is_logic_op(enum opcode opcode)
+{
+   return (opcode == BRW_OPCODE_AND ||
+           opcode == BRW_OPCODE_OR  ||
+           opcode == BRW_OPCODE_XOR ||
+           opcode == BRW_OPCODE_NOT);
+}
+
 bool
 vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
-                                   src_reg *values[4])
+                                   struct copy_entry *entry)
 {
    /* For constant propagation, we only handle the same constant
     * across all 4 channels.  Some day, we should handle the 8-bit
     * float vector format, which would let us constant propagate
     * vectors better.
     */
-   src_reg value = *values[0];
+   src_reg value = *(entry->value[0]);
    for (int i = 1; i < 4; i++) {
       /* This is equals() except we don't care about the swizzle. */
-      if (value.file != values[i]->file ||
-         value.reg != values[i]->reg ||
-         value.reg_offset != values[i]->reg_offset ||
-         value.type != values[i]->type ||
-         value.negate != values[i]->negate ||
-         value.abs != values[i]->abs) {
+      if (value.file != entry->value[i]->file ||
+         value.reg != entry->value[i]->reg ||
+         value.reg_offset != entry->value[i]->reg_offset ||
+         value.type != entry->value[i]->type ||
+         value.negate != entry->value[i]->negate ||
+         value.abs != entry->value[i]->abs) {
         return false;
       }
    }
@@ -223,7 +237,7 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
     */
    int s[4];
    for (int i = 0; i < 4; i++) {
-      s[i] = BRW_GET_SWZ(values[i]->swizzle,
+      s[i] = BRW_GET_SWZ(entry->value[i]->swizzle,
                         BRW_GET_SWZ(inst->src[arg].swizzle, i));
    }
    value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
@@ -233,6 +247,14 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
        value.file != ATTR)
       return false;
 
+   if (brw->gen >=8) {
+      if (value.negate) {
+         if (is_logic_op(inst->opcode)) {
+            return false;
+         }
+      }
+   }
+
    if (inst->src[arg].abs) {
       value.negate = false;
       value.abs = true;
@@ -284,9 +306,9 @@ bool
 vec4_visitor::opt_copy_propagation()
 {
    bool progress = false;
-   src_reg *cur_value[virtual_grf_reg_count][4];
+   struct copy_entry entries[virtual_grf_reg_count];
 
-   memset(&cur_value, 0, sizeof(cur_value));
+   memset(&entries, 0, sizeof(entries));
 
    foreach_list(node, &this->instructions) {
       vec4_instruction *inst = (vec4_instruction *)node;
@@ -299,7 +321,7 @@ vec4_visitor::opt_copy_propagation()
        * src/glsl/opt_copy_propagation.cpp to track available copies.
        */
       if (!is_dominated_by_previous_instruction(inst)) {
-        memset(cur_value, 0, sizeof(cur_value));
+        memset(&entries, 0, sizeof(entries));
         continue;
       }
 
@@ -320,31 +342,32 @@ vec4_visitor::opt_copy_propagation()
 
         /* Find the regs that each swizzle component came from.
          */
-        src_reg *values[4];
+        struct copy_entry entry;
         int c;
         for (c = 0; c < 4; c++) {
-           values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
+           entry.value[c] = entries[reg].value[BRW_GET_SWZ(inst->src[i].swizzle, c)];
 
            /* If there's no available copy for this channel, bail.
             * We could be more aggressive here -- some channels might
             * not get used based on the destination writemask.
             */
-           if (!values[c])
+           if (!entry.value[c])
               break;
 
            /* We'll only be able to copy propagate if the sources are
             * all from the same file -- there's no ability to swizzle
             * 0 or 1 constants in with source registers like in i915.
             */
-           if (c > 0 && values[c - 1]->file != values[c]->file)
+           if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file)
               break;
         }
 
         if (c != 4)
            continue;
 
-        if (try_constant_propagation(inst, i, values) ||
-            try_copy_propagation(inst, i, values))
+         entry.opcode = entries[reg].opcode;
+         if (try_constant_propagation(inst, i, entry.value) ||
+            try_copy_propagation(inst, i, &entry))
            progress = true;
       }
 
@@ -360,7 +383,8 @@ vec4_visitor::opt_copy_propagation()
         bool direct_copy = is_direct_copy(inst);
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
-              cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL;
+              entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
+              entries[reg].opcode = inst->opcode;
            }
         }
 
@@ -368,12 +392,12 @@ vec4_visitor::opt_copy_propagation()
          * our destination's updated channels, as the two are no longer equal.
          */
         if (inst->dst.reladdr)
-           memset(cur_value, 0, sizeof(cur_value));
+           memset(&entries, 0, sizeof(entries));
         else {
            for (int i = 0; i < virtual_grf_reg_count; i++) {
               for (int j = 0; j < 4; j++) {
-                 if (is_channel_updated(inst, cur_value[i], j)){
-                    cur_value[i][j] = NULL;
+                 if (is_channel_updated(inst, entries[i].value, j)){
+                    entries[i].value[j] = NULL;
                  }
               }
            }
-- 
1.9.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to