A full check is needed when not all bank swizzles in the group are forced
(e.g. when trying to merge an interp_* group with the next instruction).

Signed-off-by: Vadim Girlin <vadimgir...@gmail.com>
---

Tested on evergreen without regressions.

 src/gallium/drivers/r600/r600_asm.c |   30 ++++++++++++++++++++----------
 1 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 0311b56..604cb60 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -696,15 +696,19 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
 {
        struct alu_bank_swizzle bs;
        int bank_swizzle[5];
-       int i, r = 0, forced = 0;
+       int i, r = 0, forced = 1;
        boolean scalar_only = bc->chip_class == CAYMAN ? false : true;
        int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 
        for (i = 0; i < max_slots; i++) {
-               if (slots[i] && slots[i]->bank_swizzle_force) {
-                       slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
-                       forced = 1;
+               if (slots[i]) {
+                       if (slots[i]->bank_swizzle_force) {
+                               slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+                       } else {
+                               forced = 0;
+                       }
                }
+
                if (i < 4 && slots[i])
                        scalar_only = false;
        }
@@ -714,7 +718,11 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
        /* Just check every possible combination of bank swizzle.
         * Not very efficent, but works on the first try in most of the cases. */
        for (i = 0; i < 4; i++)
-               bank_swizzle[i] = SQ_ALU_VEC_012;
+               if (!slots[i] || !slots[i]->bank_swizzle_force)
+                       bank_swizzle[i] = SQ_ALU_VEC_012;
+               else
+                       bank_swizzle[i] = slots[i]->bank_swizzle;
+
        bank_swizzle[4] = SQ_ALU_SCL_210;
        while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
 
@@ -751,11 +759,13 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
                        bank_swizzle[4]++;
                } else {
                        for (i = 0; i < max_slots; i++) {
-                               bank_swizzle[i]++;
-                               if (bank_swizzle[i] <= SQ_ALU_VEC_210)
-                                       break;
-                               else
-                                       bank_swizzle[i] = SQ_ALU_VEC_012;
+                               if (!slots[i] || !slots[i]->bank_swizzle_force) {
+                                       bank_swizzle[i]++;
+                                       if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+                                               break;
+                                       else
+                                               bank_swizzle[i] = SQ_ALU_VEC_012;
+                               }
                        }
                }
        }
-- 
1.7.6

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to