The tgsi_umad function does not work for Cayman since it does not
populate the y, z and w slots for UMUL that Cayman requires.
---
 src/gallium/drivers/r600/r600_shader.c | 47 +++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 29facf7..aa23907 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2111,6 +2111,51 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+/*
+ * Emit UMAD (dst = src0 * src1 + src2, unsigned integers) for Cayman.
+ *
+ * For every channel enabled in the destination write mask, MULLO_UINT is
+ * issued in all four slots (x, y, z, w), because Cayman requires every slot
+ * to be populated; only the slot matching the channel writes its result,
+ * into ctx->temp_reg.  A second pass then adds the matching src2 channel to
+ * each product and stores the sum in the same destination channel.
+ *
+ * Nothing is emitted when the write mask is empty.
+ *
+ * Returns 0 on success, or the non-zero code returned by
+ * r600_bytecode_add_alu() on failure.
+ */
+static int cayman_umad(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       int chan, slot, src, r;
+       int last_chan = -1;
+
+       /* Highest channel present in the write mask (-1 if none). */
+       for (chan = 0; chan < 4; chan++) {
+               if (write_mask & (1 << chan))
+                       last_chan = chan;
+       }
+
+       /* Pass 1: temp_reg.chan = src0.chan * src1.chan */
+       for (chan = 0; chan <= last_chan; chan++) {
+               if (!(write_mask & (1 << chan)))
+                       continue;
+
+               for (slot = 0; slot < 4; slot++) {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP2_MULLO_UINT;
+                       for (src = 0; src < inst->Instruction.NumSrcRegs; src++) {
+                               r600_bytecode_src(&alu.src[src], &ctx->src[src], chan);
+                       }
+                       alu.dst.chan = slot;
+                       alu.dst.sel = ctx->temp_reg;
+                       /* All four slots are issued, but only one keeps its result. */
+                       alu.dst.write = (slot == chan);
+                       if (slot == 3)
+                               alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+
+       /*
+        * Pass 2: dst.chan = temp_reg.chan + src2.chan, once per written
+        * channel.  The original emitted this add for channel 0 only, so
+        * the y/z/w products were never accumulated or stored.
+        */
+       for (chan = 0; chan <= last_chan; chan++) {
+               if (!(write_mask & (1 << chan)))
+                       continue;
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
+
+               alu.op = ALU_OP2_ADD_INT;
+
+               alu.src[0].sel = ctx->temp_reg;
+               alu.src[0].chan = chan;
+
+               r600_bytecode_src(&alu.src[1], &ctx->src[2], chan);
+               /* Only the highest written channel carries the last flag. */
+               if (chan == last_chan)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
+
 /*
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
@@ -6356,7 +6401,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_U2F,       0, ALU_OP1_UINT_TO_FLT, tgsi_op2},
        {TGSI_OPCODE_UADD,      0, ALU_OP2_ADD_INT, tgsi_op2},
        {TGSI_OPCODE_UDIV,      0, ALU_OP0_NOP, tgsi_udiv},
-       {TGSI_OPCODE_UMAD,      0, ALU_OP0_NOP, tgsi_umad},
+       {TGSI_OPCODE_UMAD,      0, ALU_OP0_NOP, cayman_umad},
        {TGSI_OPCODE_UMAX,      0, ALU_OP2_MAX_UINT, tgsi_op2},
        {TGSI_OPCODE_UMIN,      0, ALU_OP2_MIN_UINT, tgsi_op2},
        {TGSI_OPCODE_UMOD,      0, ALU_OP0_NOP, tgsi_umod},
-- 
1.8.2

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to