Module: Mesa
Branch: master
Commit: 3be5ee1574e0a3494c0550f4b7ae09117855d749
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3be5ee1574e0a3494c0550f4b7ae09117855d749

Author: Dave Airlie <airl...@redhat.com>
Date:   Fri Feb 20 10:47:15 2015 +1000

r600g: add doubles support for CAYMAN

Only a subset of AMD GPUs supported by r600g support doubles,
CAYMAN and CYPRESS are probably all we'll try and support, however
I don't have a CYPRESS so ignore that for now.

This disables SB support for doubles, as we think we need to
make the scheduler smarter to introduce delay slots.

[airlied: pushing this to avoid pain of rebasing, it mostly
works on cayman only so far, Glenn has some ideas about
delay slot issues we need to look into. turned off by
default for now]

Signed-off-by: Dave Airlie <airl...@redhat.com>

---

 src/gallium/drivers/r600/r600_asm.c    |   14 ++
 src/gallium/drivers/r600/r600_asm.h    |   15 ++
 src/gallium/drivers/r600/r600_isa.h    |    8 +-
 src/gallium/drivers/r600/r600_shader.c |  388 +++++++++++++++++++++++++++++++-
 src/gallium/drivers/r600/r600_shader.h |    2 +
 5 files changed, 421 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 9b356ba..bc69806 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -252,6 +252,12 @@ static int alu_uses_rel(struct r600_bytecode *bc, struct 
r600_bytecode_alu *alu)
        return 0;
 }
 
+static int is_alu_64bit_inst(struct r600_bytecode *bc, struct 
r600_bytecode_alu *alu)
+{
+       const struct alu_op_info *op = r600_isa_alu(alu->op);
+       return (op->flags & AF_64);
+}
+
 static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct 
r600_bytecode_alu *alu)
 {
        unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
@@ -576,6 +582,12 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 
        for (i = 0; i < max_slots; ++i) {
                if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && 
!prev[i]->dst.rel) {
+
+                       if (is_alu_64bit_inst(bc, prev[i])) {
+                               gpr[i] = -1;
+                               continue;
+                       }
+
                        gpr[i] = prev[i]->dst.sel;
                        /* cube writes more than PV.X */
                        if (is_alu_reduction_inst(bc, prev[i]))
@@ -591,6 +603,8 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
                if(!alu)
                        continue;
 
+               if (is_alu_64bit_inst(bc, alu))
+                       continue;
                num_src = r600_bytecode_get_num_operands(bc, alu);
                for (src = 0; src < num_src; ++src) {
                        if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index b282907..7cf3a09 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -280,4 +280,19 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
 
 void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
                           unsigned *num_format, unsigned *format_comp, 
unsigned *endian);
+
+static inline int fp64_switch(int i)
+{
+       switch (i) {
+       case 0:
+               return 1;
+       case 1:
+               return 0;
+       case 2:
+               return 3;
+       case 3:
+               return 2;
+       }
+       return 0;
+}
 #endif
diff --git a/src/gallium/drivers/r600/r600_isa.h 
b/src/gallium/drivers/r600/r600_isa.h
index fdbe1c0..27fc1e8 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -339,11 +339,11 @@ static const struct alu_op_info alu_op_table[] = {
                {"PRED_SETGT_64",             2, { 0x7C, 0xC7 },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_GT | AF_64 },
                {"PRED_SETE_64",              2, { 0x7D, 0xC8 },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_E | AF_64 },
                {"PRED_SETGE_64",             2, { 0x7E, 0xC9 },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_PRED | AF_CC_GE | AF_64 },
-               {"MUL_64",                    2, { 0x1B, 0xCA },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_64 },
+               {"MUL_64",                    2, { 0x1B, 0xCA },{   AF_V,  
AF_V,  AF_V,  AF_4V}, AF_64 },
                {"ADD_64",                    2, { 0x17, 0xCB },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_64 },
                {"MOVA_INT",                  1, { 0x18, 0xCC },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_MOVA },
-               {"FLT64_TO_FLT32",            1, { 0x1C, 0xCD },{   AF_V,  
AF_V,  AF_V,  AF_V},  0 },
-               {"FLT32_TO_FLT64",            1, { 0x1D, 0xCE },{   AF_V,  
AF_V,  AF_V,  AF_V},  0 },
+               {"FLT64_TO_FLT32",            1, { 0x1C, 0xCD },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_64 },
+               {"FLT32_TO_FLT64",            1, { 0x1D, 0xCE },{   AF_V,  
AF_V,  AF_V,  AF_V},  AF_64 },
                {"SAD_ACCUM_PREV_UINT",       2, {   -1, 0xCF },{      0,     
0,  AF_V,  AF_V},  AF_UINT_DST | AF_PREV_NEXT },
                {"DOT",                       2, {   -1, 0xD0 },{      0,     
0,  AF_V,  AF_V},  AF_PREV_NEXT },
                {"MUL_PREV",                  1, {   -1, 0xD1 },{      0,     
0,  AF_V,  AF_V},  AF_PREV_INTERLEAVE },
@@ -369,7 +369,7 @@ static const struct alu_op_info alu_op_table[] = {
                {"FMA",                       3, {   -1, 0x07 },{      0,     
0,  AF_V,  AF_V},  0 },
                {"MULADD_INT24",              3, {   -1, 0x08 },{      0,     
0,     0,  AF_V},  AF_INT_DST | AF_24 },
                {"CNDNE_64",                  3, {   -1, 0x09 },{      0,     
0,  AF_V,  AF_V},  AF_CMOV | AF_64 },
-               {"FMA_64",                    3, {   -1, 0x0A },{      0,     
0,  AF_V,  AF_V},  AF_64 },
+               {"FMA_64",                    3, {   -1, 0x0A },{      0,     
0,  AF_V,  AF_4V}, AF_64 },
                {"LERP_UINT",                 3, {   -1, 0x0B },{      0,     
0,  AF_V,  AF_V},  AF_UINT_DST },
                {"BIT_ALIGN_INT",             3, {   -1, 0x0C },{      0,     
0,  AF_V,  AF_V},  AF_INT_DST },
                {"BYTE_ALIGN_INT",            3, {   -1, 0x0D },{      0,     
0,  AF_V,  AF_V},  AF_INT_DST },
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 25a251e..1489298 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -47,7 +47,7 @@ MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
 These 8xx t-slot only opcodes become vector ops, with all four 
 slots expecting the arguments on sources a and b. Result is 
 broadcast to all channels.
-MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT
+MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
 These 8xx t-slot only opcodes become vector ops in the z, y, and 
 x slots.
 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
@@ -167,6 +167,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
     }
        /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array 
indexing) as it doesn't handle those currently */
        use_sb &= !shader->shader.uses_index_registers;
+       /* disable SB for shaders using doubles */
+       use_sb &= !shader->shader.uses_doubles;
 
        /* Check if the bytecode has already been built.  When using the llvm
         * backend, r600_shader_from_tgsi() will take care of building the
@@ -343,7 +345,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
        struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
        int j;
 
-       if (i->Instruction.NumDstRegs > 1) {
+       if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != 
TGSI_OPCODE_DFRACEXP) {
                R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
                return -EINVAL;
        }
@@ -1907,6 +1909,9 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
        shader->indirect_files = ctx.info.indirect_files;
+
+       shader->uses_doubles = ctx.info.uses_doubles;
+
        indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
        tgsi_parse_init(&ctx.parse, tokens);
        ctx.type = ctx.info.processor;
@@ -2679,6 +2684,167 @@ static int tgsi_last_instruction(unsigned writemask)
        return lasti;
 }
 
+
+
+static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, 
bool swap)
+{
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       struct r600_bytecode_alu alu;
+       int i, j, r, lasti = tgsi_last_instruction(write_mask);
+       int use_tmp = 0;
+
+       if (singledest) {
+               switch (write_mask) {
+               case 0x1:
+                       write_mask = 0x3;
+                       break;
+               case 0x2:
+                       use_tmp = 1;
+                       write_mask = 0x3;
+                       break;
+               case 0x4:
+                       write_mask = 0xc;
+                       break;
+               case 0x8:
+                       write_mask = 0xc;
+                       use_tmp = 3;
+                       break;
+               }
+       }
+
+       lasti = tgsi_last_instruction(write_mask);
+       for (i = 0; i <= lasti; i++) {
+
+               if (!(write_mask & (1 << i)))
+                       continue;
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+               if (singledest) {
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       if (use_tmp) {
+                               alu.dst.sel = ctx->temp_reg;
+                               alu.dst.chan = i;
+                               alu.dst.write = 1;
+                       }
+                       if (i == 1 || i == 3)
+                               alu.dst.write = 0;
+               } else
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+               alu.op = ctx->inst_info->op;
+               if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == 
TGSI_OPCODE_DABS) {
+                       r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+               } else if (!swap) {
+                       for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                               r600_bytecode_src(&alu.src[j], &ctx->src[j], 
fp64_switch(i));
+                       }
+               } else {
+                       r600_bytecode_src(&alu.src[0], &ctx->src[1], 
fp64_switch(i));
+                       r600_bytecode_src(&alu.src[1], &ctx->src[0], 
fp64_switch(i));
+               }
+
+               /* handle some special cases */
+               if (i == 1 || i == 3) {
+                       switch 
(ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
+                       case TGSI_OPCODE_SUB:
+                               r600_bytecode_src_toggle_neg(&alu.src[1]);
+                               break;
+                       case TGSI_OPCODE_DABS:
+                               r600_bytecode_src_set_abs(&alu.src[0]);
+                               break;
+                       default:
+                               break;
+                       }
+               }
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       if (use_tmp) {
+               write_mask = inst->Dst[0].Register.WriteMask;
+
+               /* move result from temp to dst */
+               for (i = 0; i <= lasti; i++) {
+                       if (!(write_mask & (1 << i)))
+                               continue;
+
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP1_MOV;
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+                       alu.src[0].sel = ctx->temp_reg;
+                       alu.src[0].chan = use_tmp - 1;
+                       alu.last = (i == lasti);
+
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+       return 0;
+}
+
+static int tgsi_op2_64(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       /* confirm writemaskiing */
+       if ((write_mask & 0x3) != 0x3 &&
+           (write_mask & 0xc) != 0xc) {
+               fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", 
write_mask);
+               return -1;
+       }
+       return tgsi_op2_64_params(ctx, false, false);
+}
+
+static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
+{
+       return tgsi_op2_64_params(ctx, true, false);
+}
+
+static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
+{
+       return tgsi_op2_64_params(ctx, true, true);
+}
+
+static int tgsi_op3_64(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, j, r;
+       int lasti = 3;
+       int tmp = r600_get_temp(ctx);
+
+       for (i = 0; i < lasti + 1; i++) {
+
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 
: 1);
+               }
+
+               if (inst->Dst[0].Register.WriteMask & (1 << i))
+                       tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               else
+                       alu.dst.sel = tmp;
+
+               alu.dst.chan = i;
+               alu.is_op3 = 1;
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
 {
        struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
@@ -2795,6 +2961,152 @@ static int tgsi_ineg(struct r600_shader_ctx *ctx)
 
 }
 
+static int tgsi_dneg(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int i, r;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+       for (i = 0; i < lasti + 1; i++) {
+
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+
+               if (i == 1 || i == 3)
+                       r600_bytecode_src_toggle_neg(&alu.src[0]);
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+               if (i == lasti) {
+                       alu.last = 1;
+               }
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+
+}
+
+static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       unsigned write_mask = inst->Dst[0].Register.WriteMask;
+       int i, j, r;
+       int firsti = write_mask == 0xc ? 2 : 0;
+
+       for (i = 0; i <= 3; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = i;
+               alu.dst.write = 1;
+               for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                       r600_bytecode_src(&alu.src[j], &ctx->src[j], 
fp64_switch(i));
+               }
+
+               if (i == 3)
+                       alu.last = 1;
+
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       /* MOV first two channels to writemask dst0 */
+       for (i = 0; i <= 1; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].chan = i + 2;
+               alu.src[0].sel = ctx->temp_reg;
+
+               tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
+               alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + 
i)) & 1;
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0; i <= 3; i++) {
+               if (inst->Dst[1].Register.WriteMask & (1 << i)) {
+                       /* MOV third channels to writemask dst1 */
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ALU_OP1_MOV;
+                       alu.src[0].chan = 1;
+                       alu.src[0].sel = ctx->temp_reg;
+
+                       tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
+                       alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+                       break;
+               }
+       }
+       return 0;
+}
+
+static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       int i, r;
+       struct r600_bytecode_alu alu;
+       int last_slot = 3;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+       int t1 = ctx->temp_reg;
+
+       /* these have to write the result to X/Y by the looks of it */
+       for (i = 0 ; i < last_slot; i++) {
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ctx->inst_info->op;
+
+               /* should only be one src regs */
+               assert (inst->Instruction.NumSrcRegs == 1);
+
+               r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
+               r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
+
+               /* RSQ should take the absolute value of src */
+               if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == 
TGSI_OPCODE_DRSQ ||
+                   ctx->parse.FullToken.FullInstruction.Instruction.Opcode == 
TGSI_OPCODE_DSQRT) {
+                       r600_bytecode_src_set_abs(&alu.src[1]);
+               }
+               alu.dst.sel = t1;
+               alu.dst.chan = i;
+               alu.dst.write = (i == 0 || i == 1);
+
+               if (i == last_slot - 1)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       for (i = 0 ; i <= lasti; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = t1;
+               alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = 1;
+               if (i == lasti)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
@@ -2873,6 +3185,55 @@ static int cayman_mul_int_instr(struct r600_shader_ctx 
*ctx)
        return 0;
 }
 
+
+static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+       int i, j, k, r;
+       struct r600_bytecode_alu alu;
+       int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+       int t1 = ctx->temp_reg;
+
+       for (k = 0; k <= 2; k++) {
+               if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
+                       continue;
+
+               for (i = 0; i < 4; i++) {
+                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                       alu.op = ctx->inst_info->op;
+                       for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+                               r600_bytecode_src(&alu.src[j], &ctx->src[j], k 
* 2 + ((i == 3) ? 0 : 1));;
+                       }
+                       alu.dst.sel = t1;
+                       alu.dst.chan = i;
+                       alu.dst.write = 1;
+                       if (i == 3)
+                               alu.last = 1;
+                       r = r600_bytecode_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+       }
+
+       for (i = 0; i <= lasti; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = t1;
+               alu.src[0].chan = i;
+               tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+               alu.dst.write = 1;
+               if (i == lasti)
+                       alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+       }
+
+       return 0;
+}
+
 /*
  * r600 - trunc to -PI..PI range
  * r700 - normalize by dividing by 2PI
@@ -7965,5 +8326,28 @@ static const struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] =
        [TGSI_OPCODE_INTERP_CENTROID]   = { ALU_OP0_NOP, tgsi_interp_egcm},
        [TGSI_OPCODE_INTERP_SAMPLE]     = { ALU_OP0_NOP, tgsi_interp_egcm},
        [TGSI_OPCODE_INTERP_OFFSET]     = { ALU_OP0_NOP, tgsi_interp_egcm},
+       [TGSI_OPCODE_F2D]       = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
+       [TGSI_OPCODE_D2F]       = { ALU_OP1_FLT64_TO_FLT32, 
tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DABS]      = { ALU_OP1_MOV, tgsi_op2_64},
+       [TGSI_OPCODE_DNEG]      = { ALU_OP2_ADD_64, tgsi_dneg},
+       [TGSI_OPCODE_DADD]      = { ALU_OP2_ADD_64, tgsi_op2_64},
+       [TGSI_OPCODE_DMUL]      = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+       [TGSI_OPCODE_DMAX]      = { ALU_OP2_MAX_64, tgsi_op2_64},
+       [TGSI_OPCODE_DMIN]      = { ALU_OP2_MIN_64, tgsi_op2_64},
+       [TGSI_OPCODE_DSLT]      = { ALU_OP2_SETGT_64, 
tgsi_op2_64_single_dest_s},
+       [TGSI_OPCODE_DSGE]      = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DSEQ]      = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DSNE]      = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
+       [TGSI_OPCODE_DRCP]      = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
+       [TGSI_OPCODE_DSQRT]     = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
+       [TGSI_OPCODE_DMAD]      = { ALU_OP3_FMA_64, tgsi_op3_64},
+       [TGSI_OPCODE_DFRAC]     = { ALU_OP1_FRACT_64, tgsi_op2_64},
+       [TGSI_OPCODE_DLDEXP]    = { ALU_OP2_LDEXP_64, tgsi_op2_64},
+       [TGSI_OPCODE_DFRACEXP]  = { ALU_OP1_FREXP_64, tgsi_dfracexp},
+       [TGSI_OPCODE_D2I]       = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_I2D]       = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_D2U]       = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_U2D]       = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_DRSQ]      = { ALU_OP2_RECIPSQRT_64, 
cayman_emit_double_instr},
        [TGSI_OPCODE_LAST]      = { ALU_OP0_NOP, tgsi_unsupported},
 };
diff --git a/src/gallium/drivers/r600/r600_shader.h 
b/src/gallium/drivers/r600/r600_shader.h
index f49e1c1..48de9cd 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -89,6 +89,8 @@ struct r600_shader {
        unsigned                vs_as_gs_a;
        unsigned                ps_prim_id_input;
        struct r600_shader_array * arrays;
+
+       boolean                 uses_doubles;
 };
 
 union r600_shader_key {

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to