From: Hi-Angel <hi-an...@yandex.ru> Changes turned out to be bigger than I expected, so I skipped over every place where I was in doubt. Still, it looks better.
Signed-off-by: Constantine Charlamov <hi-an...@yandex.ru> --- src/gallium/drivers/r600/r600_shader.c | 246 +++++++++------------------------ 1 file changed, 65 insertions(+), 181 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8562678d0c..905214f69b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -30,6 +30,7 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" @@ -378,14 +379,14 @@ static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, static int tgsi_last_instruction(unsigned writemask) { - int i, lasti = 0; + int i, last_ch = 0; - for (i = 0; i < 4; i++) { + TGSI_FOR_EACH_CHANNEL (i) { if (writemask & (1 << i)) { - lasti = i; + last_ch = i; } } - return lasti; + return last_ch; } static int tgsi_is_supported(struct r600_shader_ctx *ctx) @@ -2693,8 +2694,7 @@ static int r600_store_tcs_output(struct r600_shader_ctx *ctx) /* LDS write */ lasti = tgsi_last_instruction(write_mask); for (chan_index = 1; chan_index <= lasti; chan_index++) { - - if (!(write_mask & (1 << chan_index))) + if(!TGSI_IS_DST0_CHANNEL_ENABLED(inst, chan_index)) continue; r = single_alu_op2(ctx, ALU_OP2_ADD_INT, temp_reg, chan_index, @@ -2704,10 +2704,7 @@ static int r600_store_tcs_output(struct r600_shader_ctx *ctx) return r; } - for (chan_index = 0; chan_index <= lasti; chan_index++) { - if (!(write_mask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { if ((chan_index == 0 && ((write_mask & 3) == 3)) || (chan_index == 2 && ((write_mask & 0xc) == 0xc))) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -3747,7 +3744,7 @@ static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; unsigned 
write_mask = inst->Dst[0].Register.WriteMask; struct r600_bytecode_alu alu; - int chan_index, j, r, lasti = tgsi_last_instruction(write_mask); + int chan_index, j, r, lasti; int use_tmp = 0; if (singledest) { @@ -3770,11 +3767,7 @@ static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool } lasti = tgsi_last_instruction(write_mask); - for (chan_index = 0; chan_index <= lasti; chan_index++) { - - if (!(write_mask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (singledest) { @@ -3823,10 +3816,7 @@ static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool write_mask = inst->Dst[0].Register.WriteMask; /* move result from temp to dst */ - for (chan_index = 0; chan_index <= lasti; chan_index++) { - if (!(write_mask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; tgsi_dst(ctx, &inst->Dst[0], chan_index, &alu.dst); @@ -3912,10 +3902,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) op = ALU_OP2_MUL; - for (chan_index = 0; chan_index <= lasti; chan_index++) { - if (!(write_mask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (use_tmp) { alu.dst.sel = ctx->temp_reg; @@ -3943,10 +3930,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) if (use_tmp) { /* move result from temp to dst */ - for (chan_index = 0; chan_index <= lasti; chan_index++) { - if (!(write_mask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; tgsi_dst(ctx, &inst->Dst[0], chan_index, &alu.dst); @@ -3984,10 +3968,7 @@ static int 
tgsi_ineg(struct r600_shader_ctx *ctx) int chan_index, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx->inst_info->op; @@ -4015,10 +3996,7 @@ static int tgsi_dneg(struct r600_shader_ctx *ctx) int chan_index, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; @@ -4248,9 +4226,7 @@ static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) if (r) return r; - for (chan_index = 0 ; chan_index <= lasti; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.src[0].sel = t1; @@ -4304,11 +4280,8 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); int t1 = ctx->temp_reg; - for (k = 0; k <= lasti; k++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << k))) - continue; - - for (chan_index = 0 ; chan_index < 4; chan_index++) { + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, k) { + TGSI_FOR_EACH_CHANNEL(chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx->inst_info->op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { @@ -4325,9 +4298,7 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) } } - for (chan_index = 0 ; chan_index <= lasti; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; + 
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.src[0].sel = t1; @@ -4360,7 +4331,7 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; - for (chan_index = 0; chan_index < 4; chan_index++) { + TGSI_FOR_EACH_CHANNEL (chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx->inst_info->op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { @@ -4376,9 +4347,7 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) return r; } - for (chan_index = 0; chan_index <= lasti; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.src[0].sel = t1; @@ -4584,10 +4553,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) return r; /* replicate result */ - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; @@ -5940,10 +5906,7 @@ static int tgsi_f2i(struct r600_shader_ctx *ctx) unsigned write_mask = inst->Dst[0].Register.WriteMask; int last_inst = tgsi_last_instruction(write_mask); - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_TRUNC; @@ -5959,10 +5922,7 @@ static int tgsi_f2i(struct r600_shader_ctx *ctx) return r; } - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); alu.op = ctx->inst_info->op; @@ -5990,10 +5950,7 @@ static int tgsi_iabs(struct r600_shader_ctx *ctx) int last_inst = tgsi_last_instruction(write_mask); /* tmp = -src */ - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_SUB_INT; @@ -6012,10 +5969,7 @@ static int tgsi_iabs(struct r600_shader_ctx *ctx) } /* dst = (src >= 0 ? src : tmp) */ - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; @@ -6046,10 +6000,7 @@ static int tgsi_issg(struct r600_shader_ctx *ctx) int last_inst = tgsi_last_instruction(write_mask); /* tmp = (src >= 0 ? src : -1) */ - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; @@ -6070,10 +6021,7 @@ static int tgsi_issg(struct r600_shader_ctx *ctx) } /* dst = (tmp > 0 ? 
1 : tmp) */ - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDGT_INT; alu.is_op3 = 1; @@ -6163,10 +6111,7 @@ static int tgsi_bfi(struct r600_shader_ctx *ctx) t1 = ctx->temp_reg; - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { /* create mask tmp */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_BFM_INT; @@ -6185,10 +6130,7 @@ static int tgsi_bfi(struct r600_shader_ctx *ctx) t2 = r600_get_temp(ctx); - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { /* shift insert left */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_LSHL_INT; @@ -6205,10 +6147,7 @@ static int tgsi_bfi(struct r600_shader_ctx *ctx) return r; } - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { /* actual bitfield insert */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_BFI_INT; @@ -6247,10 +6186,7 @@ static int tgsi_msb(struct r600_shader_ctx *ctx) t1 = ctx->temp_reg; /* bit position is indexed from lsb by TGSI, and from msb by the hardware */ - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { /* t1 = FFBH_INT / FFBH_UINT */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx->inst_info->op; @@ -6268,10 +6204,7 @@ static int tgsi_msb(struct r600_shader_ctx *ctx) t2 = r600_get_temp(ctx); - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + 
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { /* t2 = 31 - t1 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_SUB_INT; @@ -6290,10 +6223,7 @@ static int tgsi_msb(struct r600_shader_ctx *ctx) return r; } - for (chan_index = 0; chan_index < 4; chan_index++) { - if (!(write_mask & (1<<chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { /* result = t1 >= 0 ? t2 : t1 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDGE_INT; @@ -6463,10 +6393,7 @@ static int tgsi_interp_egcm(struct r600_shader_ctx *ctx) // INTERP can't swizzle dst lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - for (i = 0; i <= lasti; i++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << i))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, i) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.src[0].sel = tmp; @@ -6559,10 +6486,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) if (ctx->src[j].abs) temp_regs[j] = r600_get_temp(ctx); } - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { @@ -6667,7 +6591,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l src_gpr = tgsi_tex_get_src_gpr(ctx, 0); if (src_requires_loading) { - for (chan_index = 0; chan_index < 4; chan_index++) { + TGSI_FOR_EACH_CHANNEL (chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); @@ -6702,11 +6626,8 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l if (ctx->bc->chip_class >= EVERGREEN) return 0; - for (chan_index = 0; chan_index < 4; chan_index++) { + 
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_AND_INT; @@ -7674,10 +7595,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) /* optimize if it's just an equal balance */ if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_ADD; r600_bytecode_src(&alu.src[0], &ctx->src[1], chan_index); @@ -7696,10 +7614,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) } /* 1 - src0 */ - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_ADD; alu.src[0].sel = V_SQ_ALU_SRC_1; @@ -7718,10 +7633,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) } /* (1 - src0) * src2 */ - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP2_MUL; alu.src[0].sel = ctx->temp_reg; @@ -7748,10 +7660,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) else temp_regs[1] = 0; - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; @@ -7799,10 +7708,7 @@ static int tgsi_cmp(struct 
r600_shader_ctx *ctx) temp_regs[j] = r600_get_temp(ctx); } - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = op; r = tgsi_make_src_for_op3(ctx, temp_regs[0], chan_index, &alu.src[0], &ctx->src[0]); @@ -7834,10 +7740,7 @@ static int tgsi_ucmp(struct r600_shader_ctx *ctx) int chan_index, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDE_INT; r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); @@ -8343,9 +8246,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - for (chan_index = 0; chan_index <= lasti; ++chan_index) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); alu.last = chan_index == lasti; alu.dst.sel = reg; @@ -8376,14 +8277,12 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.op = ALU_OP1_FLOOR; alu.dst.sel = ctx->bc->ar_reg; alu.dst.write = 1; - for (chan_index = 0; chan_index <= lasti; ++chan_index) { - if (inst->Dst[0].Register.WriteMask & (1 << chan_index)) { - alu.dst.chan = chan_index; - r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); - alu.last = chan_index == lasti; - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { + alu.dst.chan = chan_index; + r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); + alu.last = chan_index == lasti; + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) + return r; } 
memset(&alu, 0, sizeof(alu)); @@ -8407,13 +8306,11 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.dst.write = 1; /* FLT_TO_INT is trans-only on r600/r700 */ alu.last = TRUE; - for (chan_index = 0; chan_index <= lasti; ++chan_index) { - if (inst->Dst[0].Register.WriteMask & (1 << chan_index)) { - alu.dst.chan = chan_index; - r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { + alu.dst.chan = chan_index; + r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) + return r; } break; case TGSI_OPCODE_UARL: @@ -8421,14 +8318,12 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) alu.op = ALU_OP1_MOV; alu.dst.sel = ctx->bc->ar_reg; alu.dst.write = 1; - for (chan_index = 0; chan_index <= lasti; ++chan_index) { - if (inst->Dst[0].Register.WriteMask & (1 << chan_index)) { - alu.dst.chan = chan_index; - r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); - alu.last = chan_index == lasti; - if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) - return r; - } + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { + alu.dst.chan = chan_index; + r600_bytecode_src(&alu.src[0], &ctx->src[0], chan_index); + alu.last = chan_index == lasti; + if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) + return r; } break; default: @@ -8903,10 +8798,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); /* src0 * src1 */ - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { if (ctx->bc->chip_class == CAYMAN) { for (j = 0 ; j < 4; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -8944,10 +8836,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) } - for (chan_index = 0; 
chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); tgsi_dst(ctx, &inst->Dst[0], chan_index, &alu.dst); @@ -8992,10 +8881,7 @@ static int tgsi_pk2h(struct r600_shader_ctx *ctx) return r; /* dst.x = temp.y * 0x10000 + temp.x */ - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; - + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_MULADD_UINT24; alu.is_op3 = 1; @@ -9049,9 +8935,7 @@ static int tgsi_up2h(struct r600_shader_ctx *ctx) return r; /* dst.wz = dst.xy = f16_to_f32(temp.xy) */ - for (chan_index = 0; chan_index < lasti + 1; chan_index++) { - if (!(inst->Dst[0].Register.WriteMask & (1 << chan_index))) - continue; + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); tgsi_dst(ctx, &inst->Dst[0], chan_index, &alu.dst); alu.op = ALU_OP1_FLT16_TO_FLT32; -- 2.11.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev