Re: [Mesa-dev] [PATCH] r600g: fix op3 abs issue
On Tue, 31 Mar 2015 07:27:50 +0200, Dave Airlie airl...@gmail.com wrote: From: Dave Airlie airl...@redhat.com This code to handle absolute values on op3 srcs was a bit too simple, it really needs a temp reg per src, not one per channel, make it easier and let sb clean up the mess. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89831 Signed-off-by: Dave Airlie airl...@redhat.com --- src/gallium/drivers/r600/r600_shader.c | 51 ++ 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ff2c784..1367599 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4864,10 +4864,9 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru } static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, - unsigned temp, int temp_chan, - struct r600_bytecode_alu_src *bc_src, - const struct r600_shader_src *shader_src, - unsigned chan) + unsigned temp, int chan, + struct r600_bytecode_alu_src *bc_src, + const struct r600_shader_src *shader_src) { struct r600_bytecode_alu alu; int r; @@ -4880,7 +4879,7 @@ static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, memset(alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.dst.sel = temp; - alu.dst.chan = temp_chan; + alu.dst.chan = chan; alu.dst.write = 1; alu.src[0] = *bc_src; @@ -4891,7 +4890,7 @@ static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, memset(bc_src, 0, sizeof(*bc_src)); bc_src-sel = temp; - bc_src-chan = temp_chan; + bc_src-chan = chan; } return 0; } @@ -4902,7 +4901,13 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst-Dst[0].Register.WriteMask); + int temp_regs[4]; + for (j = 0; j inst-Instruction.NumSrcRegs; j++) { + temp_regs[j] = 0; + if (ctx-src[j].abs) + temp_regs[j] = r600_get_temp(ctx); + } for (i = 0; i lasti + 1; i++) { if (!(inst-Dst[0].Register.WriteMask (1 i))) continue; @@ -4910,7 +4915,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx-inst_info-op; for (j = 0; j inst-Instruction.NumSrcRegs; j++) { - r = tgsi_make_src_for_op3(ctx, ctx-temp_reg, j, alu.src[j], ctx-src[j], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, alu.src[j], ctx-src[j]); if (r) return r; } @@ -6003,7 +6008,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = ctx-parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst-Dst[0].Register.WriteMask); - unsigned i, extra_temp; + unsigned i, temp_regs[2]; int r; /* optimize if it's just an equal balance */ @@ -6073,10 +6078,15 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) } /* src0 * src1 + (1 - src0) * src2 */ - if (ctx-src[0].abs || ctx-src[1].abs) /* XXX avoid dupliating condition */ - extra_temp = r600_get_temp(ctx); +if (ctx-src[0].abs) Rest of patch uses tabs for indendation, this line spaces. + temp_regs[0] = r600_get_temp(ctx); + else + temp_regs[0] = 0; + if (ctx-src[1].abs) + temp_regs[1] = r600_get_temp(ctx); else - extra_temp = 0; + temp_regs[1] = 0; + for (i = 0; i lasti + 1; i++) { if (!(inst-Dst[0].Register.WriteMask (1 i))) continue; @@ -6084,10 +6094,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; - r = tgsi_make_src_for_op3(ctx, extra_temp, 0, alu.src[0], ctx-src[0], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, alu.src[0], ctx-src[0]); if (r) return r; - r = tgsi_make_src_for_op3(ctx, extra_temp, 1, alu.src[1], ctx-src[1], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, alu.src[1], ctx-src[1]); if (r) return r;
[Mesa-dev] [PATCH] r600g: fix op3 abs issue
From: Dave Airlie airl...@redhat.com This code to handle absolute values on op3 srcs was a bit too simple, it really needs a temp reg per src, not one per channel, make it easier and let sb clean up the mess. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89831 Signed-off-by: Dave Airlie airl...@redhat.com --- src/gallium/drivers/r600/r600_shader.c | 51 ++ 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ff2c784..1367599 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4864,10 +4864,9 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru } static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, - unsigned temp, int temp_chan, - struct r600_bytecode_alu_src *bc_src, - const struct r600_shader_src *shader_src, - unsigned chan) + unsigned temp, int chan, + struct r600_bytecode_alu_src *bc_src, + const struct r600_shader_src *shader_src) { struct r600_bytecode_alu alu; int r; @@ -4880,7 +4879,7 @@ static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, memset(alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.dst.sel = temp; - alu.dst.chan = temp_chan; + alu.dst.chan = chan; alu.dst.write = 1; alu.src[0] = *bc_src; @@ -4891,7 +4890,7 @@ static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, memset(bc_src, 0, sizeof(*bc_src)); bc_src-sel = temp; - bc_src-chan = temp_chan; + bc_src-chan = chan; } return 0; } @@ -4902,7 +4901,13 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst-Dst[0].Register.WriteMask); + int temp_regs[4]; + for (j = 0; j inst-Instruction.NumSrcRegs; j++) { + temp_regs[j] = 0; + if (ctx-src[j].abs) + temp_regs[j] = r600_get_temp(ctx); + } for (i = 0; i lasti + 1; i++) { if (!(inst-Dst[0].Register.WriteMask (1 i))) continue; @@ -4910,7 +4915,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx-inst_info-op; for (j = 0; j inst-Instruction.NumSrcRegs; j++) { - r = tgsi_make_src_for_op3(ctx, ctx-temp_reg, j, alu.src[j], ctx-src[j], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, alu.src[j], ctx-src[j]); if (r) return r; } @@ -6003,7 +6008,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = ctx-parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst-Dst[0].Register.WriteMask); - unsigned i, extra_temp; + unsigned i, temp_regs[2]; int r; /* optimize if it's just an equal balance */ @@ -6073,10 +6078,15 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) } /* src0 * src1 + (1 - src0) * src2 */ - if (ctx-src[0].abs || ctx-src[1].abs) /* XXX avoid dupliating condition */ - extra_temp = r600_get_temp(ctx); +if (ctx-src[0].abs) + temp_regs[0] = r600_get_temp(ctx); + else + temp_regs[0] = 0; + if (ctx-src[1].abs) + temp_regs[1] = r600_get_temp(ctx); else - extra_temp = 0; + temp_regs[1] = 0; + for (i = 0; i lasti + 1; i++) { if (!(inst-Dst[0].Register.WriteMask (1 i))) continue; @@ -6084,10 +6094,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; - r = tgsi_make_src_for_op3(ctx, extra_temp, 0, alu.src[0], ctx-src[0], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, alu.src[0], ctx-src[0]); if (r) return r; - r = tgsi_make_src_for_op3(ctx, extra_temp, 1, alu.src[1], ctx-src[1], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, alu.src[1], ctx-src[1]); if (r) return r; alu.src[2].sel = ctx-temp_reg; @@ -6109,8 +6119,15 @@ static int