Module: Mesa
Branch: main
Commit: c3f51a5dcf2d7f2987ee34e5c485f9fabfdddf61
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3f51a5dcf2d7f2987ee34e5c485f9fabfdddf61
Author: Pavel Ondračka <[email protected]>
Date: Sun Aug 28 18:41:08 2022 +0200

r300: allow presubtract when both ADD sources are negative

Current code doesn't handle this, however it is easy to make it work by
moving the negate to the presubtract source. Minor win in shader-db,
mostly with Unigine shaders.

Shader-db RV530:
total instructions in shared programs: 136382 -> 136236 (-0.11%)
instructions in affected programs: 9911 -> 9765 (-1.47%)
total temps in shared programs: 18939 -> 18942 (0.02%)
temps in affected programs: 37 -> 40 (8.11%)

Reviewed-by: Filip Gawin <[email protected]>
Signed-off-by: Pavel Ondračka <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18289>

---

 .../drivers/r300/compiler/radeon_optimize.c | 26 +++++++++++-----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
index 4d8f5cbf031..bc4af2c81ee 100644
--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -504,25 +504,29 @@ static void presub_replace_add(
 {
 	rc_presubtract_op presub_opcode;
 
-	/* This function assumes that inst_add->U.I.SrcReg[0] and
-	 * inst_add->U.I.SrcReg[1] aren't both negative.
-	 */
-	assert(!(inst_add->U.I.SrcReg[1].Negate && inst_add->U.I.SrcReg[0].Negate));
-
-	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
+	unsigned int negates = 0;
+	if (inst_add->U.I.SrcReg[0].Negate)
+		negates++;
+	if (inst_add->U.I.SrcReg[1].Negate)
+		negates++;
+	assert(negates != 2 || inst_add->U.I.SrcReg[1].Negate == inst_add->U.I.SrcReg[0].Negate);
+
+	if (negates == 1)
 		presub_opcode = RC_PRESUB_SUB;
 	else
 		presub_opcode = RC_PRESUB_ADD;
 
-	if (inst_add->U.I.SrcReg[1].Negate) {
+	if (inst_add->U.I.SrcReg[1].Negate && negates == 1) {
 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
 	} else {
 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
 	}
-	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
-	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
+	/* If both sources are negative we can move the negate to the presub. */
+	unsigned negate_mask = negates == 1 ? 0 : inst_add->U.I.SrcReg[0].Negate;
+	inst_reader->U.I.PreSub.SrcReg[0].Negate = negate_mask;
+	inst_reader->U.I.PreSub.SrcReg[1].Negate = negate_mask;
 	inst_reader->U.I.PreSub.Opcode = presub_opcode;
 	inst_reader->U.I.SrcReg[src_index] =
 			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
@@ -596,10 +600,6 @@ static int peephole_add_presub_add(
 	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
 		return 0;
 
-	/* presub_replace_add() assumes only one is negative */
-	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
-		return 0;
-
 	/* if src0 is negative, at least all bits of dstmask have to be set */
 	if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
 		return 0;
