Module: Mesa Branch: main Commit: 23dfae4c810e5e31cea647b7803700b0fcd4eb96 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=23dfae4c810e5e31cea647b7803700b0fcd4eb96
Author: Pavel Ondračka <[email protected]> Date: Wed May 11 21:52:44 2022 +0200 r300: fix vertex shader control flow in loops This fixes 7 loop piglit tests when loop unrolling is disabled. The problem is that we were trying to be smart with breaks and tried to save one predicate instruction for endif in some cases. This worked for simple loops but brought problems for more complex shaders, instead just switch to standard VE_PRED_SNEQ_PUSH ME_PRED_SET_POP combo everywhere. Shader-db results on RV530 show three hurt glmark tests, however I believe the simplification should be worth it. total instructions in shared programs: 123715 -> 123718 (<.01%) instructions in affected programs: 54 -> 57 (5.56%) total predicate in shared programs: 118 -> 121 (2.54%) predicate in affected programs: 6 -> 9 (50.00%) total temps in shared programs: 17304 -> 17307 (0.02%) temps in affected programs: 12 -> 15 (25.00%) Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6468 Signed-off-by: Pavel Ondračka <[email protected]> Reviewed-by: Filip Gawin <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16470> --- src/gallium/drivers/r300/compiler/radeon_vert_fc.c | 34 +++++++--------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_vert_fc.c b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c index 51ab6213f25..f543b41fda9 100644 --- a/src/gallium/drivers/r300/compiler/radeon_vert_fc.c +++ b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c @@ -36,7 +36,6 @@ struct vert_fc_state { unsigned LoopsReserved; int PredStack[R500_PVS_MAX_LOOP_DEPTH]; int PredicateReg; - unsigned InCFBreak; }; static void build_pred_src( @@ -161,7 +160,7 @@ static void lower_brk( { if (fc_state->LoopDepth == 1) { inst->U.I.Opcode = RC_OPCODE_RCP; - inst->U.I.DstReg.Pred = RC_PRED_INV; + inst->U.I.DstReg.Pred = RC_PRED_SET; inst->U.I.SrcReg[0].Index = 0; inst->U.I.SrcReg[0].File = RC_FILE_NONE; inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; @@ -203,17 +202,8 @@ static void lower_if( } } - if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) { - fc_state->InCFBreak = 1; - } - if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) - || (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) { - if (fc_state->InCFBreak) { - inst->U.I.Opcode = RC_ME_PRED_SEQ; - inst->U.I.DstReg.Pred = RC_PRED_SET; - } else { - inst->U.I.Opcode = RC_ME_PRED_SNEQ; - } + if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) { + inst->U.I.Opcode = RC_ME_PRED_SNEQ; } else { unsigned swz; inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH; @@ -274,17 +264,13 @@ void rc_vert_fc(struct radeon_compiler *c, void *user) break; case RC_OPCODE_ENDIF: - if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) { - struct rc_instruction * to_delete = inst; - inst = inst->Prev; - rc_remove_instruction(to_delete); - /* XXX: Delete the endif instruction */ - } else { - inst->U.I.Opcode = RC_ME_PRED_SET_POP; - build_pred_dst(&inst->U.I.DstReg, &fc_state); - build_pred_src(&inst->U.I.SrcReg[0], &fc_state); - } - fc_state.InCFBreak = 0; + /* TODO: If LoopDepth == 1 and there is only a single break + * we can optimize out the endif just after the break. However + * previous attempts were buggy, so keep it simple for now. + */ + inst->U.I.Opcode = RC_ME_PRED_SET_POP; + build_pred_dst(&inst->U.I.DstReg, &fc_state); + build_pred_src(&inst->U.I.SrcReg[0], &fc_state); fc_state.BranchDepth--; break;
