Module: Mesa Branch: master Commit: 94beaa1d92e9964a22e5dad9e26488f77ab45ab9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=94beaa1d92e9964a22e5dad9e26488f77ab45ab9
Author: Connor Abbott <[email protected]> Date: Thu Mar 11 13:11:20 2021 +0100 ir3/legalize: Fix last input (ss) insertion If there was a mix of ldlv and bary.f and we inserted an (ss) *after* the last input which was a bary.f, then last_input_needs_ss would get unset, even though it shouldn't. For figuring out whether we need the (ss), we need to know whether there are any pending ldlv's when last_input gets executed, not at the end of the block, which means that the existing code's strategy of inserting it after the whole block has been processed won't work. Rework it to do the last_input processing in the main loop instead. Reviewed-by: Rob Clark <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10076> --- src/freedreno/ir3/ir3_legalize.c | 78 ++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 9505ff52136..262a9c3ab3c 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -86,7 +86,6 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) if (bd->valid) return false; - struct ir3_instruction *last_input = NULL; struct ir3_instruction *last_rel = NULL; struct ir3_instruction *last_n = NULL; struct list_head instr_list; @@ -113,6 +112,20 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) &state->needs_sy, &pstate->needs_sy); } + unsigned input_count = 0; + + foreach_instr (n, &block->instr_list) { + if (is_input(n)) { + input_count++; + } + } + + unsigned inputs_remaining = input_count; + + /* Inputs can only be in the first block */ + assert(input_count == 0 || + block == list_first_entry(&block->shader->block_list, struct ir3_block, node)); + /* remove all the instructions from the list, we'll be adding * them back in as we go */ @@ -279,43 +292,46 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } if (is_input(n)) { - last_input = n; last_input_needs_ss |= (n->opc == OPC_LDLV); - } - last_n = n; - } + assert(inputs_remaining > 0); + inputs_remaining--; + if (inputs_remaining == 0) { + /* This is the last input. We add the (ei) flag to release + * varying memory after this executes. If it's an ldlv, + * however, we need to insert a dummy bary.f on which we can + * set the (ei) flag. We may also need to insert an (ss) to + * guarantee that all ldlv's have finished fetching their + * results before releasing the varying memory. + */ + struct ir3_instruction *last_input = n; + if (n->opc == OPC_LDLV) { + struct ir3_instruction *baryf; - if (last_input) { - assert(block == list_first_entry(&block->shader->block_list, - struct ir3_block, node)); - /* special hack.. if using ldlv to bypass interpolation, - * we need to insert a dummy bary.f on which we can set - * the (ei) flag: - */ - if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { - struct ir3_instruction *baryf; + /* (ss)bary.f (ei)r63.x, 0, r0.x */ + baryf = ir3_instr_create(block, OPC_BARY_F, 3); + ir3_reg_create(baryf, regid(63, 0), 0); + ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(baryf, regid(0, 0), 0); - /* (ss)bary.f (ei)r63.x, 0, r0.x */ - baryf = ir3_instr_create(block, OPC_BARY_F, 3); - ir3_reg_create(baryf, regid(63, 0), 0); - ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; - ir3_reg_create(baryf, regid(0, 0), 0); + last_input = baryf; + } - /* insert the dummy bary.f after last_input: */ - ir3_instr_move_after(baryf, last_input); + last_input->regs[0]->flags |= IR3_REG_EI; + if (last_input_needs_ss) { + last_input->flags |= IR3_INSTR_SS; + regmask_init(&state->needs_ss_war, mergedregs); + regmask_init(&state->needs_ss, mergedregs); + } + } + } - last_input = baryf; + last_n = n; + } - /* by definition, we need (ss) since we are inserting - * the dummy bary.f immediately after the ldlv: - */ - last_input_needs_ss = true; - } - last_input->regs[0]->flags |= IR3_REG_EI; - if (last_input_needs_ss) - last_input->flags |= IR3_INSTR_SS; - } else if (has_tex_prefetch) { + assert(inputs_remaining == 0); + + if (has_tex_prefetch && input_count == 0) { /* texture prefetch, but *no* inputs.. we need to insert a * dummy bary.f at the top of the shader to unblock varying * storage: _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
