Module: Mesa
Branch: main
Commit: 0c773325bb69405858059067a8b558fd8359b09f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=0c773325bb69405858059067a8b558fd8359b09f
Author: Marek Olšák <marek.ol...@amd.com>
Date:   Mon Oct 16 14:19:48 2023 -0400

radeonsi: clean up si_set_streamout_targets

Merge the branches and reorder code.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26095>

---
 src/gallium/drivers/radeonsi/si_state_streamout.c | 116 +++++++++-------------
 1 file changed, 47 insertions(+), 69 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 9e34b202d98..73a82134681 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -63,10 +63,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
    struct si_context *sctx = (struct si_context *)ctx;
    unsigned old_num_targets = sctx->streamout.num_targets;
    unsigned i;
-   bool wait_now = false;
 
    /* We are going to unbind the buffers. Mark which caches need to be flushed. */
-   if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
+   if (old_num_targets && sctx->streamout.begin_emitted) {
+      /* Stop streamout. */
+      si_emit_streamout_end(sctx);
+
       /* Since streamout uses vector writes which go through TC L2
        * and most other clients can use TC L2 as well, we don't need
        * to flush it.
@@ -76,7 +78,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
        * cases. Thus, flag the TC L2 dirtiness in the resource and
        * handle it at draw call time.
        */
-      for (i = 0; i < sctx->streamout.num_targets; i++)
+      for (i = 0; i < old_num_targets; i++)
          if (sctx->streamout.targets[i])
             si_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
 
@@ -90,53 +92,33 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
        * VS_PARTIAL_FLUSH is required if the buffers are going to be
        * used as an input immediately.
        */
-      sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
-
-      if (sctx->gfx_level >= GFX11) {
-         sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
-
-         /* Wait now. This is needed to make sure that GDS is not
-          * busy at the end of IBs.
-          *
-          * Also, the next streamout operation will overwrite GDS,
-          * so we need to make sure that it's idle.
-          */
-         wait_now = true;
-      } else {
-         sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
-      }
+      sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
+                     SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
    }
 
-   /* All readers of the streamout targets need to be finished before we can
-    * start writing to the targets.
+   /* TODO: This is a hack that fixes these failures. It shouldn't be necessary.
+      spec@ext_transform_feedback@immediate-reuse
+      spec@ext_transform_feedback@immediate-reuse-index-buffer
+      spec@ext_transform_feedback@immediate-reuse-uniform-buffer
+      .. and some dEQP-GLES[23].functional.fragment_ops.random.*
     */
-   if (num_targets) {
-      sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
-                     SI_CONTEXT_PFP_SYNC_ME;
-   }
-
-   if (sctx->flags)
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   if (sctx->gfx_level >= GFX11)
+      si_flush_gfx_cs(sctx, 0, NULL);
 
    /* Streamout buffers must be bound in 2 places:
     * 1) in VGT by setting the VGT_STRMOUT registers
     * 2) as shader resources
     */
-
-   /* Stop streamout. */
-   if (sctx->streamout.num_targets && sctx->streamout.begin_emitted)
-      si_emit_streamout_end(sctx);
-
-   /* TODO: This is a hack that fixes streamout failures. It shouldn't be necessary. */
-   if (sctx->gfx_level >= GFX11 && !wait_now)
-      si_flush_gfx_cs(sctx, 0, NULL);
-
-   /* Set the new targets. */
    unsigned enabled_mask = 0, append_bitmask = 0;
+
    for (i = 0; i < num_targets; i++) {
       si_so_target_reference(&sctx->streamout.targets[i], targets[i]);
-      if (!targets[i])
+
+      if (!targets[i]) {
+         si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
          continue;
+      }
 
       enabled_mask |= 1 << i;
 
@@ -151,52 +133,48 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
                                      &t->buf_filled_size_offset,
                                      (struct pipe_resource **)&t->buf_filled_size);
       }
-   }
 
-   for (; i < sctx->streamout.num_targets; i++)
+      /* Bind it to the shader. */
+      struct pipe_shader_buffer sbuf;
+      sbuf.buffer = targets[i]->buffer;
+
+      if (sctx->gfx_level >= GFX11) {
+         sbuf.buffer_offset = targets[i]->buffer_offset;
+         sbuf.buffer_size = targets[i]->buffer_size;
+      } else {
+         sbuf.buffer_offset = 0;
+         sbuf.buffer_size = targets[i]->buffer_offset + targets[i]->buffer_size;
+      }
+
+      si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf);
+      si_resource(targets[i]->buffer)->bind_history |= SI_BIND_STREAMOUT_BUFFER;
+   }
+   for (; i < old_num_targets; i++) {
       si_so_target_reference(&sctx->streamout.targets[i], NULL);
+      si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
+   }
 
-   if (!!sctx->streamout.enabled_mask != !!enabled_mask) {
-      sctx->streamout.enabled_mask = enabled_mask;
+   if (!!sctx->streamout.enabled_mask != !!enabled_mask)
       sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */
-   }
 
    sctx->streamout.num_targets = num_targets;
+   sctx->streamout.enabled_mask = enabled_mask;
    sctx->streamout.append_bitmask = append_bitmask;
 
    /* Update dirty state bits. */
    if (num_targets) {
       si_streamout_buffers_dirty(sctx);
+
+      /* All readers of the streamout targets need to be finished before we can
+       * start writing to them.
+       */
+      sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
+                     SI_CONTEXT_PFP_SYNC_ME;
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
    } else {
       si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
       si_set_streamout_enable(sctx, false);
    }
-
-   /* Set the shader resources.*/
-   for (i = 0; i < num_targets; i++) {
-      if (targets[i]) {
-         struct pipe_shader_buffer sbuf;
-         sbuf.buffer = targets[i]->buffer;
-
-         if (sctx->gfx_level >= GFX11) {
-            sbuf.buffer_offset = targets[i]->buffer_offset;
-            sbuf.buffer_size = targets[i]->buffer_size;
-         } else {
-            sbuf.buffer_offset = 0;
-            sbuf.buffer_size = targets[i]->buffer_offset + targets[i]->buffer_size;
-         }
-
-         si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf);
-         si_resource(targets[i]->buffer)->bind_history |= SI_BIND_STREAMOUT_BUFFER;
-      } else {
-         si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
-      }
-   }
-   for (; i < old_num_targets; i++)
-      si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
-
-   if (wait_now)
-      si_emit_cache_flush_direct(sctx);
 }
 
 static void si_flush_vgt_streamout(struct si_context *sctx)
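
The chip-specific part of the relocated binding loop is the shader-resource range: GFX11 binds exactly [buffer_offset, buffer_offset + buffer_size), while older chips bind [0, buffer_offset + buffer_size), so the end of the writable range is the same but the base stays 0. Below is a minimal standalone C sketch of just that computation. The struct and function names (so_target, shader_buffer, compute_so_binding) are hypothetical stand-ins for Gallium's pipe_stream_output_target and pipe_shader_buffer, and the pre-GFX11 rationale in the comment is an inference, not taken from the commit.

/* Standalone sketch of the streamout binding-range computation from the
 * diff above. Only the offset/size arithmetic mirrors the driver code;
 * everything else here is a stand-in.
 */
#include <stdbool.h>
#include <stdio.h>

struct so_target {         /* stand-in for pipe_stream_output_target */
   unsigned buffer_offset; /* byte offset where this target starts writing */
   unsigned buffer_size;   /* writable size in bytes, starting at the offset */
};

struct shader_buffer {     /* stand-in for pipe_shader_buffer */
   unsigned buffer_offset;
   unsigned buffer_size;
};

static struct shader_buffer compute_so_binding(const struct so_target *t,
                                               bool is_gfx11)
{
   struct shader_buffer sbuf;

   if (is_gfx11) {
      /* GFX11: bind exactly the target's range. */
      sbuf.buffer_offset = t->buffer_offset;
      sbuf.buffer_size = t->buffer_size;
   } else {
      /* Older chips: bind from the start of the buffer and make the size
       * span offset + size, so the writable range ends at the same byte
       * while the binding base remains 0 (presumably because the offset
       * is applied elsewhere, via the VGT_STRMOUT registers).
       */
      sbuf.buffer_offset = 0;
      sbuf.buffer_size = t->buffer_offset + t->buffer_size;
   }
   return sbuf;
}

int main(void)
{
   struct so_target t = { .buffer_offset = 256, .buffer_size = 1024 };

   struct shader_buffer gfx11 = compute_so_binding(&t, true);
   struct shader_buffer older = compute_so_binding(&t, false);

   /* GFX11 binds bytes 256..1280; older chips bind bytes 0..1280. */
   printf("gfx11: offset=%u size=%u\n", gfx11.buffer_offset, gfx11.buffer_size);
   printf("older: offset=%u size=%u\n", older.buffer_offset, older.buffer_size);
   return 0;
}

Compiled with any C99 compiler, the example prints offset=256 size=1024 for the GFX11 case and offset=0 size=1280 for the older-chip case.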