Module: Mesa
Branch: main
Commit: 0c773325bb69405858059067a8b558fd8359b09f
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0c773325bb69405858059067a8b558fd8359b09f

Author: Marek Olšák <marek.ol...@amd.com>
Date:   Mon Oct 16 14:19:48 2023 -0400

radeonsi: clean up si_set_streamout_targets

Merge the branches and reorder code.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26095>

---

 src/gallium/drivers/radeonsi/si_state_streamout.c | 116 +++++++++-------------
 1 file changed, 47 insertions(+), 69 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 9e34b202d98..73a82134681 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -63,10 +63,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
    struct si_context *sctx = (struct si_context *)ctx;
    unsigned old_num_targets = sctx->streamout.num_targets;
    unsigned i;
-   bool wait_now = false;
 
   /* We are going to unbind the buffers. Mark which caches need to be flushed. */
-   if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
+   if (old_num_targets && sctx->streamout.begin_emitted) {
+      /* Stop streamout. */
+      si_emit_streamout_end(sctx);
+
       /* Since streamout uses vector writes which go through TC L2
        * and most other clients can use TC L2 as well, we don't need
        * to flush it.
@@ -76,7 +78,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
        * cases. Thus, flag the TC L2 dirtiness in the resource and
        * handle it at draw call time.
        */
-      for (i = 0; i < sctx->streamout.num_targets; i++)
+      for (i = 0; i < old_num_targets; i++)
          if (sctx->streamout.targets[i])
            si_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
 
@@ -90,53 +92,33 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
        * VS_PARTIAL_FLUSH is required if the buffers are going to be
        * used as an input immediately.
        */
-      sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
-
-      if (sctx->gfx_level >= GFX11) {
-         sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
-
-         /* Wait now. This is needed to make sure that GDS is not
-          * busy at the end of IBs.
-          *
-          * Also, the next streamout operation will overwrite GDS,
-          * so we need to make sure that it's idle.
-          */
-         wait_now = true;
-      } else {
-         sctx->flags |= SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
-      }
+      sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
+                     SI_CONTEXT_VS_PARTIAL_FLUSH | SI_CONTEXT_PFP_SYNC_ME;
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
    }
 
-   /* All readers of the streamout targets need to be finished before we can
-    * start writing to the targets.
+    /* TODO: This is a hack that fixes these failures. It shouldn't be necessary.
+    *    spec@ext_transform_feedback@immediate-reuse
+    *    spec@ext_transform_feedback@immediate-reuse-index-buffer
+    *    spec@ext_transform_feedback@immediate-reuse-uniform-buffer
+    *    .. and some dEQP-GLES[23].functional.fragment_ops.random.*
     */
-   if (num_targets) {
-      sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
-                     SI_CONTEXT_PFP_SYNC_ME;
-   }
-
-   if (sctx->flags)
-      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
+   if (sctx->gfx_level >= GFX11)
+      si_flush_gfx_cs(sctx, 0, NULL);
 
    /* Streamout buffers must be bound in 2 places:
     * 1) in VGT by setting the VGT_STRMOUT registers
     * 2) as shader resources
     */
-
-   /* Stop streamout. */
-   if (sctx->streamout.num_targets && sctx->streamout.begin_emitted)
-      si_emit_streamout_end(sctx);
-
-   /* TODO: This is a hack that fixes streamout failures. It shouldn't be necessary. */
-   if (sctx->gfx_level >= GFX11 && !wait_now)
-      si_flush_gfx_cs(sctx, 0, NULL);
-
-   /* Set the new targets. */
    unsigned enabled_mask = 0, append_bitmask = 0;
+
    for (i = 0; i < num_targets; i++) {
       si_so_target_reference(&sctx->streamout.targets[i], targets[i]);
-      if (!targets[i])
+
+      if (!targets[i]) {
+         si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
          continue;
+      }
 
       enabled_mask |= 1 << i;
 
@@ -151,52 +133,48 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
                               &t->buf_filled_size_offset,
                               (struct pipe_resource **)&t->buf_filled_size);
       }
-   }
 
-   for (; i < sctx->streamout.num_targets; i++)
+      /* Bind it to the shader. */
+      struct pipe_shader_buffer sbuf;
+      sbuf.buffer = targets[i]->buffer;
+
+      if (sctx->gfx_level >= GFX11) {
+         sbuf.buffer_offset = targets[i]->buffer_offset;
+         sbuf.buffer_size = targets[i]->buffer_size;
+      } else {
+         sbuf.buffer_offset = 0;
+         sbuf.buffer_size = targets[i]->buffer_offset + targets[i]->buffer_size;
+      }
+
+      si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf);
+      si_resource(targets[i]->buffer)->bind_history |= SI_BIND_STREAMOUT_BUFFER;
+   }
+   for (; i < old_num_targets; i++) {
       si_so_target_reference(&sctx->streamout.targets[i], NULL);
+      si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
+   }
 
-   if (!!sctx->streamout.enabled_mask != !!enabled_mask) {
-      sctx->streamout.enabled_mask = enabled_mask;
+   if (!!sctx->streamout.enabled_mask != !!enabled_mask)
       sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */
-   }
 
    sctx->streamout.num_targets = num_targets;
+   sctx->streamout.enabled_mask = enabled_mask;
    sctx->streamout.append_bitmask = append_bitmask;
 
    /* Update dirty state bits. */
    if (num_targets) {
       si_streamout_buffers_dirty(sctx);
+
+      /* All readers of the streamout targets need to be finished before we can
+       * start writing to them.
+       */
+      sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
+                     SI_CONTEXT_PFP_SYNC_ME;
+      si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
    } else {
       si_set_atom_dirty(sctx, &sctx->atoms.s.streamout_begin, false);
       si_set_streamout_enable(sctx, false);
    }
-
-   /* Set the shader resources.*/
-   for (i = 0; i < num_targets; i++) {
-      if (targets[i]) {
-         struct pipe_shader_buffer sbuf;
-         sbuf.buffer = targets[i]->buffer;
-
-         if (sctx->gfx_level >= GFX11) {
-            sbuf.buffer_offset = targets[i]->buffer_offset;
-            sbuf.buffer_size = targets[i]->buffer_size;
-         } else {
-            sbuf.buffer_offset = 0;
-            sbuf.buffer_size = targets[i]->buffer_offset + targets[i]->buffer_size;
-         }
-
-         si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf);
-         si_resource(targets[i]->buffer)->bind_history |= SI_BIND_STREAMOUT_BUFFER;
-      } else {
-         si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
-      }
-   }
-   for (; i < old_num_targets; i++)
-      si_set_internal_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL);
-
-   if (wait_now)
-      si_emit_cache_flush_direct(sctx);
 }
 
 static void si_flush_vgt_streamout(struct si_context *sctx)

Reply via email to