Module: Mesa
Branch: main
Commit: bf593e09b131393d838f805525a9cc90fcd7081e
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=bf593e09b131393d838f805525a9cc90fcd7081e

Author: Alyssa Rosenzweig <[email protected]>
Date:   Tue Nov  8 11:33:00 2022 -0500

asahi: Handle synchronized transfers better

We need to flush the appropriate batch (batches, in the future) before a
synchronized transfer for correct results. To do so without major performance
regressions, we need to do extra bookkeeping about which batches write which
resources. We already know about reads via the BO list.

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19606>

---

 src/gallium/drivers/asahi/agx_batch.c |  57 +++++++++++++++
 src/gallium/drivers/asahi/agx_pipe.c  | 126 ++++++++++++++++++----------------
 src/gallium/drivers/asahi/agx_state.c |  13 ++--
 src/gallium/drivers/asahi/agx_state.h |  22 ++++++
 src/gallium/drivers/asahi/meson.build |   1 +
 5 files changed, 155 insertions(+), 64 deletions(-)

diff --git a/src/gallium/drivers/asahi/agx_batch.c 
b/src/gallium/drivers/asahi/agx_batch.c
new file mode 100644
index 00000000000..a7147e099cf
--- /dev/null
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2022 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "agx_state.h"
+
+void
+agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const 
char *reason)
+{
+   /* TODO: Turn into loop when we support multiple batches */
+   if (ctx->batch) {
+      struct agx_batch *batch = ctx->batch;
+
+      if (agx_batch_uses_bo(batch, rsrc->bo))
+         agx_flush_batch(ctx, batch);
+   }
+}
+
+void
+agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const 
char *reason)
+{
+   struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);
+
+   if (ent)
+      agx_flush_batch(ctx, ent->data);
+}
+
+void
+agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc)
+{
+   agx_batch_add_bo(batch, rsrc->bo);
+
+   if (rsrc->separate_stencil)
+      agx_batch_add_bo(batch, rsrc->separate_stencil->bo);
+}
+
+void
+agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
+{
+   struct agx_context *ctx = batch->ctx;
+   struct hash_entry *ent = _mesa_hash_table_search(ctx->writer, rsrc);
+
+   /* Nothing to do if we're already writing */
+   if (ent && ent->data == batch)
+      return;
+
+   /* Flush the old writer if there is one */
+   agx_flush_writer(ctx, rsrc, "Multiple writers");
+
+   /* Write is strictly stronger than a read */
+   agx_batch_reads(batch, rsrc);
+
+   /* We are now the new writer */
+   assert(!_mesa_hash_table_search(ctx->writer, rsrc));
+   _mesa_hash_table_insert(ctx->writer, rsrc, batch);
+}
diff --git a/src/gallium/drivers/asahi/agx_pipe.c 
b/src/gallium/drivers/asahi/agx_pipe.c
index 67631c4842c..f2b59fac900 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -272,10 +272,13 @@ agx_transfer_map(struct pipe_context *pctx,
    if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
       return NULL;
 
-   if (ctx->batch->cbufs[0] && resource == ctx->batch->cbufs[0]->texture)
-      agx_flush_all(ctx, "Transfer to colour buffer");
-   if (ctx->batch->zsbuf && resource == ctx->batch->zsbuf->texture)
-      agx_flush_all(ctx, "Transfer to depth buffer");
+   /* Synchronize */
+   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
+      agx_flush_writer(ctx, rsrc, "Unsynchronized transfer");
+
+      if (usage & PIPE_MAP_WRITE)
+         agx_flush_readers(ctx, rsrc, "Unsynchronized read");
+   }
 
    struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer);
    transfer->base.level = level;
@@ -419,62 +422,68 @@ agx_flush(struct pipe_context *pctx,
    if (fence)
       *fence = NULL;
 
+   agx_flush_batch(ctx, ctx->batch);
+}
+
+void
+agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
+{
+   struct agx_device *dev = agx_device(ctx->base.screen);
+
    /* Nothing to do */
-   if (!(ctx->batch->draw | ctx->batch->clear))
+   if (!(batch->draw | batch->clear))
       return;
 
    /* Finalize the encoder */
    uint8_t stop[5 + 64] = { 0x00, 0x00, 0x00, 0xc0, 0x00 };
-   memcpy(ctx->batch->encoder_current, stop, sizeof(stop));
+   memcpy(batch->encoder_current, stop, sizeof(stop));
 
    /* Emit the commandbuffer */
    uint64_t pipeline_clear = 0, pipeline_reload = 0;
    bool clear_pipeline_textures = false;
 
-   struct agx_device *dev = agx_device(pctx->screen);
-
    uint16_t clear_colour[4] = {
-      _mesa_float_to_half(ctx->batch->clear_color[0]),
-      _mesa_float_to_half(ctx->batch->clear_color[1]),
-      _mesa_float_to_half(ctx->batch->clear_color[2]),
-      _mesa_float_to_half(ctx->batch->clear_color[3])
+      _mesa_float_to_half(batch->clear_color[0]),
+      _mesa_float_to_half(batch->clear_color[1]),
+      _mesa_float_to_half(batch->clear_color[2]),
+      _mesa_float_to_half(batch->clear_color[3])
    };
 
    pipeline_clear = agx_build_clear_pipeline(ctx,
          dev->internal.clear,
-         agx_pool_upload(&ctx->batch->pool, clear_colour, 
sizeof(clear_colour)));
+         agx_pool_upload(&batch->pool, clear_colour, sizeof(clear_colour)));
 
-   if (ctx->batch->cbufs[0]) {
-      enum pipe_format fmt = ctx->batch->cbufs[0]->format;
+   if (batch->cbufs[0]) {
+      enum pipe_format fmt = batch->cbufs[0]->format;
       enum agx_format internal = agx_pixel_format[fmt].internal;
       uint32_t shader = dev->reload.format[internal];
 
       pipeline_reload = agx_build_reload_pipeline(ctx, shader,
-                               ctx->batch->cbufs[0]);
+                               batch->cbufs[0]);
    }
 
-   if (ctx->batch->cbufs[0] && !(ctx->batch->clear & PIPE_CLEAR_COLOR0)) {
+   if (batch->cbufs[0] && !(batch->clear & PIPE_CLEAR_COLOR0)) {
       clear_pipeline_textures = true;
       pipeline_clear = pipeline_reload;
    }
 
    uint64_t pipeline_store = 0;
 
-   if (ctx->batch->cbufs[0]) {
+   if (batch->cbufs[0]) {
       pipeline_store =
          agx_build_store_pipeline(ctx,
                                   dev->internal.store,
-                                  agx_pool_upload(&ctx->batch->pool, 
ctx->render_target[0], sizeof(ctx->render_target)));
+                                  agx_pool_upload(&batch->pool, 
ctx->render_target[0], sizeof(ctx->render_target)));
    }
 
    /* Pipelines must 64 aligned */
-   for (unsigned i = 0; i < ctx->batch->nr_cbufs; ++i) {
-      struct agx_resource *rt = agx_resource(ctx->batch->cbufs[i]->texture);
+   for (unsigned i = 0; i < batch->nr_cbufs; ++i) {
+      struct agx_resource *rt = agx_resource(batch->cbufs[i]->texture);
       BITSET_SET(rt->data_valid, 0);
    }
 
-   struct agx_resource *zbuf = ctx->batch->zsbuf ?
-      agx_resource(ctx->batch->zsbuf->texture) : NULL;
+   struct agx_resource *zbuf = batch->zsbuf ?
+      agx_resource(batch->zsbuf->texture) : NULL;
 
    if (zbuf) {
       BITSET_SET(zbuf->data_valid, 0);
@@ -484,36 +493,17 @@ agx_flush(struct pipe_context *pctx,
    }
 
    /* BO list for a given batch consists of:
-    *  - BOs for the batch's framebuffer surfaces
     *  - BOs for the batch's pools
     *  - BOs for the encoder
     *  - BO for internal shaders
     *  - BOs added to the batch explicitly
     */
-   struct agx_batch *batch = ctx->batch;
-
    agx_batch_add_bo(batch, batch->encoder);
    agx_batch_add_bo(batch, batch->scissor.bo);
    agx_batch_add_bo(batch, batch->depth_bias.bo);
    agx_batch_add_bo(batch, dev->internal.bo);
    agx_batch_add_bo(batch, dev->reload.bo);
 
-   for (unsigned i = 0; i < batch->nr_cbufs; ++i) {
-      struct pipe_surface *surf = batch->cbufs[i];
-      assert(surf != NULL && surf->texture != NULL);
-      struct agx_resource *rsrc = agx_resource(surf->texture);
-      agx_batch_add_bo(batch, rsrc->bo);
-   }
-
-   if (batch->zsbuf) {
-      struct pipe_surface *surf = batch->zsbuf;
-      struct agx_resource *rsrc = agx_resource(surf->texture);
-      agx_batch_add_bo(batch, rsrc->bo);
-
-      if (rsrc->separate_stencil)
-         agx_batch_add_bo(batch, rsrc->separate_stencil->bo);
-   }
-
    unsigned handle_count =
       agx_batch_num_bo(batch) +
       agx_pool_num_bos(&batch->pool) +
@@ -540,19 +530,19 @@ agx_flush(struct pipe_context *pctx,
 
    unsigned cmdbuf_size = demo_cmdbuf(dev->cmdbuf.ptr.cpu,
                dev->cmdbuf.size,
-               &ctx->batch->pool,
+               &batch->pool,
                &ctx->framebuffer,
-               ctx->batch->encoder->ptr.gpu,
+               batch->encoder->ptr.gpu,
                encoder_id,
-               ctx->batch->scissor.bo->ptr.gpu,
-               ctx->batch->depth_bias.bo->ptr.gpu,
+               batch->scissor.bo->ptr.gpu,
+               batch->depth_bias.bo->ptr.gpu,
                pipeline_clear,
                pipeline_reload,
                pipeline_store,
                clear_pipeline_textures,
-               ctx->batch->clear,
-               ctx->batch->clear_depth,
-               ctx->batch->clear_stencil);
+               batch->clear,
+               batch->clear_depth,
+               batch->clear_stencil);
 
    /* Generate the mapping table from the BO list */
    demo_mem_map(dev->memmap.ptr.cpu, dev->memmap.size, handles, handle_count,
@@ -573,20 +563,33 @@ agx_flush(struct pipe_context *pctx,
       agx_bo_unreference(agx_lookup_bo(dev, handle));
    }
 
+   /* There is no more writer for anything we wrote recorded on this context */
+   hash_table_foreach(ctx->writer, ent) {
+      if (ent->data == batch)
+         _mesa_hash_table_remove(ctx->writer, ent);
+   }
+
    memset(batch->bo_list.set, 0, batch->bo_list.word_count * 
sizeof(BITSET_WORD));
-   agx_pool_cleanup(&ctx->batch->pool);
-   agx_pool_cleanup(&ctx->batch->pipeline_pool);
-   agx_pool_init(&ctx->batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
-   agx_pool_init(&ctx->batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, 
true);
-   ctx->batch->clear = 0;
-   ctx->batch->draw = 0;
-   ctx->batch->load = 0;
-   ctx->batch->encoder_current = ctx->batch->encoder->ptr.cpu;
-   ctx->batch->encoder_end = ctx->batch->encoder_current + 
ctx->batch->encoder->size;
-   ctx->batch->scissor.count = 0;
+   agx_pool_cleanup(&batch->pool);
+   agx_pool_cleanup(&batch->pipeline_pool);
+   agx_pool_init(&batch->pool, dev, AGX_MEMORY_TYPE_FRAMEBUFFER, true);
+   agx_pool_init(&batch->pipeline_pool, dev, AGX_MEMORY_TYPE_CMDBUF_32, true);
+   batch->clear = 0;
+   batch->draw = 0;
+   batch->load = 0;
+   batch->encoder_current = batch->encoder->ptr.cpu;
+   batch->encoder_end = batch->encoder_current + batch->encoder->size;
+   batch->scissor.count = 0;
 
    agx_dirty_all(ctx);
-   agx_batch_init_state(ctx->batch);
+   agx_batch_init_state(batch);
+
+   /* After resetting the batch, rebind the framebuffer so we update resource
+    * tracking logic and the BO lists.
+    *
+    * XXX: This is a hack to workaround lack of proper batch tracking.
+    */
+   ctx->base.set_framebuffer_state(&ctx->base, &ctx->framebuffer);
 }
 
 static void
@@ -625,6 +628,7 @@ agx_create_context(struct pipe_screen *screen,
    pctx->priv = priv;
 
    ctx->batch = rzalloc(ctx, struct agx_batch);
+   ctx->batch->ctx = ctx;
    ctx->batch->bo_list.set = rzalloc_array(ctx->batch, BITSET_WORD, 128);
    ctx->batch->bo_list.word_count = 128;
    agx_pool_init(&ctx->batch->pool,
@@ -637,6 +641,8 @@ agx_create_context(struct pipe_screen *screen,
    ctx->batch->scissor.bo = agx_bo_create(agx_device(screen), 0x80000, 
AGX_MEMORY_TYPE_FRAMEBUFFER);
    ctx->batch->depth_bias.bo = agx_bo_create(agx_device(screen), 0x80000, 
AGX_MEMORY_TYPE_FRAMEBUFFER);
 
+   ctx->writer = _mesa_pointer_hash_table_create(ctx);
+
    /* Upload fixed shaders (TODO: compile them?) */
 
    pctx->stream_uploader = u_upload_create_default(pctx);
diff --git a/src/gallium/drivers/asahi/agx_state.c 
b/src/gallium/drivers/asahi/agx_state.c
index 3f352173c2f..9dfffe27cba 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -762,6 +762,9 @@ agx_set_framebuffer_state(struct pipe_context *pctx,
    ctx->batch->zsbuf = state->zsbuf;
    ctx->dirty = ~0;
 
+   if (state->zsbuf)
+      agx_batch_writes(ctx->batch, agx_resource(state->zsbuf->texture));
+
    for (unsigned i = 0; i < state->nr_cbufs; ++i) {
       struct pipe_surface *surf = state->cbufs[i];
       struct agx_resource *tex = agx_resource(surf->texture);
@@ -770,6 +773,8 @@ agx_set_framebuffer_state(struct pipe_context *pctx,
       unsigned level = surf->u.tex.level;
       unsigned layer = surf->u.tex.first_layer;
 
+      agx_batch_writes(ctx->batch, tex);
+
       assert(surf->u.tex.last_layer == layer);
 
       agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) {
@@ -1348,7 +1353,7 @@ agx_build_pipeline(struct agx_context *ctx, struct 
agx_compiled_shader *cs, enum
    /* TODO: Dirty track me to save some CPU cycles and maybe improve caching */
    for (unsigned i = 0; i < nr_textures; ++i) {
       struct agx_sampler_view *tex = ctx->stage[stage].textures[i];
-      agx_batch_add_bo(ctx->batch, agx_resource(tex->base.texture)->bo);
+      agx_batch_reads(ctx->batch, agx_resource(tex->base.texture));
 
       textures[i] = tex->desc;
    }
@@ -1863,10 +1868,10 @@ agx_index_buffer_ptr(struct agx_batch *batch,
    off_t offset = draw->start * info->index_size;
 
    if (!info->has_user_indices) {
-      struct agx_bo *bo = agx_resource(info->index.resource)->bo;
-      agx_batch_add_bo(batch, bo);
+      struct agx_resource *rsrc = agx_resource(info->index.resource);
+      agx_batch_reads(batch, rsrc);
 
-      return bo->ptr.gpu + offset;
+      return rsrc->bo->ptr.gpu + offset;
    } else {
       return agx_pool_upload_aligned(&batch->pool,
                                      ((uint8_t *) info->index.user) + offset,
diff --git a/src/gallium/drivers/asahi/agx_state.h 
b/src/gallium/drivers/asahi/agx_state.h
index 28b514b66cd..d6479852281 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -93,6 +93,8 @@ struct agx_array {
 };
 
 struct agx_batch {
+   struct agx_context *ctx;
+
    unsigned width, height, nr_cbufs;
    struct pipe_surface *cbufs[8];
    struct pipe_surface *zsbuf;
@@ -208,6 +210,9 @@ struct agx_context {
    uint8_t render_target[8][AGX_RENDER_TARGET_LENGTH];
 
    struct blitter_context *blitter;
+
+   /* Map of agx_resource to agx_batch that writes that resource */
+   struct hash_table *writer;
 };
 
 static inline struct agx_context *
@@ -359,6 +364,15 @@ agx_batch_bo_list_bits(struct agx_batch *batch)
    return batch->bo_list.word_count * sizeof(BITSET_WORD) * 8;
 }
 
+static bool
+agx_batch_uses_bo(struct agx_batch *batch, struct agx_bo *bo)
+{
+   if (bo->handle < agx_batch_bo_list_bits(batch))
+      return BITSET_TEST(batch->bo_list.set, bo->handle);
+   else
+      return false;
+}
+
 static inline void
 agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
 {
@@ -388,6 +402,14 @@ agx_batch_num_bo(struct agx_batch *batch)
 #define AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) \
    BITSET_FOREACH_SET(handle, (batch)->bo_list.set, 
agx_batch_bo_list_bits(batch))
 
+void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch);
+void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, 
const char *reason);
+void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, 
const char *reason);
+
+/* Use these instead of batch_add_bo for proper resource tracking */
+void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
+void agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc);
+
 /* Blit shaders */
 void
 agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
diff --git a/src/gallium/drivers/asahi/meson.build 
b/src/gallium/drivers/asahi/meson.build
index 0e7bad9bede..cd67db011ab 100644
--- a/src/gallium/drivers/asahi/meson.build
+++ b/src/gallium/drivers/asahi/meson.build
@@ -19,6 +19,7 @@
 # SOFTWARE.
 
 files_asahi = files(
+  'agx_batch.c',
   'agx_blit.c',
   'agx_pipe.c',
   'agx_state.c',

Reply via email to