Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 31.05.2016 23:42, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

This improves throughput by keeping TTM overhead down.

Some piglit tests such as texelFetch and streaming-texture-leak will
use less memory now.

v2: use gart_size / 4 as the threshold
---
  src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
  src/gallium/drivers/radeon/r600_texture.c     | 32 +++++++++++++++++++++++----
  2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index cf3756b..d693004 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -501,6 +501,7 @@ struct r600_common_context {
        /* Misc stats. */
        unsigned                        num_draw_calls;
        unsigned                        num_dma_calls;
+       uint64_t                        num_alloc_tex_transfer_bytes;

        /* Render condition. */
        struct r600_atom                render_cond_atom;
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index dbabbd0..bf6cfe8 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1302,9 +1302,11 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
                                                 box->depth);
  }

-static void r600_texture_invalidate_storage(struct r600_common_screen *rscreen,
+static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
                                            struct r600_texture *rtex)
  {
+       struct r600_common_screen *rscreen = rctx->screen;
+
        /* There is no point in discarding depth and tiled buffers. */
        assert(!rtex->is_depth);
        assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED);
@@ -1319,6 +1321,8 @@ static void r600_texture_invalidate_storage(struct r600_common_screen *rscreen,

        r600_dirty_all_framebuffer_states(rscreen);
        p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
+
+       rctx->num_alloc_tex_transfer_bytes += rtex->size;
  }

  static void *r600_texture_transfer_map(struct pipe_context *ctx,
@@ -1378,8 +1382,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
                        /* It's busy. */
                        if (r600_can_invalidate_texture(rctx->screen, rtex,
                                                        usage, box))
-                               r600_texture_invalidate_storage(rctx->screen,
-                                                               rtex);
+                               r600_texture_invalidate_storage(rctx, rtex);
                        else
                                use_staging_texture = true;
                }
@@ -1499,6 +1502,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
  static void r600_texture_transfer_unmap(struct pipe_context *ctx,
                                        struct pipe_transfer* transfer)
  {
+       struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
        struct pipe_resource *texture = transfer->resource;
        struct r600_texture *rtex = (struct r600_texture*)texture;
@@ -1514,8 +1518,28 @@ static void r600_texture_transfer_unmap(struct pipe_context *ctx,
                }
        }

-       if (rtransfer->staging)
+       if (rtransfer->staging) {
+               rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
                pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
+       }
+
+       /* Heuristic for {upload, draw, upload, draw, ..}:
+        *
+        * Flush the gfx IB if we've allocated too much texture storage.
+        *
+        * The idea is that we don't want to build IBs that use too much
+        * memory and put pressure on the kernel memory manager and we also
+        * want to make temporary and invalidated buffers go idle ASAP to
+        * decrease the total memory usage or make them reusable. The memory
+        * usage will be slightly higher than given here because of the buffer
+        * cache in the winsys.
+        *
+        * The result is that the kernel memory manager is never a bottleneck.
+        */
+       if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
+               rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+               rctx->num_alloc_tex_transfer_bytes = 0;
+       }

        FREE(transfer);
  }

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to