On 10.01.2018 20:49, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

Cc: 17.2 17.3 <mesa-sta...@lists.freedesktop.org>
---
  src/gallium/auxiliary/util/u_threaded_context.c | 13 +++++++++++++
  src/gallium/auxiliary/util/u_threaded_context.h |  8 ++++++++
  2 files changed, 21 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c 
b/src/gallium/auxiliary/util/u_threaded_context.c
index ffa8247..7bd13cf 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -1508,35 +1508,47 @@ struct tc_resource_copy_region {
  };
static void
  tc_resource_copy_region(struct pipe_context *_pipe,
                          struct pipe_resource *dst, unsigned dst_level,
                          unsigned dstx, unsigned dsty, unsigned dstz,
                          struct pipe_resource *src, unsigned src_level,
                          const struct pipe_box *src_box);
static void
+tc_notify_staging_upload_done(struct threaded_context *tc, unsigned size)
+{
+   tc->unflushed_transfer_size += size;
+
+   if (tc->unflushed_transfer_size > TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE) {
+      tc->base.flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
+      tc->unflushed_transfer_size = 0;
+   }
+}
+
+static void
  tc_buffer_do_flush_region(struct threaded_context *tc,
                            struct threaded_transfer *ttrans,
                            const struct pipe_box *box)
  {
     struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
if (ttrans->staging) {
        struct pipe_box src_box;
u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
                 box->width, &src_box);
/* Copy the staging buffer into the original one. */
        tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
                                ttrans->staging, 0, &src_box);
+      tc_notify_staging_upload_done(tc, box->width);
     }
util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
  }
static void
  tc_transfer_flush_region(struct pipe_context *_pipe,
                           struct pipe_transfer *transfer,
                           const struct pipe_box *rel_box)
  {
@@ -1653,20 +1665,21 @@ tc_buffer_subdata(struct pipe_context *_pipe,
/* The upload is small. Enqueue it. */
     struct tc_buffer_subdata *p =
        tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, 
size);
tc_set_resource_reference(&p->resource, resource);
     p->usage = usage;
     p->offset = offset;
     p->size = size;
     memcpy(p->slot, data, size);
+   tc_notify_staging_upload_done(tc, size);
  }
struct tc_texture_subdata {
     struct pipe_resource *resource;
     unsigned level, usage, stride, layer_stride;
     struct pipe_box box;
     char slot[0]; /* more will be allocated if needed */
  };
static void
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h 
b/src/gallium/auxiliary/util/u_threaded_context.h
index 53c5a7e..295464a 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -225,20 +225,27 @@ struct tc_unflushed_batch_token;
  /* Threshold for when to use the queue or sync. */
  #define TC_MAX_STRING_MARKER_BYTES  512
/* Threshold for when to enqueue buffer/texture_subdata as-is.
   * If the upload size is greater than this, it will do instead:
   * - for buffers: DISCARD_RANGE is done by the threaded context
   * - for textures: sync and call the driver directly
   */
  #define TC_MAX_SUBDATA_BYTES        320
+/* Every staging upload allocates memory. If we have too many uploads
+ * in a row without flushes, we might run out of memory. This limit controls
+ * how many bytes of queued uploads we can have at a time. If we go over,
+ * the threaded context triggers a context flush.
+ */
+#define TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE (512 * 1024 * 1024)

This seems very aggressive. In reality, this should probably scale with free space in GART, but we don't know that here. Can you reduce it to something like 64MB? Unless there's concrete evidence that having it higher is beneficial, of course.

With that, patches 9 & 10:

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


+
  typedef void (*tc_replace_buffer_storage_func)(struct pipe_context *ctx,
                                                 struct pipe_resource *dst,
                                                 struct pipe_resource *src);
  typedef struct pipe_fence_handle *(*tc_create_fence_func)(struct pipe_context 
*ctx,
                                                            struct 
tc_unflushed_batch_token *token);
struct threaded_resource {
     struct pipe_resource b;
     const struct u_resource_vtbl *vtbl;
@@ -346,20 +353,21 @@ struct tc_batch {
     struct tc_call call[TC_CALLS_PER_BATCH];
  };
struct threaded_context {
     struct pipe_context base;
     struct pipe_context *pipe;
     struct slab_child_pool pool_transfers;
     tc_replace_buffer_storage_func replace_buffer_storage;
     tc_create_fence_func create_fence;
     unsigned map_buffer_alignment;
+   unsigned unflushed_transfer_size;
struct list_head unflushed_queries; /* Counters for the HUD. */
     unsigned num_offloaded_slots;
     unsigned num_direct_slots;
     unsigned num_syncs;
struct util_queue queue;
     struct util_queue_fence *fence;



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to