On Mon, Jan 22, 2018 at 2:07 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > On 10.01.2018 20:49, Marek Olšák wrote: >> >> From: Marek Olšák <marek.ol...@amd.com> >> >> Cc: 17.2 17.3 <mesa-sta...@lists.freedesktop.org> >> --- >> src/gallium/auxiliary/util/u_threaded_context.c | 13 +++++++++++++ >> src/gallium/auxiliary/util/u_threaded_context.h | 8 ++++++++ >> 2 files changed, 21 insertions(+) >> >> diff --git a/src/gallium/auxiliary/util/u_threaded_context.c >> b/src/gallium/auxiliary/util/u_threaded_context.c >> index ffa8247..7bd13cf 100644 >> --- a/src/gallium/auxiliary/util/u_threaded_context.c >> +++ b/src/gallium/auxiliary/util/u_threaded_context.c >> @@ -1508,35 +1508,47 @@ struct tc_resource_copy_region { >> }; >> static void >> tc_resource_copy_region(struct pipe_context *_pipe, >> struct pipe_resource *dst, unsigned dst_level, >> unsigned dstx, unsigned dsty, unsigned dstz, >> struct pipe_resource *src, unsigned src_level, >> const struct pipe_box *src_box); >> static void >> +tc_notify_staging_upload_done(struct threaded_context *tc, unsigned size) >> +{ >> + tc->unflushed_transfer_size += size; >> + >> + if (tc->unflushed_transfer_size > >> TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE) { >> + tc->base.flush(&tc->base, NULL, PIPE_FLUSH_ASYNC); >> + tc->unflushed_transfer_size = 0; >> + } >> +} >> + >> +static void >> tc_buffer_do_flush_region(struct threaded_context *tc, >> struct threaded_transfer *ttrans, >> const struct pipe_box *box) >> { >> struct threaded_resource *tres = >> threaded_resource(ttrans->b.resource); >> if (ttrans->staging) { >> struct pipe_box src_box; >> u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment, >> box->width, &src_box); >> /* Copy the staging buffer into the original one. */ >> tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, >> 0, 0, >> ttrans->staging, 0, &src_box); >> + tc_notify_staging_upload_done(tc, box->width); >> } >> util_range_add(tres->base_valid_buffer_range, box->x, box->x + >> box->width); >> } >> static void >> tc_transfer_flush_region(struct pipe_context *_pipe, >> struct pipe_transfer *transfer, >> const struct pipe_box *rel_box) >> { >> @@ -1653,20 +1665,21 @@ tc_buffer_subdata(struct pipe_context *_pipe, >> /* The upload is small. Enqueue it. */ >> struct tc_buffer_subdata *p = >> tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, >> tc_buffer_subdata, size); >> tc_set_resource_reference(&p->resource, resource); >> p->usage = usage; >> p->offset = offset; >> p->size = size; >> memcpy(p->slot, data, size); >> + tc_notify_staging_upload_done(tc, size); >> } >> struct tc_texture_subdata { >> struct pipe_resource *resource; >> unsigned level, usage, stride, layer_stride; >> struct pipe_box box; >> char slot[0]; /* more will be allocated if needed */ >> }; >> static void >> diff --git a/src/gallium/auxiliary/util/u_threaded_context.h >> b/src/gallium/auxiliary/util/u_threaded_context.h >> index 53c5a7e..295464a 100644 >> --- a/src/gallium/auxiliary/util/u_threaded_context.h >> +++ b/src/gallium/auxiliary/util/u_threaded_context.h >> @@ -225,20 +225,27 @@ struct tc_unflushed_batch_token; >> /* Threshold for when to use the queue or sync. */ >> #define TC_MAX_STRING_MARKER_BYTES 512 >> /* Threshold for when to enqueue buffer/texture_subdata as-is. >> * If the upload size is greater than this, it will do instead: >> * - for buffers: DISCARD_RANGE is done by the threaded context >> * - for textures: sync and call the driver directly >> */ >> #define TC_MAX_SUBDATA_BYTES 320 >> +/* Every staging upload allocates memory. If we have too many uploads >> + * in a row without flushes, we might run out of memory. This limit >> controls >> + * how many bytes of queued uploads we can have at a time. If we go over, >> + * the threaded context triggers a context flush. >> + */ >> +#define TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE (512 * 1024 * 1024) > > > This seems very aggressive. In reality, this should probably scale with free > space in GART, but we don't know that here. Can you reduce it to something > like 64MB? Unless there's concrete evidence that having it higher is > beneficial, of course.
Well the current limit is infinite. I think 64MB is too low. I would expect games to use 2-4GB of VRAM per IB. 512MB seems reasonable for a GTT upload limit. 512MB might not be great for small APUs, but it's definitely not very high for mid-sized dGPUs, and context flushes can be bad for perf. I know that Metro 2033 is slower if I decrease the limit a lot, but I don't remember what size I was testing. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev