From: Nicolai Hähnle <nicolai.haeh...@amd.com> This is a move towards using composition instead of inheritance for different query types.
This change weakens out-of-memory error reporting somewhat, though this should be acceptable since we didn't consistently report such errors in the first place. --- src/gallium/drivers/radeonsi/si_perfcounter.c | 8 +- src/gallium/drivers/radeonsi/si_query.c | 177 +++++++++--------- src/gallium/drivers/radeonsi/si_query.h | 17 +- src/gallium/drivers/radeonsi/si_texture.c | 7 +- 4 files changed, 99 insertions(+), 110 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 0b3d8f89273..f0d10c054c4 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct si_screen *sscreen, struct si_query_group *group = query->groups; query->groups = group->next; FREE(group); } FREE(query->counters); si_query_hw_destroy(sscreen, rquery); } -static bool si_pc_query_prepare_buffer(struct si_screen *screen, - struct si_query_hw *hwquery, - struct r600_resource *buffer) +static bool si_pc_query_prepare_buffer(struct si_context *ctx, + struct si_query_buffer *qbuf) { /* no-op */ return true; } static void si_pc_query_emit_start(struct si_context *sctx, struct si_query_hw *hwquery, struct r600_resource *buffer, uint64_t va) { struct si_query_pc *query = (struct si_query_pc *)hwquery; @@ -1055,23 +1054,20 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, counter->base = group->result_base + j; counter->stride = group->num_counters; counter->qwords = 1; if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) counter->qwords = screen->info.max_se; if (group->instance < 0) counter->qwords *= block->num_instances; } - if (!si_query_hw_init(screen, &query->b)) - goto error; - return (struct pipe_query *)query; error: si_pc_query_destroy(screen, &query->b.b); return NULL; } static bool si_init_block_names(struct si_screen *screen, struct si_pc_block *block) { diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 479a1bbf2c4..5b0fba0ed92 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -514,86 +514,129 @@ static struct pipe_query *si_query_sw_create(unsigned query_type) query = CALLOC_STRUCT(si_query_sw); if (!query) return NULL; query->b.type = query_type; query->b.ops = &sw_query_ops; return (struct pipe_query *)query; } -void si_query_hw_destroy(struct si_screen *sscreen, - struct si_query *rquery) +void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer) { - struct si_query_hw *query = (struct si_query_hw *)rquery; - struct si_query_buffer *prev = query->buffer.previous; + struct si_query_buffer *prev = buffer->previous; /* Release all query buffers. */ while (prev) { struct si_query_buffer *qbuf = prev; prev = prev->previous; r600_resource_reference(&qbuf->buf, NULL); FREE(qbuf); } - r600_resource_reference(&query->buffer.buf, NULL); - r600_resource_reference(&query->workaround_buf, NULL); - FREE(rquery); + r600_resource_reference(&buffer->buf, NULL); +} + +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer) +{ + /* Discard all query buffers except for the oldest. */ + while (buffer->previous) { + struct si_query_buffer *qbuf = buffer->previous; + buffer->previous = qbuf->previous; + + r600_resource_reference(&buffer->buf, NULL); + buffer->buf = qbuf->buf; /* move ownership */ + FREE(qbuf); + } + buffer->results_end = 0; + + /* Discard even the oldest buffer if it can't be mapped without a stall. */ + if (buffer->buf && + (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) || + !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE))) { + r600_resource_reference(&buffer->buf, NULL); + } } -static struct r600_resource *si_new_query_buffer(struct si_screen *sscreen, - struct si_query_hw *query) +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer, + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*), + unsigned size) { - unsigned buf_size = MAX2(query->result_size, - sscreen->info.min_alloc_size); + if (buffer->buf && buffer->results_end + size >= buffer->buf->b.b.width0) + return true; + + if (buffer->buf) { + struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer); + memcpy(qbuf, buffer, sizeof(*qbuf)); + buffer->previous = qbuf; + } + + buffer->results_end = 0; /* Queries are normally read by the CPU after * being written by the gpu, hence staging is probably a good * usage pattern. */ - struct r600_resource *buf = r600_resource( - pipe_buffer_create(&sscreen->b, 0, - PIPE_USAGE_STAGING, buf_size)); - if (!buf) - return NULL; + struct si_screen *screen = sctx->screen; + unsigned buf_size = MAX2(size, screen->info.min_alloc_size); + buffer->buf = r600_resource( + pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size)); + if (unlikely(!buffer->buf)) + return false; - if (!query->ops->prepare_buffer(sscreen, query, buf)) { - r600_resource_reference(&buf, NULL); - return NULL; + if (prepare_buffer) { + if (unlikely(!prepare_buffer(sctx, buffer))) { + r600_resource_reference(&buffer->buf, NULL); + return false; + } } - return buf; + return true; } -static bool si_query_hw_prepare_buffer(struct si_screen *sscreen, - struct si_query_hw *query, - struct r600_resource *buffer) + +void si_query_hw_destroy(struct si_screen *sscreen, + struct si_query *rquery) +{ + struct si_query_hw *query = (struct si_query_hw *)rquery; + + si_query_buffer_destroy(sscreen, &query->buffer); + r600_resource_reference(&query->workaround_buf, NULL); + FREE(rquery); +} + +static bool si_query_hw_prepare_buffer(struct si_context *sctx, + struct si_query_buffer *qbuf) { - /* Callers ensure that the buffer is currently unused by the GPU. */ - uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL, + static const struct si_query_hw si_query_hw_s; + struct si_query_hw *query = container_of(qbuf, &si_query_hw_s, buffer); + struct si_screen *screen = sctx->screen; + + /* The caller ensures that the buffer is currently unused by the GPU. */ + uint32_t *results = screen->ws->buffer_map(qbuf->buf->buf, NULL, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); if (!results) return false; - memset(results, 0, buffer->b.b.width0); + memset(results, 0, qbuf->buf->b.b.width0); if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - unsigned max_rbs = sscreen->info.num_render_backends; - unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask; + unsigned max_rbs = screen->info.num_render_backends; + unsigned enabled_rb_mask = screen->info.enabled_rb_mask; unsigned num_results; unsigned i, j; /* Set top bits for unused backends. */ - num_results = buffer->b.b.width0 / query->result_size; + num_results = qbuf->buf->b.b.width0 / query->result_size; for (j = 0; j < num_results; j++) { for (i = 0; i < max_rbs; i++) { if (!(enabled_rb_mask & (1<<i))) { results[(i * 4)+1] = 0x80000000; results[(i * 4)+3] = 0x80000000; } } results += 4 * max_rbs; } } @@ -624,30 +667,20 @@ static void si_query_hw_clear_result(struct si_query_hw *, union pipe_query_result *); static struct si_query_hw_ops query_hw_default_hw_ops = { .prepare_buffer = si_query_hw_prepare_buffer, .emit_start = si_query_hw_do_emit_start, .emit_stop = si_query_hw_do_emit_stop, .clear_result = si_query_hw_clear_result, .add_result = si_query_hw_add_result, }; -bool si_query_hw_init(struct si_screen *sscreen, - struct si_query_hw *query) -{ - query->buffer.buf = si_new_query_buffer(sscreen, query); - if (!query->buffer.buf) - return false; - - return true; -} - static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned query_type, unsigned index) { struct si_query_hw *query = CALLOC_STRUCT(si_query_hw); if (!query) return NULL; query->b.type = query_type; query->b.ops = &query_hw_ops; @@ -693,25 +726,20 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, query->result_size = 11 * 16; query->result_size += 8; /* for the fence + alignment */ query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen); break; default: assert(0); FREE(query); return NULL; } - if (!si_query_hw_init(sscreen, query)) { - FREE(query); - return NULL; - } - return (struct pipe_query *)query; } static void si_update_occlusion_query_state(struct si_context *sctx, unsigned type, int diff) { if (type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_OCCLUSION_PREDICATE || type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { bool old_enable = sctx->num_occlusion_queries != 0; @@ -802,43 +830,31 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, } radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); } static void si_query_hw_emit_start(struct si_context *sctx, struct si_query_hw *query) { uint64_t va; - if (!query->buffer.buf) - return; // previous buffer allocation failure + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer, + query->result_size)) + return; si_update_occlusion_query_state(sctx, query->b.type, 1); si_update_prims_generated_query_state(sctx, query->b.type, 1); if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA) si_need_gfx_cs_space(sctx); - /* Get a new query buffer if needed. */ - if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { - struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer); - *qbuf = query->buffer; - query->buffer.results_end = 0; - query->buffer.previous = qbuf; - query->buffer.buf = si_new_query_buffer(sctx->screen, query); - if (!query->buffer.buf) - return; - } - - /* emit begin query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; - query->ops->emit_start(sctx, query, query->buffer.buf, va); } static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query, struct r600_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; uint64_t fence_va = 0; @@ -905,26 +921,30 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, query->buffer.buf, fence_va, 0x80000000, query->b.type); } } static void si_query_hw_emit_stop(struct si_context *sctx, struct si_query_hw *query) { uint64_t va; - if (!query->buffer.buf) - return; // previous buffer allocation failure - /* The queries which need begin already called this in begin_query. */ - if (query->flags & SI_QUERY_HW_FLAG_NO_START) + if (query->flags & SI_QUERY_HW_FLAG_NO_START) { si_need_gfx_cs_space(sctx); + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer, + query->result_size)) + return; + } + + if (!query->buffer.buf) + return; // previous buffer allocation failure /* emit end query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; query->ops->emit_stop(sctx, query, query->buffer.buf, va); query->buffer.results_end += query->result_size; si_update_occlusion_query_state(sctx, query->b.type, -1); si_update_prims_generated_query_state(sctx, query->b.type, -1); @@ -1054,59 +1074,32 @@ static void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query) static boolean si_begin_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; struct si_query *rquery = (struct si_query *)query; return rquery->ops->begin(sctx, rquery); } -void si_query_hw_reset_buffers(struct si_context *sctx, - struct si_query_hw *query) -{ - struct si_query_buffer *prev = query->buffer.previous; - - /* Discard the old query buffers. */ - while (prev) { - struct si_query_buffer *qbuf = prev; - prev = prev->previous; - r600_resource_reference(&qbuf->buf, NULL); - FREE(qbuf); - } - - query->buffer.results_end = 0; - query->buffer.previous = NULL; - - /* Obtain a new buffer if the current one can't be mapped without a stall. */ - if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) || - !sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { - r600_resource_reference(&query->buffer.buf, NULL); - query->buffer.buf = si_new_query_buffer(sctx->screen, query); - } else { - if (!query->ops->prepare_buffer(sctx->screen, query, query->buffer.buf)) - r600_resource_reference(&query->buffer.buf, NULL); - } -} - bool si_query_hw_begin(struct si_context *sctx, struct si_query *rquery) { struct si_query_hw *query = (struct si_query_hw *)rquery; if (query->flags & SI_QUERY_HW_FLAG_NO_START) { assert(0); return false; } if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES)) - si_query_hw_reset_buffers(sctx, query); + si_query_buffer_reset(sctx, &query->buffer); r600_resource_reference(&query->workaround_buf, NULL); si_query_hw_emit_start(sctx, query); if (!query->buffer.buf) return false; LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries); sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; return true; @@ -1119,21 +1112,21 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query) return rquery->ops->end(sctx, rquery); } bool si_query_hw_end(struct si_context *sctx, struct si_query *rquery) { struct si_query_hw *query = (struct si_query_hw *)rquery; if (query->flags & SI_QUERY_HW_FLAG_NO_START) - si_query_hw_reset_buffers(sctx, query); + si_query_buffer_reset(sctx, &query->buffer); si_query_hw_emit_stop(sctx, query); if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) { LIST_DELINIT(&query->b.active_list); sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend; } if (!query->buffer.buf) return false; diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h index ebd965a004f..63af760a271 100644 --- a/src/gallium/drivers/radeonsi/si_query.h +++ b/src/gallium/drivers/radeonsi/si_query.h @@ -27,20 +27,21 @@ #include "util/u_threaded_context.h" struct pipe_context; struct pipe_query; struct pipe_resource; struct si_screen; struct si_context; struct si_query; +struct si_query_buffer; struct si_query_hw; struct r600_resource; enum { SI_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, SI_QUERY_DECOMPRESS_CALLS, SI_QUERY_MRT_DRAW_CALLS, SI_QUERY_PRIM_RESTART_CALLS, SI_QUERY_SPILL_DRAW_CALLS, SI_QUERY_COMPUTE_CALLS, @@ -153,23 +154,21 @@ struct si_query { }; enum { SI_QUERY_HW_FLAG_NO_START = (1 << 0), /* gap */ /* whether begin_query doesn't clear the result */ SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), }; struct si_query_hw_ops { - bool (*prepare_buffer)(struct si_screen *, - struct si_query_hw *, - struct r600_resource *); + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer *); void (*emit_start)(struct si_context *, struct si_query_hw *, struct r600_resource *buffer, uint64_t va); void (*emit_stop)(struct si_context *, struct si_query_hw *, struct r600_resource *buffer, uint64_t va); void (*clear_result)(struct si_query_hw *, union pipe_query_result *); void (*add_result)(struct si_screen *screen, struct si_query_hw *, void *buffer, union pipe_query_result *result); @@ -179,40 +178,45 @@ struct si_query_buffer { /* The buffer where query results are stored. */ struct r600_resource *buf; /* Offset of the next free result after current query data */ unsigned results_end; /* If a query buffer is full, a new buffer is created and the old one * is put in here. When we calculate the result, we sum up the samples * from all buffers. */ struct si_query_buffer *previous; }; +void si_query_buffer_destroy(struct si_screen *sctx, struct si_query_buffer *buffer); +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer); +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer, + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*), + unsigned size); + + struct si_query_hw { struct si_query b; struct si_query_hw_ops *ops; unsigned flags; /* The query buffer and how many results are in it. */ struct si_query_buffer buffer; /* Size of the result in memory for both begin_query and end_query, * this can be one or two numbers, or it could even be a size of a structure. */ unsigned result_size; /* For transform feedback: which stream the query is for */ unsigned stream; /* Workaround via compute shader */ struct r600_resource *workaround_buf; unsigned workaround_offset; }; -bool si_query_hw_init(struct si_screen *sscreen, - struct si_query_hw *query); void si_query_hw_destroy(struct si_screen *sscreen, struct si_query *rquery); bool si_query_hw_begin(struct si_context *sctx, struct si_query *rquery); bool si_query_hw_end(struct si_context *sctx, struct si_query *rquery); bool si_query_hw_get_result(struct si_context *sctx, struct si_query *rquery, bool wait, union pipe_query_result *result); @@ -237,20 +241,17 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned num_queries, unsigned *query_types); int si_get_perfcounter_info(struct si_screen *, unsigned index, struct pipe_driver_query_info *info); int si_get_perfcounter_group_info(struct si_screen *, unsigned index, struct pipe_driver_query_group_info *info); -void si_query_hw_reset_buffers(struct si_context *sctx, - struct si_query_hw *query); - struct si_qbo_state { void *saved_compute; struct pipe_constant_buffer saved_const0; struct pipe_shader_buffer saved_ssbo[3]; }; #endif /* SI_QUERY_H */ diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index ac1a0aa6097..9df12e0f5bd 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -2276,25 +2276,24 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct si_context *sctx = (struct si_context*)ctx; struct pipe_query *tmp; unsigned i = vi_get_context_dcc_stats_index(sctx, tex); bool query_active = sctx->dcc_stats[i].query_active; bool disable = false; if (sctx->dcc_stats[i].ps_stats[2]) { union pipe_query_result result; /* Read the results. */ - ctx->get_query_result(ctx, sctx->dcc_stats[i].ps_stats[2], + struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2]; + ctx->get_query_result(ctx, query, true, &result); - si_query_hw_reset_buffers(sctx, - (struct si_query_hw*) - sctx->dcc_stats[i].ps_stats[2]); + si_query_buffer_reset(sctx, &((struct si_query_hw*)query)->buffer); /* Compute the approximate number of fullscreen draws. */ tex->ps_draw_ratio = result.pipeline_statistics.ps_invocations / (tex->buffer.b.b.width0 * tex->buffer.b.b.height0); sctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio; disable = tex->dcc_separate_buffer && !vi_should_enable_separate_dcc(tex); } -- 2.19.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev