From: Nicolai Hähnle <nicolai.haeh...@amd.com> Remove a level of indirection to make the code more explicit, which should make it easier to follow what's going on. --- src/gallium/drivers/radeonsi/si_perfcounter.c | 143 ++++++++++++------ 1 file changed, 93 insertions(+), 50 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index f0d10c054c4..65197c0daa4 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -139,21 +139,25 @@ struct si_query_group { unsigned selectors[SI_QUERY_MAX_COUNTERS]; }; struct si_query_counter { unsigned base; unsigned qwords; unsigned stride; /* in uint64s */ }; struct si_query_pc { - struct si_query_hw b; + struct si_query b; + struct si_query_buffer buffer; + + /* Size of the results in memory, in bytes. */ + unsigned result_size; unsigned shaders; unsigned num_counters; struct si_query_counter *counters; struct si_query_group *groups; }; static struct si_pc_block_base cik_CB = { .name = "CB", @@ -758,70 +762,72 @@ static void si_pc_query_destroy(struct si_screen *sscreen, struct si_query_pc *query = (struct si_query_pc *)rquery; while (query->groups) { struct si_query_group *group = query->groups; query->groups = group->next; FREE(group); } FREE(query->counters); - si_query_hw_destroy(sscreen, rquery); -} - -static bool si_pc_query_prepare_buffer(struct si_context *ctx, - struct si_query_buffer *qbuf) -{ - /* no-op */ - return true; + si_query_buffer_destroy(sscreen, &query->buffer); + FREE(query); } -static void si_pc_query_emit_start(struct si_context *sctx, +static void si_pc_query_resume(struct si_context *sctx, struct si_query *rquery) +/* struct si_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) + struct r600_resource *buffer, uint64_t va)*/ { - struct si_query_pc *query = (struct si_query_pc *)hwquery; - struct si_query_group *group; + struct si_query_pc *query = (struct si_query_pc *)rquery; int current_se = -1; int current_instance = -1; + if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size)) + return; + si_need_gfx_cs_space(sctx); + if (query->shaders) si_pc_emit_shaders(sctx, query->shaders); - for (group = query->groups; group; group = 
group->next) { + for (struct si_query_group *group = query->groups; group; group = group->next) { struct si_pc_block *block = group->block; if (group->se != current_se || group->instance != current_instance) { current_se = group->se; current_instance = group->instance; si_pc_emit_instance(sctx, group->se, group->instance); } si_pc_emit_select(sctx, block, group->num_counters, group->selectors); } if (current_se != -1 || current_instance != -1) si_pc_emit_instance(sctx, -1, -1); - si_pc_emit_start(sctx, buffer, va); + uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; + si_pc_emit_start(sctx, query->buffer.buf, va); } -static void si_pc_query_emit_stop(struct si_context *sctx, - struct si_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) +static void si_pc_query_suspend(struct si_context *sctx, struct si_query *rquery) { - struct si_query_pc *query = (struct si_query_pc *)hwquery; - struct si_query_group *group; + struct si_query_pc *query = (struct si_query_pc *)rquery; - si_pc_emit_stop(sctx, buffer, va); + if (!query->buffer.buf) + return; - for (group = query->groups; group; group = group->next) { + uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; + query->buffer.results_end += query->result_size; + + si_pc_emit_stop(sctx, query->buffer.buf, va); + + for (struct si_query_group *group = query->groups; group; group = group->next) { struct si_pc_block *block = group->block; unsigned se = group->se >= 0 ? group->se : 0; unsigned se_end = se + 1; if ((block->b->b->flags & SI_PC_BLOCK_SE) && (group->se < 0)) se_end = sctx->screen->info.max_se; do { unsigned instance = group->instance >= 0 ? 
group->instance : 0; @@ -829,63 +835,101 @@ static void si_pc_query_emit_stop(struct si_context *sctx, si_pc_emit_instance(sctx, se, instance); si_pc_emit_read(sctx, block, group->num_counters, va); va += sizeof(uint64_t) * group->num_counters; } while (group->instance < 0 && ++instance < block->num_instances); } while (++se < se_end); } si_pc_emit_instance(sctx, -1, -1); } -static void si_pc_query_clear_result(struct si_query_hw *hwquery, - union pipe_query_result *result) +static bool si_pc_query_begin(struct si_context *ctx, struct si_query *rquery) { - struct si_query_pc *query = (struct si_query_pc *)hwquery; + struct si_query_pc *query = (struct si_query_pc *)rquery; - memset(result, 0, sizeof(result->batch[0]) * query->num_counters); + si_query_buffer_reset(ctx, &query->buffer); + + LIST_ADDTAIL(&query->b.active_list, &ctx->active_queries); + ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; + + si_pc_query_resume(ctx, rquery); + + return true; } -static void si_pc_query_add_result(struct si_screen *screen, - struct si_query_hw *hwquery, +static bool si_pc_query_end(struct si_context *ctx, struct si_query *rquery) +{ + struct si_query_pc *query = (struct si_query_pc *)rquery; + + si_pc_query_suspend(ctx, rquery); + + LIST_DEL(&rquery->active_list); + ctx->num_cs_dw_queries_suspend -= rquery->num_cs_dw_suspend; + + return query->buffer.buf != NULL; +} + +static void si_pc_query_add_result(struct si_query_pc *query, void *buffer, union pipe_query_result *result) { - struct si_query_pc *query = (struct si_query_pc *)hwquery; uint64_t *results = buffer; unsigned i, j; for (i = 0; i < query->num_counters; ++i) { struct si_query_counter *counter = &query->counters[i]; for (j = 0; j < counter->qwords; ++j) { uint32_t value = results[counter->base + j * counter->stride]; result->batch[i].u64 += value; } } } +static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *rquery, + bool wait, union pipe_query_result *result) +{ + struct 
si_query_pc *query = (struct si_query_pc *)rquery; + + memset(result, 0, sizeof(result->batch[0]) * query->num_counters); + + for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { + unsigned usage = PIPE_TRANSFER_READ | + (wait ? 0 : PIPE_TRANSFER_DONTBLOCK); + unsigned results_base = 0; + void *map; + + if (rquery->b.flushed) + map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); + else + map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage); + + if (!map) + return false; + + while (results_base != qbuf->results_end) { + si_pc_query_add_result(query, map + results_base, result); + results_base += query->result_size; + } + } + + return true; +} + static struct si_query_ops batch_query_ops = { .destroy = si_pc_query_destroy, - .begin = si_query_hw_begin, - .end = si_query_hw_end, - .get_result = si_query_hw_get_result, - - .suspend = si_query_hw_suspend, - .resume = si_query_hw_resume, -}; + .begin = si_pc_query_begin, + .end = si_pc_query_end, + .get_result = si_pc_query_get_result, -static struct si_query_hw_ops batch_query_hw_ops = { - .prepare_buffer = si_pc_query_prepare_buffer, - .emit_start = si_pc_query_emit_start, - .emit_stop = si_pc_query_emit_stop, - .clear_result = si_pc_query_clear_result, - .add_result = si_pc_query_add_result, + .suspend = si_pc_query_suspend, + .resume = si_pc_query_resume, }; static struct si_query_group *get_group_state(struct si_screen *screen, struct si_query_pc *query, struct si_pc_block *block, unsigned sub_gid) { struct si_query_group *group = query->groups; while (group) { @@ -961,22 +1005,21 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, unsigned base_gid, sub_gid, sub_index; unsigned i, j; if (!pc) return NULL; query = CALLOC_STRUCT(si_query_pc); if (!query) return NULL; - query->b.b.ops = &batch_query_ops; - query->b.ops = &batch_query_hw_ops; + query->b.ops = &batch_query_ops; query->num_counters = num_queries; /* Collect selectors per group */ for (i = 0; i < 
num_queries; ++i) { unsigned sub_gid; if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER) goto error; @@ -996,41 +1039,41 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name); goto error; } group->selectors[group->num_counters] = sub_index; ++group->num_counters; } /* Compute result bases and CS size per group */ - query->b.b.num_cs_dw_suspend = pc->num_stop_cs_dwords; - query->b.b.num_cs_dw_suspend += pc->num_instance_cs_dwords; + query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords; + query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords; i = 0; for (group = query->groups; group; group = group->next) { struct si_pc_block *block = group->block; unsigned read_dw; unsigned instances = 1; if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) instances = screen->info.max_se; if (group->instance < 0) instances *= block->num_instances; group->result_base = i; - query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; + query->result_size += sizeof(uint64_t) * instances * group->num_counters; i += instances * group->num_counters; read_dw = 6 * group->num_counters; - query->b.b.num_cs_dw_suspend += instances * read_dw; - query->b.b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords; + query->b.num_cs_dw_suspend += instances * read_dw; + query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords; } if (query->shaders) { if (query->shaders == SI_PC_SHADERS_WINDOWING) query->shaders = 0xffffffff; } /* Map user-supplied query array to result indices */ query->counters = CALLOC(num_queries, sizeof(*query->counters)); for (i = 0; i < num_queries; ++i) { @@ -1057,21 +1100,21 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, counter->qwords = 1; if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) counter->qwords = screen->info.max_se; if (group->instance < 0) counter->qwords *= block->num_instances; } return 
(struct pipe_query *)query; error: - si_pc_query_destroy(screen, &query->b.b); + si_pc_query_destroy(screen, &query->b); return NULL; } static bool si_init_block_names(struct si_screen *screen, struct si_pc_block *block) { bool per_instance_groups = si_pc_block_has_per_instance_groups(screen->perfcounters, block); bool per_se_groups = si_pc_block_has_per_se_groups(screen->perfcounters, block); unsigned i, j, k; unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; -- 2.19.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev