From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/gallium/drivers/radeonsi/si_perfcounter.c | 13 ++--
 src/gallium/drivers/radeonsi/si_query.c       | 75 ++++++++++---------
 src/gallium/drivers/radeonsi/si_query.h       | 18 +++--
 3 files changed, 62 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 69e149c76b6..0b3d8f89273 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -861,21 +861,24 @@ static void si_pc_query_add_result(struct si_screen *screen,
                        uint32_t value = results[counter->base + j * counter->stride];
                        result->batch[i].u64 += value;
                }
        }
 }
 
 static struct si_query_ops batch_query_ops = {
        .destroy = si_pc_query_destroy,
        .begin = si_query_hw_begin,
        .end = si_query_hw_end,
-       .get_result = si_query_hw_get_result
+       .get_result = si_query_hw_get_result,
+
+       .suspend = si_query_hw_suspend,
+       .resume = si_query_hw_resume,
 };
 
 static struct si_query_hw_ops batch_query_hw_ops = {
        .prepare_buffer = si_pc_query_prepare_buffer,
        .emit_start = si_pc_query_emit_start,
        .emit_stop = si_pc_query_emit_stop,
        .clear_result = si_pc_query_clear_result,
        .add_result = si_pc_query_add_result,
 };
 
@@ -994,41 +997,41 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
                        fprintf(stderr,
                                "perfcounter group %s: too many selected\n",
                                block->b->b->name);
                        goto error;
                }
                group->selectors[group->num_counters] = sub_index;
                ++group->num_counters;
        }
 
        /* Compute result bases and CS size per group */
-       query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
-       query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
+       query->b.b.num_cs_dw_suspend = pc->num_stop_cs_dwords;
+       query->b.b.num_cs_dw_suspend += pc->num_instance_cs_dwords;
 
        i = 0;
        for (group = query->groups; group; group = group->next) {
                struct si_pc_block *block = group->block;
                unsigned read_dw;
                unsigned instances = 1;
 
                if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
                        instances = screen->info.max_se;
                if (group->instance < 0)
                        instances *= block->num_instances;
 
                group->result_base = i;
                query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
                i += instances * group->num_counters;
 
                read_dw = 6 * group->num_counters;
-               query->b.num_cs_dw_end += instances * read_dw;
-               query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
+               query->b.b.num_cs_dw_suspend += instances * read_dw;
+               query->b.b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords;
        }
 
        if (query->shaders) {
                if (query->shaders == SI_PC_SHADERS_WINDOWING)
                        query->shaders = 0xffffffff;
        }
 
        /* Map user-supplied query array to result indices */
        query->counters = CALLOC(num_queries, sizeof(*query->counters));
        for (i = 0; i < num_queries; ++i) {
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index aed3e1e80c1..479a1bbf2c4 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -27,20 +27,22 @@
 #include "si_pipe.h"
 #include "si_query.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 #include "util/os_time.h"
 #include "util/u_suballoc.h"
 #include "amd/common/sid.h"
 
 #define SI_MAX_STREAMS 4
 
+static struct si_query_ops query_hw_ops;
+
 struct si_hw_query_params {
        unsigned start_offset;
        unsigned end_offset;
        unsigned fence_offset;
        unsigned pair_stride;
        unsigned pair_count;
 };
 
 /* Queries without buffer handling or suspend/resume. */
 struct si_query_sw {
@@ -600,28 +602,20 @@ static bool si_query_hw_prepare_buffer(struct si_screen *sscreen,
 }
 
 static void si_query_hw_get_result_resource(struct si_context *sctx,
                                            struct si_query *rquery,
                                            bool wait,
                                            enum pipe_query_value_type result_type,
                                            int index,
                                            struct pipe_resource *resource,
                                            unsigned offset);
 
-static struct si_query_ops query_hw_ops = {
-       .destroy = si_query_hw_destroy,
-       .begin = si_query_hw_begin,
-       .end = si_query_hw_end,
-       .get_result = si_query_hw_get_result,
-       .get_result_resource = si_query_hw_get_result_resource,
-};
-
 static void si_query_hw_do_emit_start(struct si_context *sctx,
                                      struct si_query_hw *query,
                                      struct r600_resource *buffer,
                                      uint64_t va);
 static void si_query_hw_do_emit_stop(struct si_context *sctx,
                                     struct si_query_hw *query,
                                     struct r600_resource *buffer,
                                     uint64_t va);
 static void si_query_hw_add_result(struct si_screen *sscreen,
                                   struct si_query_hw *, void *buffer,
@@ -658,55 +652,54 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
        query->b.type = query_type;
        query->b.ops = &query_hw_ops;
        query->ops = &query_hw_default_hw_ops;
 
        switch (query_type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
        case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
                query->result_size = 16 * sscreen->info.num_render_backends;
                query->result_size += 16; /* for the fence + alignment */
-               query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
+               query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
                break;
        case SI_QUERY_TIME_ELAPSED_SDMA:
                /* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */
                query->result_size = 64;
-               query->num_cs_dw_end = 0;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
                query->result_size = 24;
-               query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
+               query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
                break;
        case PIPE_QUERY_TIMESTAMP:
                query->result_size = 16;
-               query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen);
+               query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen);
                query->flags = SI_QUERY_HW_FLAG_NO_START;
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
        case PIPE_QUERY_SO_STATISTICS:
        case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
                /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
                query->result_size = 32;
-               query->num_cs_dw_end = 6;
+               query->b.num_cs_dw_suspend = 6;
                query->stream = index;
                break;
        case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
                /* NumPrimitivesWritten, PrimitiveStorageNeeded. */
                query->result_size = 32 * SI_MAX_STREAMS;
-               query->num_cs_dw_end = 6 * SI_MAX_STREAMS;
+               query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS;
                break;
        case PIPE_QUERY_PIPELINE_STATISTICS:
                /* 11 values on GCN. */
                query->result_size = 11 * 16;
                query->result_size += 8; /* for the fence + alignment */
-               query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen);
+               query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
                break;
        default:
                assert(0);
                FREE(query);
                return NULL;
        }
 
        if (!si_query_hw_init(sscreen, query)) {
                FREE(query);
                return NULL;
@@ -833,22 +826,20 @@ static void si_query_hw_emit_start(struct si_context *sctx,
                query->buffer.previous = qbuf;
                query->buffer.buf = si_new_query_buffer(sctx->screen, query);
                if (!query->buffer.buf)
                        return;
        }
 
        /* emit begin query */
        va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
        query->ops->emit_start(sctx, query, query->buffer.buf, va);
-
-       sctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
 }
 
 static void si_query_hw_do_emit_stop(struct si_context *sctx,
                                       struct si_query_hw *query,
                                       struct r600_resource *buffer,
                                       uint64_t va)
 {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
        uint64_t fence_va = 0;
 
@@ -928,23 +919,20 @@ static void si_query_hw_emit_stop(struct si_context *sctx,
        if (query->flags & SI_QUERY_HW_FLAG_NO_START)
                si_need_gfx_cs_space(sctx);
 
        /* emit end query */
        va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
        query->ops->emit_stop(sctx, query, query->buffer.buf, va);
 
        query->buffer.results_end += query->result_size;
 
-       if (!(query->flags & SI_QUERY_HW_FLAG_NO_START))
-               sctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
-
        si_update_occlusion_query_state(sctx, query->b.type, -1);
        si_update_prims_generated_query_state(sctx, query->b.type, -1);
 }
 
 static void emit_set_predicate(struct si_context *ctx,
                               struct r600_resource *buf, uint64_t va,
                               uint32_t op)
 {
        struct radeon_cmdbuf *cs = ctx->gfx_cs;
 
@@ -1112,21 +1100,22 @@ bool si_query_hw_begin(struct si_context *sctx,
 
        if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
                si_query_hw_reset_buffers(sctx, query);
 
        r600_resource_reference(&query->workaround_buf, NULL);
 
        si_query_hw_emit_start(sctx, query);
        if (!query->buffer.buf)
                return false;
 
-       LIST_ADDTAIL(&query->list, &sctx->active_queries);
+       LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries);
+       sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
        return true;
 }
 
 static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct si_query *rquery = (struct si_query *)query;
 
        return rquery->ops->end(sctx, rquery);
 }
@@ -1134,22 +1123,24 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
 bool si_query_hw_end(struct si_context *sctx,
                     struct si_query *rquery)
 {
        struct si_query_hw *query = (struct si_query_hw *)rquery;
 
        if (query->flags & SI_QUERY_HW_FLAG_NO_START)
                si_query_hw_reset_buffers(sctx, query);
 
        si_query_hw_emit_stop(sctx, query);
 
-       if (!(query->flags & SI_QUERY_HW_FLAG_NO_START))
-               LIST_DELINIT(&query->list);
+       if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
+               LIST_DELINIT(&query->b.active_list);
+               sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
+       }
 
        if (!query->buffer.buf)
                return false;
 
        return true;
 }
 
 static void si_get_hw_query_params(struct si_context *sctx,
                                   struct si_query_hw *rquery, int index,
                                   struct si_hw_query_params *params)
@@ -1342,20 +1333,41 @@ static void si_query_hw_add_result(struct si_screen *sscreen,
                       result->pipeline_statistics.c_primitives,
                       result->pipeline_statistics.ps_invocations,
                       result->pipeline_statistics.cs_invocations);
 #endif
                break;
        default:
                assert(0);
        }
 }
 
+void si_query_hw_suspend(struct si_context *sctx, struct si_query *query)
+{
+       si_query_hw_emit_stop(sctx, (struct si_query_hw *)query);
+}
+
+void si_query_hw_resume(struct si_context *sctx, struct si_query *query)
+{
+       si_query_hw_emit_start(sctx, (struct si_query_hw *)query);
+}
+
+static struct si_query_ops query_hw_ops = {
+       .destroy = si_query_hw_destroy,
+       .begin = si_query_hw_begin,
+       .end = si_query_hw_end,
+       .get_result = si_query_hw_get_result,
+       .get_result_resource = si_query_hw_get_result_resource,
+
+       .suspend = si_query_hw_suspend,
+       .resume = si_query_hw_resume,
+};
+
 static boolean si_get_query_result(struct pipe_context *ctx,
                                   struct pipe_query *query, boolean wait,
                                   union pipe_query_result *result)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct si_query *rquery = (struct si_query *)query;
 
        return rquery->ops->get_result(sctx, rquery, wait, result);
 }
 
@@ -1637,40 +1649,35 @@ static void si_render_condition(struct pipe_context *ctx,
 
        sctx->render_cond = query;
        sctx->render_cond_invert = condition;
        sctx->render_cond_mode = mode;
 
        si_set_atom_dirty(sctx, atom, query != NULL);
 }
 
 void si_suspend_queries(struct si_context *sctx)
 {
-       struct si_query_hw *query;
+       struct si_query *query;
 
-       LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
-               si_query_hw_emit_stop(sctx, query);
-       }
-       assert(sctx->num_cs_dw_queries_suspend == 0);
+       LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list)
+               query->ops->suspend(sctx, query);
 }
 
 void si_resume_queries(struct si_context *sctx)
 {
-       struct si_query_hw *query;
-
-       assert(sctx->num_cs_dw_queries_suspend == 0);
+       struct si_query *query;
 
        /* Check CS space here. Resuming must not be interrupted by flushes. */
        si_need_gfx_cs_space(sctx);
 
-       LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
-               si_query_hw_emit_start(sctx, query);
-       }
+       LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list)
+               query->ops->resume(sctx, query);
 }
 
 #define XFULL(name_, query_type_, type_, result_type_, group_id_) \
        { \
                .name = name_, \
                .query_type = SI_QUERY_##query_type_, \
                .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
                .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
                .group_id = group_id_ \
        }
diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h
index 032946edf4d..ebd965a004f 100644
--- a/src/gallium/drivers/radeonsi/si_query.h
+++ b/src/gallium/drivers/radeonsi/si_query.h
@@ -126,28 +126,37 @@ struct si_query_ops {
        bool (*end)(struct si_context *, struct si_query *);
        bool (*get_result)(struct si_context *,
                           struct si_query *, bool wait,
                           union pipe_query_result *result);
        void (*get_result_resource)(struct si_context *,
                                    struct si_query *, bool wait,
                                    enum pipe_query_value_type result_type,
                                    int index,
                                    struct pipe_resource *resource,
                                    unsigned offset);
+
+       void (*suspend)(struct si_context *, struct si_query *);
+       void (*resume)(struct si_context *, struct si_query *);
 };
 
 struct si_query {
        struct threaded_query b;
        struct si_query_ops *ops;
 
-       /* The type of query */
+       /* The PIPE_QUERY_xxx type of query */
        unsigned type;
+
+       /* The number of dwords for suspend. */
+       unsigned num_cs_dw_suspend;
+
+       /* Linked list of queries that must be suspended at end of CS. */
+       struct list_head active_list;
 };
 
 enum {
        SI_QUERY_HW_FLAG_NO_START = (1 << 0),
        /* gap */
        /* whether begin_query doesn't clear the result */
        SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
 };
 
 struct si_query_hw_ops {
@@ -180,44 +189,43 @@ struct si_query_buffer {
 struct si_query_hw {
        struct si_query b;
        struct si_query_hw_ops *ops;
        unsigned flags;
 
        /* The query buffer and how many results are in it. */
        struct si_query_buffer buffer;
        /* Size of the result in memory for both begin_query and end_query,
         * this can be one or two numbers, or it could even be a size of a structure. */
        unsigned result_size;
-       /* The number of dwords for end_query. */
-       unsigned num_cs_dw_end;
-       /* Linked list of queries */
-       struct list_head list;
        /* For transform feedback: which stream the query is for */
        unsigned stream;
 
        /* Workaround via compute shader */
        struct r600_resource *workaround_buf;
        unsigned workaround_offset;
 };
 
 bool si_query_hw_init(struct si_screen *sscreen,
                      struct si_query_hw *query);
 void si_query_hw_destroy(struct si_screen *sscreen,
                         struct si_query *rquery);
 bool si_query_hw_begin(struct si_context *sctx,
                       struct si_query *rquery);
 bool si_query_hw_end(struct si_context *sctx,
                     struct si_query *rquery);
 bool si_query_hw_get_result(struct si_context *sctx,
                            struct si_query *rquery,
                            bool wait,
                            union pipe_query_result *result);
+void si_query_hw_suspend(struct si_context *sctx, struct si_query *query);
+void si_query_hw_resume(struct si_context *sctx, struct si_query *query);
+
 
 /* Performance counters */
 struct si_perfcounters {
        unsigned num_groups;
        unsigned num_blocks;
        struct si_pc_block *blocks;
 
        unsigned num_stop_cs_dwords;
        unsigned num_instance_cs_dwords;
 
-- 
2.19.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to