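The V3D engine provides several hardware performance counters. Implement the ->get_driver_query_info() and ->get_driver_query_group_info() screen hooks, and back driver-specific (batch) queries with kernel perfmon objects, so that these counters are exposed through the GL_AMD_performance_monitor extension.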
Signed-off-by: Boris Brezillon <boris.brezil...@free-electrons.com> --- src/gallium/drivers/vc4/vc4_context.h | 13 +++ src/gallium/drivers/vc4/vc4_job.c | 9 +- src/gallium/drivers/vc4/vc4_query.c | 197 ++++++++++++++++++++++++++++++++-- src/gallium/drivers/vc4/vc4_screen.c | 7 ++ src/gallium/drivers/vc4/vc4_screen.h | 1 + 5 files changed, 215 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 4a1e4093f1a0..b6d9f041efc7 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -309,6 +309,11 @@ struct vc4_job { struct vc4_job_key key; }; +struct vc4_hwperfmon { + uint32_t id; + uint64_t counters[DRM_VC4_MAX_PERF_COUNTERS]; +}; + struct vc4_context { struct pipe_context base; @@ -387,6 +392,8 @@ struct vc4_context { struct pipe_viewport_state viewport; struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; struct vc4_vertexbuf_stateobj vertexbuf; + + struct vc4_hwperfmon *perfmon; /** @} */ }; @@ -444,6 +451,12 @@ vc4_sampler_state(struct pipe_sampler_state *psampler) return (struct vc4_sampler_state *)psampler; } +int vc4_get_driver_query_group_info(struct pipe_screen *pscreen, + unsigned index, + struct pipe_driver_query_group_info *info); +int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info); + struct pipe_context *vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); void vc4_draw_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index fb0c5bbc78cf..f75a32565603 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -362,7 +362,7 @@ vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job, rsc->writes++; } -#define MAX_CHUNKS 1 +#define MAX_CHUNKS 2 /** * Submits the job to the kernel and then reinitializes it. 
@@ -467,6 +467,13 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job) submit.uniforms = (uintptr_t)job->uniforms.base; submit.uniforms_size = cl_offset(&job->uniforms); + if (vc4->perfmon && screen->has_extended_cl) { + chunks[nchunks].perfmon.type = VC4_PERFMON_CHUNK; + chunks[nchunks].perfmon.id = vc4->perfmon->id; + chunks[nchunks].perfmon.pad = 0; + nchunks++; + } + if (nchunks) { submit.flags |= VC4_SUBMIT_CL_EXTENDED; submit.cl_chunks = (uintptr_t)chunks; diff --git a/src/gallium/drivers/vc4/vc4_query.c b/src/gallium/drivers/vc4/vc4_query.c index ddf8f8fb0c2c..d6b081bb15d7 100644 --- a/src/gallium/drivers/vc4/vc4_query.c +++ b/src/gallium/drivers/vc4/vc4_query.c @@ -32,49 +32,224 @@ struct vc4_query { - uint8_t pad; + unsigned num_queries; + struct vc4_hwperfmon *hwperfmon; }; +static const char *v3d_counter_names[] = { + "FEP-valid-primitives-no-rendered-pixels", + "FEP-valid-primitives-rendered-pixels", + "FEP-clipped-quads", + "FEP-valid-quads", + "TLB-quads-not-passing-stencil-test", + "TLB-quads-not-passing-z-and-stencil-test", + "TLB-quads-with-zero-coverage", + "TLB-quads-with-non-zero-coverage", + "TLB-quads-written-to-color-buffer", + "PTB-primitives-discarded-outside-viewport", + "PTB-primitives-need-clipping", + "PTB-primitives-discared-reversed", + "QPU-total-idle-clk-cycles", + "QPU-total-clk-cycles-vertex-coord-shading", + "QPU-total-clk-cycles-fragment-shading", + "QPU-total-clk-cycles-executing-valid-instr", + "QPU-total-clk-cycles-waiting-TMU", + "QPU-total-clk-cycles-waiting-scoreboard", + "QPU-total-clk-cycles-waiting-varyings", + "QPU-total-instr-cache-hit", + "QPU-total-instr-cache-miss", + "QPU-total-uniform-cache-hit", + "QPU-total-uniform-cache-miss", + "TMU-total-text-quads-processed", + "TMU-total-text-cache-miss", + "VPM-total-clk-cycles-VDW-stalled", + "VPM-total-clk-cycles-VCD-stalled", + "L2C-total-cache-hit", + "L2C-total-cache-miss", +}; + +int vc4_get_driver_query_group_info(struct pipe_screen *pscreen, + unsigned 
index, + struct pipe_driver_query_group_info *info) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + + if (!screen->has_perfmon_ioctl) + return 0; + + if (!info) + return 1; + + if (index > 0) + return 0; + + info->name = "V3D counters"; + info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS; + info->num_queries = ARRAY_SIZE(v3d_counter_names); + return 1; +} + +int vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, + struct pipe_driver_query_info *info) +{ + struct vc4_screen *screen = vc4_screen(pscreen); + + if (!screen->has_perfmon_ioctl) + return 0; + + if (!info) + return ARRAY_SIZE(v3d_counter_names); + + if (index >= ARRAY_SIZE(v3d_counter_names)) + return 0; + + info->name = v3d_counter_names[index]; + info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; + info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; + return 1; +} + static struct pipe_query * -vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries, + unsigned *query_types) { + struct vc4_context *ctx = vc4_context(pctx); struct vc4_query *query = calloc(1, sizeof(*query)); + struct drm_vc4_perfmon_create req; + struct vc4_hwperfmon *hwperfmon; + unsigned i, nhwqueries = 0; + int ret; + + if (!query) + return NULL; + + for (i = 0; i < num_queries; i++) { + if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC) + nhwqueries++; + } + + /* We can't mix HW and non-HW queries. 
*/ + if (nhwqueries && nhwqueries != num_queries) + return NULL; + + if (!nhwqueries) + return (struct pipe_query *)query; + + hwperfmon = calloc(1, sizeof(*hwperfmon)); + if (!hwperfmon) + goto err_free_query; + + for (i = 0; i < num_queries; i++) + req.events[i] = query_types[i] - PIPE_QUERY_DRIVER_SPECIFIC; + + req.ncounters = num_queries; + ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req); + if (ret) + goto err_free_hwperfmon; + + hwperfmon->id = req.id; + query->hwperfmon = hwperfmon; + query->num_queries = num_queries; /* Note that struct pipe_query isn't actually defined anywhere. */ return (struct pipe_query *)query; + +err_free_hwperfmon: + free(hwperfmon); + +err_free_query: + free(query); + + return NULL; +} + +static struct pipe_query * +vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) +{ + return vc4_create_batch_query(ctx, 1, &query_type); } static void -vc4_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_context *ctx = vc4_context(pctx); + struct vc4_query *query = (struct vc4_query *)pquery; + + if (query->hwperfmon) { + struct drm_vc4_perfmon_destroy req; + + req.id = query->hwperfmon->id; + vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &req); + free(query->hwperfmon); + } + free(query); } static boolean -vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_query *query = (struct vc4_query *)pquery; + struct vc4_context *ctx = vc4_context(pctx); + + if (!query->hwperfmon) + return true; + + /* Only one perfmon can be activated per context. 
*/ + if (ctx->perfmon) + return false; + + ctx->perfmon = query->hwperfmon; return true; } static bool -vc4_end_query(struct pipe_context *ctx, struct pipe_query *query) +vc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery) { + struct vc4_query *query = (struct vc4_query *)pquery; + struct vc4_context *ctx = vc4_context(pctx); + + if (!query->hwperfmon) + return true; + + if (ctx->perfmon != query->hwperfmon) + return false; + + ctx->perfmon = NULL; return true; } static boolean -vc4_get_query_result(struct pipe_context *ctx, struct pipe_query *query, +vc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery, boolean wait, union pipe_query_result *vresult) { - uint64_t *result = &vresult->u64; + struct vc4_context *ctx = vc4_context(pctx); + struct vc4_query *query = (struct vc4_query *)pquery; + struct drm_vc4_perfmon_get_values req; + unsigned i; + int ret; + + if (!query->hwperfmon) { + vresult->u64 = 0; + return true; + } - *result = 0; + req.id = query->hwperfmon->id; + req.values_ptr = (uintptr_t)query->hwperfmon->counters; + ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req); + if (ret) + return false; + + for (i = 0; i < query->num_queries; i++) + vresult[i].u64 = query->hwperfmon->counters[i]; return true; } static void -vc4_set_active_query_state(struct pipe_context *pipe, boolean enable) +vc4_set_active_query_state(struct pipe_context *pctx, boolean enable) { } @@ -82,10 +257,10 @@ void vc4_query_init(struct pipe_context *pctx) { pctx->create_query = vc4_create_query; + pctx->create_batch_query = vc4_create_batch_query; pctx->destroy_query = vc4_destroy_query; pctx->begin_query = vc4_begin_query; pctx->end_query = vc4_end_query; pctx->get_query_result = vc4_get_query_result; - pctx->set_active_query_state = vc4_set_active_query_state; + pctx->set_active_query_state = vc4_set_active_query_state; } - diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 
4b63e940822d..2f784dda51b7 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -698,6 +698,8 @@ vc4_screen_create(int fd, struct renderonly *ro) vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_MADVISE); screen->has_extended_cl = vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_EXTENDED_CL); + screen->has_perfmon_ioctl = + vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_PERFMON); if (!vc4_get_chip_info(screen)) goto fail; @@ -724,6 +726,11 @@ vc4_screen_create(int fd, struct renderonly *ro) pscreen->get_compiler_options = vc4_screen_get_compiler_options; pscreen->query_dmabuf_modifiers = vc4_screen_query_dmabuf_modifiers; + if (screen->has_perfmon_ioctl) { + pscreen->get_driver_query_group_info = vc4_get_driver_query_group_info; + pscreen->get_driver_query_info = vc4_get_driver_query_info; + } + return pscreen; fail: diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index 83719d88baf0..fe8f286d0da1 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -98,6 +98,7 @@ struct vc4_screen { bool has_madvise; bool has_tiling_ioctl; bool has_extended_cl; + bool has_perfmon_ioctl; struct vc4_simulator_file *sim_file; }; -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev