Module: Mesa
Branch: main
Commit: aebf04ab3ff2d3d96a546771e1a8a1d3811b6dd0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aebf04ab3ff2d3d96a546771e1a8a1d3811b6dd0
Author: Samuel Pitoiset <[email protected]>
Date:   Fri Nov 5 15:42:43 2021 +0100

ac/rgp: add support for queue event timings

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13709>

---

 src/amd/common/ac_rgp.c  | 90 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/amd/common/ac_rgp.h  | 67 +++++++++++++++++++++++++++++++++++
 src/amd/common/ac_sqtt.h |  3 ++
 3 files changed, 160 insertions(+)

diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c
index 354fe56560e..4e06b2d44bf 100644
--- a/src/amd/common/ac_rgp.c
+++ b/src/amd/common/ac_rgp.c
@@ -780,6 +780,67 @@ static void ac_sqtt_fill_sqtt_data(struct sqtt_file_chunk_sqtt_data *chunk, int3
    chunk->size = size;
 }
 
+/**
+ * SQTT queue event timings info.
+ */
+struct sqtt_file_chunk_queue_event_timings {
+   struct sqtt_file_chunk_header header;
+   uint32_t queue_info_table_record_count;
+   uint32_t queue_info_table_size;
+   uint32_t queue_event_table_record_count;
+   uint32_t queue_event_table_size;
+};
+
+static_assert(sizeof(struct sqtt_file_chunk_queue_event_timings) == 32,
+              "sqtt_file_chunk_queue_event_timings doesn't match RGP spec");
+
+struct sqtt_queue_info_record {
+   uint64_t queue_id;
+   uint64_t queue_context;
+   struct sqtt_queue_hardware_info hardware_info;
+   uint32_t reserved;
+};
+
+static_assert(sizeof(struct sqtt_queue_info_record) == 24,
+              "sqtt_queue_info_record doesn't match RGP spec");
+
+struct sqtt_queue_event_record {
+   enum sqtt_queue_event_type event_type;
+   uint32_t sqtt_cb_id;
+   uint64_t frame_index;
+   uint32_t queue_info_index;
+   uint32_t submit_sub_index;
+   uint64_t api_id;
+   uint64_t cpu_timestamp;
+   uint64_t gpu_timestamps[2];
+};
+
+static_assert(sizeof(struct sqtt_queue_event_record) == 56,
+              "sqtt_queue_event_record doesn't match RGP spec");
+
+static void
+ac_sqtt_fill_queue_event_timings(struct rgp_queue_info *rgp_queue_info,
+                                 struct rgp_queue_event *rgp_queue_event,
+                                 struct sqtt_file_chunk_queue_event_timings *chunk)
+{
+   unsigned queue_info_size =
+      rgp_queue_info->record_count * sizeof(struct sqtt_queue_info_record);
+   unsigned queue_event_size =
+      rgp_queue_event->record_count * sizeof(struct sqtt_queue_event_record);
+
+   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_QUEUE_EVENT_TIMINGS;
+   chunk->header.chunk_id.index = 0;
+   chunk->header.major_version = 1;
+   chunk->header.minor_version = 1;
+   chunk->header.size_in_bytes = queue_info_size + queue_event_size +
+                                 sizeof(*chunk);
+
+   chunk->queue_info_table_record_count = rgp_queue_info->record_count;
+   chunk->queue_info_table_size = queue_info_size;
+   chunk->queue_event_table_record_count = rgp_queue_event->record_count;
+   chunk->queue_event_table_size = queue_event_size;
+}
+
 /* Below values are from from llvm project
  * llvm/include/llvm/BinaryFormat/ELF.h
  */
@@ -823,6 +884,8 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info,
       &thread_trace_data->rgp_loader_events;
    struct rgp_pso_correlation *rgp_pso_correlation =
       &thread_trace_data->rgp_pso_correlation;
+   struct rgp_queue_info *rgp_queue_info = &thread_trace_data->rgp_queue_info;
+   struct rgp_queue_event *rgp_queue_event = &thread_trace_data->rgp_queue_event;
 
    /* SQTT header file. */
    ac_sqtt_fill_header(&header);
@@ -912,6 +975,33 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info,
                       sizeof(struct sqtt_pso_correlation_record));
    }
 
+   /* SQTT queue event timings. */
+   if (rgp_queue_info->record_count || rgp_queue_event->record_count) {
+      struct sqtt_file_chunk_queue_event_timings queue_event_timings;
+
+      ac_sqtt_fill_queue_event_timings(rgp_queue_info, rgp_queue_event,
+                                       &queue_event_timings);
+      fwrite(&queue_event_timings, sizeof(struct sqtt_file_chunk_queue_event_timings), 1,
+             output);
+      file_offset += sizeof(struct sqtt_file_chunk_queue_event_timings);
+
+      /* Queue info. */
+      list_for_each_entry_safe(struct rgp_queue_info_record, record,
+                               &rgp_queue_info->record, list) {
+         fwrite(record, sizeof(struct sqtt_queue_info_record), 1, output);
+      }
+      file_offset += (rgp_queue_info->record_count *
+                      sizeof(struct sqtt_queue_info_record));
+
+      /* Queue event. */
+      list_for_each_entry_safe(struct rgp_queue_event_record, record,
+                               &rgp_queue_event->record, list) {
+         fwrite(record, sizeof(struct sqtt_queue_event_record), 1, output);
+      }
+      file_offset += (rgp_queue_event->record_count *
+                      sizeof(struct sqtt_queue_event_record));
+   }
+
    if (thread_trace) {
       for (unsigned i = 0; i < thread_trace->num_traces; i++) {
          const struct ac_thread_trace_se *se = &thread_trace->traces[i];
diff --git a/src/amd/common/ac_rgp.h b/src/amd/common/ac_rgp.h
index 51799ce061c..ce06f644c7c 100644
--- a/src/amd/common/ac_rgp.h
+++ b/src/amd/common/ac_rgp.h
@@ -108,6 +108,73 @@ struct rgp_pso_correlation {
    simple_mtx_t lock;
 };
 
+enum sqtt_queue_type {
+   SQTT_QUEUE_TYPE_UNKNOWN = 0x0,
+   SQTT_QUEUE_TYPE_UNIVERSAL = 0x1,
+   SQTT_QUEUE_TYPE_COMPUTE = 0x2,
+   SQTT_QUEUE_TYPE_DMA = 0x3,
+};
+
+enum sqtt_engine_type {
+   SQTT_ENGINE_TYPE_UNKNOWN = 0x0,
+   SQTT_ENGINE_TYPE_UNIVERSAL = 0x1,
+   SQTT_ENGINE_TYPE_COMPUTE = 0x2,
+   SQTT_ENGINE_TYPE_EXCLUSIVE_COMPUTE = 0x3,
+   SQTT_ENGINE_TYPE_DMA = 0x4,
+   SQTT_ENGINE_TYPE_HIGH_PRIORITY_UNIVERSAL = 0x7,
+   SQTT_ENGINE_TYPE_HIGH_PRIORITY_GRAPHICS = 0x8,
+};
+
+struct sqtt_queue_hardware_info {
+   union {
+      struct {
+         enum sqtt_queue_type queue_type : 8;
+         enum sqtt_engine_type engine_type : 8;
+         uint32_t reserved : 16;
+      };
+      uint32_t value;
+   };
+};
+
+struct rgp_queue_info_record {
+   uint64_t queue_id;
+   uint64_t queue_context;
+   struct sqtt_queue_hardware_info hardware_info;
+   uint32_t reserved;
+   struct list_head list;
+};
+
+struct rgp_queue_info {
+   uint32_t record_count;
+   struct list_head record;
+   simple_mtx_t lock;
+};
+
+enum sqtt_queue_event_type {
+   SQTT_QUEUE_TIMING_EVENT_CMDBUF_SUBMIT,
+   SQTT_QUEUE_TIMING_EVENT_SIGNAL_SEMAPHORE,
+   SQTT_QUEUE_TIMING_EVENT_WAIT_SEMAPHORE,
+   SQTT_QUEUE_TIMING_EVENT_PRESENT
+};
+
+struct rgp_queue_event_record {
+   enum sqtt_queue_event_type event_type;
+   uint32_t sqtt_cb_id;
+   uint64_t frame_index;
+   uint32_t queue_info_index;
+   uint32_t submit_sub_index;
+   uint64_t api_id;
+   uint64_t cpu_timestamp;
+   uint64_t gpu_timestamps[2];
+   struct list_head list;
+};
+
+struct rgp_queue_event {
+   uint32_t record_count;
+   struct list_head record;
+   simple_mtx_t lock;
+};
+
 int
 ac_dump_rgp_capture(struct radeon_info *info,
                     struct ac_thread_trace *thread_trace);
diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h
index b0f3de8a74c..8700928f94a 100644
--- a/src/amd/common/ac_sqtt.h
+++ b/src/amd/common/ac_sqtt.h
@@ -49,6 +49,9 @@ struct ac_thread_trace_data {
    struct rgp_code_object rgp_code_object;
    struct rgp_loader_events rgp_loader_events;
    struct rgp_pso_correlation rgp_pso_correlation;
+
+   struct rgp_queue_info rgp_queue_info;
+   struct rgp_queue_event rgp_queue_event;
 };
 
 #define SQTT_BUFFER_ALIGN_SHIFT 12
