From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeon/r600_query.c | 25 ++++++++++++++++++++++- src/gallium/drivers/radeon/r600_query.h | 1 + src/gallium/drivers/radeon/radeon_winsys.h | 1 + src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 ++ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 ++ 5 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 05741d3..8009416 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -19,37 +19,41 @@ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r600_query.h" #include "r600_cs.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" - +#include "os/os_time.h" #include "tgsi/tgsi_text.h" struct r600_hw_query_params { unsigned start_offset; unsigned end_offset; unsigned fence_offset; unsigned pair_stride; unsigned pair_count; }; /* Queries without buffer handling or suspend/resume. */ struct r600_query_sw { struct r600_query b; uint64_t begin_result; uint64_t end_result; + + uint64_t begin_time; + uint64_t end_time; + /* Fence for GPU_FINISHED. */ struct pipe_fence_handle *fence; }; static void r600_query_sw_destroy(struct r600_common_context *rctx, struct r600_query *rquery) { struct pipe_screen *screen = rctx->b.screen; struct r600_query_sw *query = (struct r600_query_sw *)rquery; @@ -69,28 +73,30 @@ static enum radeon_value_id winsys_id_from_type(unsigned type) case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS; case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS; case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED; case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS; case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE; case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE; case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE; case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE; case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK; case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK; + case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME; default: unreachable("query type does not correspond to winsys id"); } } static bool r600_query_sw_begin(struct r600_common_context *rctx, struct r600_query *rquery) { struct r600_query_sw *query = (struct r600_query_sw *)rquery; + enum radeon_value_id ws_id; switch(query->b.type) { case PIPE_QUERY_TIMESTAMP_DISJOINT: case PIPE_QUERY_GPU_FINISHED: break; case R600_QUERY_DRAW_CALLS: query->begin_result = rctx->num_draw_calls; break; case R600_QUERY_SPILL_DRAW_CALLS: query->begin_result = rctx->num_spill_draw_calls; @@ -139,22 +145,28 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx, case R600_QUERY_NUM_MAPPED_BUFFERS: query->begin_result = 0; break; case R600_QUERY_BUFFER_WAIT_TIME: case R600_QUERY_NUM_GFX_IBS: case R600_QUERY_NUM_SDMA_IBS: case R600_QUERY_NUM_BYTES_MOVED: case R600_QUERY_NUM_EVICTIONS: { enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); + query->begin_time = os_time_get_nano(); break; } + case R600_QUERY_CS_THREAD_BUSY: + ws_id = winsys_id_from_type(query->b.type); + query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); + query->begin_time = os_time_get_nano(); + break; case R600_QUERY_GPU_LOAD: case R600_QUERY_GPU_SHADERS_BUSY: case R600_QUERY_GPU_TA_BUSY: case R600_QUERY_GPU_GDS_BUSY: case R600_QUERY_GPU_VGT_BUSY: case R600_QUERY_GPU_IA_BUSY: case R600_QUERY_GPU_SX_BUSY: case R600_QUERY_GPU_WD_BUSY: case R600_QUERY_GPU_BCI_BUSY: case R600_QUERY_GPU_SC_BUSY: @@ -193,20 +205,21 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx, unreachable("r600_query_sw_begin: bad query type"); } return true; } static bool r600_query_sw_end(struct r600_common_context *rctx, struct r600_query *rquery) { struct r600_query_sw *query = (struct r600_query_sw *)rquery; + enum radeon_value_id ws_id; switch(query->b.type) { case PIPE_QUERY_TIMESTAMP_DISJOINT: break; case PIPE_QUERY_GPU_FINISHED: rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED); break; case R600_QUERY_DRAW_CALLS: query->end_result = rctx->num_draw_calls; break; @@ -256,20 +269,25 @@ static bool r600_query_sw_end(struct r600_common_context *rctx, case R600_QUERY_BUFFER_WAIT_TIME: case R600_QUERY_NUM_MAPPED_BUFFERS: case R600_QUERY_NUM_GFX_IBS: case R600_QUERY_NUM_SDMA_IBS: case R600_QUERY_NUM_BYTES_MOVED: case R600_QUERY_NUM_EVICTIONS: { enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); query->end_result = rctx->ws->query_value(rctx->ws, ws_id); break; } + case R600_QUERY_CS_THREAD_BUSY: + ws_id = winsys_id_from_type(query->b.type); + query->end_result = rctx->ws->query_value(rctx->ws, ws_id); + query->end_time = os_time_get_nano(); + break; case R600_QUERY_GPU_LOAD: case R600_QUERY_GPU_SHADERS_BUSY: case R600_QUERY_GPU_TA_BUSY: case R600_QUERY_GPU_GDS_BUSY: case R600_QUERY_GPU_VGT_BUSY: case R600_QUERY_GPU_IA_BUSY: case R600_QUERY_GPU_SX_BUSY: case R600_QUERY_GPU_WD_BUSY: case R600_QUERY_GPU_BCI_BUSY: case R600_QUERY_GPU_SC_BUSY: @@ -330,20 +348,24 @@ static bool r600_query_sw_get_result(struct r600_common_context *rctx, (uint64_t)rctx->screen->info.clock_crystal_freq * 1000; result->timestamp_disjoint.disjoint = false; return true; case PIPE_QUERY_GPU_FINISHED: { struct pipe_screen *screen = rctx->b.screen; result->b = screen->fence_finish(screen, &rctx->b, query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0); return result->b; } + case R600_QUERY_CS_THREAD_BUSY: + result->u64 = (query->end_result - query->begin_result) * 100 / + (query->end_time - query->begin_time); + return true; case R600_QUERY_GPIN_ASIC_ID: result->u32 = 0; return true; case R600_QUERY_GPIN_NUM_SIMD: result->u32 = rctx->screen->info.num_good_compute_units; return true; case R600_QUERY_GPIN_NUM_RB: result->u32 = rctx->screen->info.num_render_backends; return true; case R600_QUERY_GPIN_NUM_SPI: @@ -1735,20 +1757,21 @@ static struct pipe_driver_query_info r600_driver_query_list[] = { X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE), X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE), X("dma-calls", DMA_CALLS, UINT64, AVERAGE), X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE), X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE), X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE), X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE), X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64, AVERAGE), X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE), X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE), + X("CS-thread-busy", CS_THREAD_BUSY, UINT64, AVERAGE), X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE), X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE), X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE), X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE), X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE), X("num-mapped-buffers", NUM_MAPPED_BUFFERS, UINT64, AVERAGE), X("num-GFX-IBs", NUM_GFX_IBS, UINT64, AVERAGE), X("num-SDMA-IBs", NUM_SDMA_IBS, UINT64, AVERAGE), X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE), X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE), diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h index 5de80d9..84b834c 100644 --- a/src/gallium/drivers/radeon/r600_query.h +++ b/src/gallium/drivers/radeon/r600_query.h @@ -48,20 +48,21 @@ enum { R600_QUERY_COMPUTE_CALLS, R600_QUERY_SPILL_COMPUTE_CALLS, R600_QUERY_DMA_CALLS, R600_QUERY_CP_DMA_CALLS, R600_QUERY_NUM_VS_FLUSHES, R600_QUERY_NUM_PS_FLUSHES, R600_QUERY_NUM_CS_FLUSHES, R600_QUERY_NUM_FB_CACHE_FLUSHES, R600_QUERY_NUM_L2_INVALIDATES, R600_QUERY_NUM_L2_WRITEBACKS, + R600_QUERY_CS_THREAD_BUSY, R600_QUERY_REQUESTED_VRAM, R600_QUERY_REQUESTED_GTT, R600_QUERY_MAPPED_VRAM, R600_QUERY_MAPPED_GTT, R600_QUERY_BUFFER_WAIT_TIME, R600_QUERY_NUM_MAPPED_BUFFERS, R600_QUERY_NUM_GFX_IBS, R600_QUERY_NUM_SDMA_IBS, R600_QUERY_NUM_BYTES_MOVED, R600_QUERY_NUM_EVICTIONS, diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 881bd5f..432550d 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -87,20 +87,21 @@ enum radeon_value_id { RADEON_NUM_SDMA_IBS, RADEON_NUM_BYTES_MOVED, RADEON_NUM_EVICTIONS, RADEON_VRAM_USAGE, RADEON_VRAM_VIS_USAGE, RADEON_GTT_USAGE, RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */ RADEON_CURRENT_SCLK, RADEON_CURRENT_MCLK, RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */ + RADEON_CS_THREAD_TIME, }; /* Each group of four has the same priority. */ enum radeon_bo_priority { RADEON_PRIO_FENCE = 0, RADEON_PRIO_TRACE, RADEON_PRIO_SO_FILLED_SIZE, RADEON_PRIO_QUERY, RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index c3dfda5..db0087c 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -458,20 +458,22 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws, case RADEON_GTT_USAGE: amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap); return heap.heap_usage; case RADEON_GPU_TEMPERATURE: case RADEON_CURRENT_SCLK: case RADEON_CURRENT_MCLK: return 0; case RADEON_GPU_RESET_COUNTER: assert(0); return 0; + case RADEON_CS_THREAD_TIME: + return util_queue_get_thread_time_nano(&ws->cs_queue, 0); } return 0; } static bool amdgpu_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t *out) { struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index cacd683..bdcf194 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -662,20 +662,22 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws, "current-gpu-sclk", (uint32_t*)&retval); return retval; case RADEON_CURRENT_MCLK: radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK, "current-gpu-mclk", (uint32_t*)&retval); return retval; case RADEON_GPU_RESET_COUNTER: radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER, "gpu-reset-counter", (uint32_t*)&retval); return retval; + case RADEON_CS_THREAD_TIME: + return util_queue_get_thread_time_nano(&ws->cs_queue, 0); } return 0; } static bool radeon_read_registers(struct radeon_winsys *rws, unsigned reg_offset, unsigned num_registers, uint32_t *out) { struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; unsigned i; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev