From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeon/r600_query.c           | 25 ++++++++++++++++++++++-
 src/gallium/drivers/radeon/r600_query.h           |  1 +
 src/gallium/drivers/radeon/radeon_winsys.h        |  1 +
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     |  2 ++
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  2 ++
 5 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 05741d3..8009416 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -19,37 +19,41 @@
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "r600_query.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
-
+#include "os/os_time.h"
 #include "tgsi/tgsi_text.h"
 
 struct r600_hw_query_params {
        unsigned start_offset;
        unsigned end_offset;
        unsigned fence_offset;
        unsigned pair_stride;
        unsigned pair_count;
 };
 
 /* Queries without buffer handling or suspend/resume. */
 struct r600_query_sw {
        struct r600_query b;
 
        uint64_t begin_result;
        uint64_t end_result;
+
+       uint64_t begin_time; /* os_time_get_nano() value stored by r600_query_sw_begin */
+       uint64_t end_time;   /* os_time_get_nano() value stored by r600_query_sw_end */
+
        /* Fence for GPU_FINISHED. */
        struct pipe_fence_handle *fence;
 };
 
 static void r600_query_sw_destroy(struct r600_common_context *rctx,
                                  struct r600_query *rquery)
 {
        struct pipe_screen *screen = rctx->b.screen;
        struct r600_query_sw *query = (struct r600_query_sw *)rquery;
 
@@ -69,28 +73,30 @@ static enum radeon_value_id winsys_id_from_type(unsigned type)
        case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
        case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
        case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
        case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
        case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
        case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
        case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
        case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
        case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
        case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+       case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
        default: unreachable("query type does not correspond to winsys id");
        }
 }
 
 static bool r600_query_sw_begin(struct r600_common_context *rctx,
                                struct r600_query *rquery)
 {
        struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+       enum radeon_value_id ws_id;
 
        switch(query->b.type) {
        case PIPE_QUERY_TIMESTAMP_DISJOINT:
        case PIPE_QUERY_GPU_FINISHED:
                break;
        case R600_QUERY_DRAW_CALLS:
                query->begin_result = rctx->num_draw_calls;
                break;
        case R600_QUERY_SPILL_DRAW_CALLS:
                query->begin_result = rctx->num_spill_draw_calls;
@@ -139,22 +145,28 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
        case R600_QUERY_NUM_MAPPED_BUFFERS:
                query->begin_result = 0;
                break;
        case R600_QUERY_BUFFER_WAIT_TIME:
        case R600_QUERY_NUM_GFX_IBS:
        case R600_QUERY_NUM_SDMA_IBS:
        case R600_QUERY_NUM_BYTES_MOVED:
        case R600_QUERY_NUM_EVICTIONS: {
                enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
                query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
                break;
        }
+       case R600_QUERY_CS_THREAD_BUSY:
+               /* Only this query records begin_time: its result is divided by the elapsed time. */
+               ws_id = winsys_id_from_type(query->b.type);
+               query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+               query->begin_time = os_time_get_nano();
+               break;
        case R600_QUERY_GPU_LOAD:
        case R600_QUERY_GPU_SHADERS_BUSY:
        case R600_QUERY_GPU_TA_BUSY:
        case R600_QUERY_GPU_GDS_BUSY:
        case R600_QUERY_GPU_VGT_BUSY:
        case R600_QUERY_GPU_IA_BUSY:
        case R600_QUERY_GPU_SX_BUSY:
        case R600_QUERY_GPU_WD_BUSY:
        case R600_QUERY_GPU_BCI_BUSY:
        case R600_QUERY_GPU_SC_BUSY:
@@ -193,20 +205,21 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
                unreachable("r600_query_sw_begin: bad query type");
        }
 
        return true;
 }
 
 static bool r600_query_sw_end(struct r600_common_context *rctx,
                              struct r600_query *rquery)
 {
        struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+       enum radeon_value_id ws_id;
 
        switch(query->b.type) {
        case PIPE_QUERY_TIMESTAMP_DISJOINT:
                break;
        case PIPE_QUERY_GPU_FINISHED:
                rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
                break;
        case R600_QUERY_DRAW_CALLS:
                query->end_result = rctx->num_draw_calls;
                break;
@@ -256,20 +269,25 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
        case R600_QUERY_BUFFER_WAIT_TIME:
        case R600_QUERY_NUM_MAPPED_BUFFERS:
        case R600_QUERY_NUM_GFX_IBS:
        case R600_QUERY_NUM_SDMA_IBS:
        case R600_QUERY_NUM_BYTES_MOVED:
        case R600_QUERY_NUM_EVICTIONS: {
                enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
                query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
                break;
        }
+       case R600_QUERY_CS_THREAD_BUSY:
+               ws_id = winsys_id_from_type(query->b.type);
+               query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
+               query->end_time = os_time_get_nano();
+               break;
        case R600_QUERY_GPU_LOAD:
        case R600_QUERY_GPU_SHADERS_BUSY:
        case R600_QUERY_GPU_TA_BUSY:
        case R600_QUERY_GPU_GDS_BUSY:
        case R600_QUERY_GPU_VGT_BUSY:
        case R600_QUERY_GPU_IA_BUSY:
        case R600_QUERY_GPU_SX_BUSY:
        case R600_QUERY_GPU_WD_BUSY:
        case R600_QUERY_GPU_BCI_BUSY:
        case R600_QUERY_GPU_SC_BUSY:
@@ -330,20 +348,24 @@ static bool r600_query_sw_get_result(struct r600_common_context *rctx,
                        (uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
                result->timestamp_disjoint.disjoint = false;
                return true;
        case PIPE_QUERY_GPU_FINISHED: {
                struct pipe_screen *screen = rctx->b.screen;
                result->b = screen->fence_finish(screen, &rctx->b, query->fence,
                                                 wait ? PIPE_TIMEOUT_INFINITE : 0);
                return result->b;
        }
 
+       case R600_QUERY_CS_THREAD_BUSY: /* CS thread time as % of elapsed time; 0 if none elapsed */
+               result->u64 = query->end_time == query->begin_time ? 0 : /* never divide by zero */
+                       (query->end_result - query->begin_result) * 100 / (query->end_time - query->begin_time);
+               return true;
        case R600_QUERY_GPIN_ASIC_ID:
                result->u32 = 0;
                return true;
        case R600_QUERY_GPIN_NUM_SIMD:
                result->u32 = rctx->screen->info.num_good_compute_units;
                return true;
        case R600_QUERY_GPIN_NUM_RB:
                result->u32 = rctx->screen->info.num_render_backends;
                return true;
        case R600_QUERY_GPIN_NUM_SPI:
@@ -1735,20 +1757,21 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
        X("compute-calls",              COMPUTE_CALLS,          UINT64, AVERAGE),
        X("spill-compute-calls",        SPILL_COMPUTE_CALLS,    UINT64, AVERAGE),
        X("dma-calls",                  DMA_CALLS,              UINT64, AVERAGE),
        X("cp-dma-calls",               CP_DMA_CALLS,           UINT64, AVERAGE),
        X("num-vs-flushes",             NUM_VS_FLUSHES,         UINT64, AVERAGE),
        X("num-ps-flushes",             NUM_PS_FLUSHES,         UINT64, AVERAGE),
        X("num-cs-flushes",             NUM_CS_FLUSHES,         UINT64, AVERAGE),
        X("num-fb-cache-flushes",       NUM_FB_CACHE_FLUSHES,   UINT64, AVERAGE),
        X("num-L2-invalidates",         NUM_L2_INVALIDATES,     UINT64, AVERAGE),
        X("num-L2-writebacks",          NUM_L2_WRITEBACKS,      UINT64, AVERAGE),
+       X("CS-thread-busy",             CS_THREAD_BUSY,         UINT64, AVERAGE),
        X("requested-VRAM",             REQUESTED_VRAM,         BYTES, AVERAGE),
        X("requested-GTT",              REQUESTED_GTT,          BYTES, AVERAGE),
        X("mapped-VRAM",                MAPPED_VRAM,            BYTES, AVERAGE),
        X("mapped-GTT",                 MAPPED_GTT,             BYTES, AVERAGE),
        X("buffer-wait-time",           BUFFER_WAIT_TIME,       MICROSECONDS, CUMULATIVE),
        X("num-mapped-buffers",         NUM_MAPPED_BUFFERS,     UINT64, AVERAGE),
        X("num-GFX-IBs",                NUM_GFX_IBS,            UINT64, AVERAGE),
        X("num-SDMA-IBs",               NUM_SDMA_IBS,           UINT64, AVERAGE),
        X("num-bytes-moved",            NUM_BYTES_MOVED,        BYTES, CUMULATIVE),
        X("num-evictions",              NUM_EVICTIONS,          UINT64, CUMULATIVE),
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 5de80d9..84b834c 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -48,20 +48,21 @@ enum {
        R600_QUERY_COMPUTE_CALLS,
        R600_QUERY_SPILL_COMPUTE_CALLS,
        R600_QUERY_DMA_CALLS,
        R600_QUERY_CP_DMA_CALLS,
        R600_QUERY_NUM_VS_FLUSHES,
        R600_QUERY_NUM_PS_FLUSHES,
        R600_QUERY_NUM_CS_FLUSHES,
        R600_QUERY_NUM_FB_CACHE_FLUSHES,
        R600_QUERY_NUM_L2_INVALIDATES,
        R600_QUERY_NUM_L2_WRITEBACKS,
+       R600_QUERY_CS_THREAD_BUSY,
        R600_QUERY_REQUESTED_VRAM,
        R600_QUERY_REQUESTED_GTT,
        R600_QUERY_MAPPED_VRAM,
        R600_QUERY_MAPPED_GTT,
        R600_QUERY_BUFFER_WAIT_TIME,
        R600_QUERY_NUM_MAPPED_BUFFERS,
        R600_QUERY_NUM_GFX_IBS,
        R600_QUERY_NUM_SDMA_IBS,
        R600_QUERY_NUM_BYTES_MOVED,
        R600_QUERY_NUM_EVICTIONS,
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 881bd5f..432550d 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -87,20 +87,21 @@ enum radeon_value_id {
     RADEON_NUM_SDMA_IBS,
     RADEON_NUM_BYTES_MOVED,
     RADEON_NUM_EVICTIONS,
     RADEON_VRAM_USAGE,
     RADEON_VRAM_VIS_USAGE,
     RADEON_GTT_USAGE,
     RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
     RADEON_CURRENT_SCLK,
     RADEON_CURRENT_MCLK,
     RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
+    RADEON_CS_THREAD_TIME,
 };
 
 /* Each group of four has the same priority. */
 enum radeon_bo_priority {
     RADEON_PRIO_FENCE = 0,
     RADEON_PRIO_TRACE,
     RADEON_PRIO_SO_FILLED_SIZE,
     RADEON_PRIO_QUERY,
 
     RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index c3dfda5..db0087c 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -458,20 +458,22 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
    case RADEON_GTT_USAGE:
       amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
       return heap.heap_usage;
    case RADEON_GPU_TEMPERATURE:
    case RADEON_CURRENT_SCLK:
    case RADEON_CURRENT_MCLK:
       return 0;
    case RADEON_GPU_RESET_COUNTER:
       assert(0);
       return 0;
+   case RADEON_CS_THREAD_TIME:
+      return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
    }
    return 0;
 }
 
 static bool amdgpu_read_registers(struct radeon_winsys *rws,
                                   unsigned reg_offset,
                                   unsigned num_registers, uint32_t *out)
 {
    struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index cacd683..bdcf194 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -662,20 +662,22 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws,
                              "current-gpu-sclk", (uint32_t*)&retval);
         return retval;
     case RADEON_CURRENT_MCLK:
         radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
                              "current-gpu-mclk", (uint32_t*)&retval);
         return retval;
     case RADEON_GPU_RESET_COUNTER:
         radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
                              "gpu-reset-counter", (uint32_t*)&retval);
         return retval;
+    case RADEON_CS_THREAD_TIME:
+        return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
     }
     return 0;
 }
 
 static bool radeon_read_registers(struct radeon_winsys *rws,
                                   unsigned reg_offset,
                                   unsigned num_registers, uint32_t *out)
 {
     struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
     unsigned i;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to