From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/r600/r600_hw_context.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++++-- src/gallium/drivers/radeon/r600_pipe_common.h | 2 +- src/gallium/drivers/radeonsi/si_debug.c | 14 ++++++++++++-- src/gallium/drivers/radeonsi/si_hw_context.c | 10 ++++++---- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 15 +++++++++++++++ 7 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index ca7f41d..a821c35 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -277,21 +277,21 @@ void r600_context_gfx_flush(void *context, unsigned flags, if (ctx->trace_buf) eg_trace_emit(ctx); /* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */ if (ctx->b.chip_class == R600) { radeon_set_context_reg(cs, R_028350_SX_MISC, 0); } if (ctx->is_debug) { /* Save the IB for debug contexts. */ radeon_clear_saved_cs(&ctx->last_gfx); - radeon_save_cs(ws, cs, &ctx->last_gfx); + radeon_save_cs(ws, cs, &ctx->last_gfx, true); r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); r600_resource_reference(&ctx->trace_buf, NULL); } /* Flush the CS. */ ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence); if (fence) ws->fence_reference(fence, ctx->b.last_gfx_fence); ctx->b.num_gfx_cs_flushes++; if (ctx->is_debug) { diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 8c66cc3..c58048f 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -435,21 +435,21 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, (rctx->screen->debug_flags & DBG_CHECK_VM) && rctx->check_vm_faults; if (!radeon_emitted(cs, 0)) { if (fence) rctx->ws->fence_reference(fence, rctx->last_sdma_fence); return; } if (check_vm) - radeon_save_cs(rctx->ws, cs, &saved); + radeon_save_cs(rctx->ws, cs, &saved, true); rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence); if (fence) rctx->ws->fence_reference(fence, rctx->last_sdma_fence); if (check_vm) { /* Use conservative timeout 800ms, after which we won't wait any * longer and assume the GPU is hung. */ rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000); @@ -457,38 +457,41 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, rctx->check_vm_faults(rctx, &saved, RING_DMA); radeon_clear_saved_cs(&saved); } } /** * Store a linearized copy of all chunks of \p cs together with the buffer * list in \p saved. */ void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, - struct radeon_saved_cs *saved) + struct radeon_saved_cs *saved, bool get_buffer_list) { void *buf; unsigned i; /* Save the IB chunks. */ saved->num_dw = cs->prev_dw + cs->current.cdw; saved->ib = MALLOC(4 * saved->num_dw); if (!saved->ib) goto oom; buf = saved->ib; for (i = 0; i < cs->num_prev; ++i) { memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); buf += cs->prev[i].cdw; } memcpy(buf, cs->current.buf, cs->current.cdw * 4); + if (!get_buffer_list) + return; + /* Save the buffer list. */ saved->bo_count = ws->cs_get_buffer_list(cs, NULL); saved->bo_list = CALLOC(saved->bo_count, sizeof(saved->bo_list[0])); if (!saved->bo_list) { FREE(saved->ib); goto oom; } ws->cs_get_buffer_list(cs, saved->bo_list); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 4839c76..b391cbb 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -760,21 +760,21 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen, bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor); void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value); struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, const struct pipe_resource *templ); const char *r600_get_llvm_processor_name(enum radeon_family family); void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src); void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, - struct radeon_saved_cs *saved); + struct radeon_saved_cs *saved, bool get_buffer_list); void radeon_clear_saved_cs(struct radeon_saved_cs *saved); bool r600_check_device_reset(struct r600_common_context *rctx); /* r600_gpu_load.c */ void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen); uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type); unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type, uint64_t begin); /* r600_perfcounters.c */ diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 7c8a0fe..5a6d391 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -213,51 +213,60 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1); si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS); si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT); si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1); fprintf(f, "\n"); } static void si_dump_last_ib(struct si_context *sctx, FILE *f) { int last_trace_id = -1; + int last_ce_trace_id = -1; if (!sctx->last_gfx.ib) return; if (sctx->last_trace_buf) { /* We are expecting that the ddebug pipe has already * waited for the context, so this buffer should be idle. * If the GPU is hung, there is no point in waiting for it. */ uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf, NULL, PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_READ); - if (map) - last_trace_id = *map; + if (map) { + last_trace_id = map[0]; + last_ce_trace_id = map[1]; + } } if (sctx->init_config) ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw, -1, "IB2: Init config", sctx->b.chip_class, NULL, NULL); if (sctx->init_config_gs_rings) ac_parse_ib(f, sctx->init_config_gs_rings->pm4, sctx->init_config_gs_rings->ndw, -1, "IB2: Init GS rings", sctx->b.chip_class, NULL, NULL); ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw, last_trace_id, "IB", sctx->b.chip_class, NULL, NULL); + + if (sctx->last_ce.ib) { + ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw, + last_ce_trace_id, "CE IB", sctx->b.chip_class, + NULL, NULL); + } } static const char *priority_to_string(enum radeon_bo_priority priority) { #define ITEM(x) [RADEON_PRIO_##x] = #x static const char *table[64] = { ITEM(FENCE), ITEM(TRACE), ITEM(SO_FILLED_SIZE), ITEM(QUERY), @@ -838,20 +847,21 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, } if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) { si_dump_bo_list(sctx, &sctx->last_gfx, f); si_dump_last_ib(sctx, f); fprintf(f, "Done.\n"); /* dump only once */ radeon_clear_saved_cs(&sctx->last_gfx); + radeon_clear_saved_cs(&sctx->last_ce); r600_resource_reference(&sctx->last_trace_buf, NULL); } } static void si_dump_dma(struct si_context *sctx, struct radeon_saved_cs *saved, FILE *f) { static const char ib_name[] = "sDMA IB"; unsigned i; diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 92d0cc5..f2dfcc7 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -138,21 +138,23 @@ void si_context_gfx_flush(void *context, unsigned flags, SI_CONTEXT_INV_VMEM_L1; si_emit_cache_flush(ctx); if (ctx->trace_buf) si_trace_emit(ctx); if (ctx->is_debug) { /* Save the IB for debug contexts. */ radeon_clear_saved_cs(&ctx->last_gfx); - radeon_save_cs(ws, cs, &ctx->last_gfx); + radeon_save_cs(ws, cs, &ctx->last_gfx, true); + radeon_clear_saved_cs(&ctx->last_ce); + radeon_save_cs(ws, ctx->ce_ib, &ctx->last_ce, false); r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); r600_resource_reference(&ctx->trace_buf, NULL); } /* Flush the CS. */ ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence); if (fence) ws->fence_reference(fence, ctx->b.last_gfx_fence); ctx->b.num_gfx_cs_flushes++; @@ -166,30 +168,30 @@ void si_context_gfx_flush(void *context, unsigned flags, si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX); } si_begin_new_cs(ctx); ctx->gfx_flush_in_progress = false; } void si_begin_new_cs(struct si_context *ctx) { if (ctx->is_debug) { - uint32_t zero = 0; + static const uint32_t zeros[2]; /* Create a buffer used for writing trace IDs and initialize it to 0. */ assert(!ctx->trace_buf); ctx->trace_buf = (struct r600_resource*) pipe_buffer_create(ctx->b.b.screen, 0, - PIPE_USAGE_STAGING, 4); + PIPE_USAGE_STAGING, 8); if (ctx->trace_buf) pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, - 0, sizeof(zero), &zero); + 0, sizeof(zeros), zeros); ctx->trace_id = 0; } if (ctx->trace_buf) si_trace_emit(ctx); /* Flush read caches at the beginning of CS not flushed by the kernel. */ if (ctx->b.chip_class >= CIK) ctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_ICACHE; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index f7e0486..1984299 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -404,20 +404,21 @@ struct si_context { struct si_shader *last_ls; struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; bool last_tess_uses_primid; unsigned last_num_patches; /* Debug state. */ bool is_debug; struct radeon_saved_cs last_gfx; + struct radeon_saved_cs last_ce; struct r600_resource *last_trace_buf; struct r600_resource *trace_buf; unsigned trace_id; uint64_t dmesg_timestamp; unsigned apitrace_call_number; /* Other state */ bool need_check_render_feedback; bool decompression_enabled; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index dfe4236..5254645 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1402,20 +1402,35 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) pipe_resource_reference(&indexbuf, NULL); } void si_trace_emit(struct si_context *sctx) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; sctx->trace_id++; radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, sctx->trace_buf->gpu_address); radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); radeon_emit(cs, sctx->trace_id); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, AC_ENCODE_TRACE_POINT(sctx->trace_id)); + + if (sctx->ce_ib) { + struct radeon_winsys_cs *ce = sctx->ce_ib; + + radeon_emit(ce, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(ce, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_CE)); + radeon_emit(ce, sctx->trace_buf->gpu_address + 4); + radeon_emit(ce, (sctx->trace_buf->gpu_address + 4) >> 32); + radeon_emit(ce, sctx->trace_id); + radeon_emit(ce, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(ce, AC_ENCODE_TRACE_POINT(sctx->trace_id)); + } } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev