From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_cp_dma.c | 25 +++++++++++++++++++ src/gallium/drivers/radeonsi/si_descriptors.c | 10 ++------ src/gallium/drivers/radeonsi/si_fence.c | 21 ++++++---------- src/gallium/drivers/radeonsi/si_pipe.c | 13 ++-------- src/gallium/drivers/radeonsi/si_pipe.h | 3 +++ src/gallium/drivers/radeonsi/si_state_draw.c | 12 +++------ 6 files changed, 43 insertions(+), 41 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 80673f3f5f2..59360c0d4aa 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -574,10 +574,35 @@ void si_test_gds(struct si_context *sctx) pipe_buffer_read(ctx, dst, 0, sizeof(r), r); printf("GDS clear = %08x %08x %08x %08x -> %s\n", r[0], r[1], r[2], r[3], r[0] == 0xc1ea4146 && r[1] == 0xc1ea4146 && r[2] == 0xc1ea4146 && r[3] == 0xc1ea4146 ? "pass" : "fail"); pipe_resource_reference(&src, NULL); pipe_resource_reference(&dst, NULL); exit(0); } + +void si_cp_write_data(struct si_context *sctx, struct r600_resource *buf, + unsigned offset, unsigned size, unsigned dst_sel, + unsigned engine, const void *data) +{ + struct radeon_cmdbuf *cs = sctx->gfx_cs; + + assert(offset % 4 == 0); + assert(size % 4 == 0); + + if (sctx->chip_class == SI && dst_sel == V_370_MEM) + dst_sel = V_370_MEM_GRBM; + + radeon_add_to_buffer_list(sctx, cs, buf, + RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); + uint64_t va = buf->gpu_address + offset; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + size/4, 0)); + radeon_emit(cs, S_370_DST_SEL(dst_sel) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(engine)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit_array(cs, (const uint32_t*)data, size/4); +} diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 71ae00c53cb..ca62848296b 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1814,35 +1814,29 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } } } } static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot, unsigned num_dwords) { struct si_descriptors *desc = &sctx->bindless_descriptors; - struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned desc_slot_offset = desc_slot * 16; uint32_t *data; uint64_t va; data = desc->list + desc_slot_offset; va = desc->gpu_address + desc_slot_offset * 4; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit_array(cs, data, num_dwords); + si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, + num_dwords * 4, V_370_TC_L2, V_370_ME, data); } static void si_upload_bindless_descriptors(struct si_context *sctx) { if (!sctx->bindless_descriptors_dirty) return; /* Wait for graphics/compute to be idle before updating the resident * descriptors directly in memory, in case the GPU is using them. */ diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index 46d0289c90b..84bf4d10c20 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -252,35 +252,30 @@ static void si_fine_fence_set(struct si_context *ctx, assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1); /* Use uncached system memory for the fence. */ u_upload_alloc(ctx->cached_gtt_allocator, 0, 4, 4, &fine->offset, (struct pipe_resource **)&fine->buf, (void **)&fence_ptr); if (!fine->buf) return; *fence_ptr = 0; - uint64_t fence_va = fine->buf->gpu_address + fine->offset; - - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); if (flags & PIPE_FLUSH_TOP_OF_PIPE) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(ctx->chip_class >= CIK ? V_370_MEM - : V_370_MEM_GRBM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, fence_va); - radeon_emit(cs, fence_va >> 32); - radeon_emit(cs, 0x80000000); + uint32_t value = 0x80000000; + + si_cp_write_data(ctx, fine->buf, fine->offset, 4, + V_370_MEM, V_370_PFP, &value); } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { + uint64_t fence_va = fine->buf->gpu_address + fine->offset; + + radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); si_cp_release_mem(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, NULL, fence_va, 0x80000000, PIPE_QUERY_GPU_FINISHED); } else { assert(false); } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index f68ef3f67ce..3bb8e04e4ad 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -523,31 +523,22 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->sample_mask = 0xffff; if (sctx->chip_class >= GFX9) { sctx->wait_mem_scratch = r600_resource( pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4)); if (!sctx->wait_mem_scratch) goto fail; /* Initialize the memory. */ - struct radeon_cmdbuf *cs = sctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(sctx->chip_class >= CIK ? V_370_MEM - : V_370_MEM_GRBM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, sctx->wait_mem_scratch->gpu_address); - radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32); - radeon_emit(cs, sctx->wait_mem_number); - radeon_add_to_buffer_list(sctx, cs, sctx->wait_mem_scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_FENCE); + si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, + V_370_MEM, V_370_ME, &sctx->wait_mem_number); } /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ if (sctx->chip_class == CIK) { sctx->null_const_buf.buffer = pipe_aligned_buffer_create(screen, SI_RESOURCE_FLAG_32BIT, PIPE_USAGE_DEFAULT, 16, sctx->screen->info.tcc_cache_line_size); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 9943998a707..d874f215a21 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1178,20 +1178,23 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, enum si_coherency coher, enum si_cache_policy cache_policy); void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned user_flags, enum si_coherency coher, enum si_cache_policy cache_policy); void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size); void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only); void si_test_gds(struct si_context *sctx); +void si_cp_write_data(struct si_context *sctx, struct r600_resource *buf, + unsigned offset, unsigned size, unsigned dst_sel, + unsigned engine, const void *data); /* si_debug.c */ void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved, bool get_buffer_list); void si_clear_saved_cs(struct radeon_saved_cs *saved); void si_destroy_saved_cs(struct si_saved_cs *scs); void si_auto_log_cs(void *data, struct u_log_context *log); void si_log_hw_flush(struct si_context *sctx); void si_log_draw_state(struct si_context *sctx, struct u_log_context *log); void si_log_compute_state(struct si_context *sctx, struct u_log_context *log); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 9a80bd81327..1ff74e77433 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1585,31 +1585,25 @@ si_draw_rectangle(struct blitter_context *blitter, /* Don't set per-stage shader pointers for VS. */ sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(VERTEX); sctx->vertex_buffer_pointer_dirty = false; si_draw_vbo(pipe, &info); } void si_trace_emit(struct si_context *sctx) { struct radeon_cmdbuf *cs = sctx->gfx_cs; - uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address; uint32_t trace_id = ++sctx->current_saved_cs->trace_id; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(sctx->chip_class >= CIK ? V_370_MEM - : V_370_MEM_GRBM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, trace_id); + si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, + 0, 4, V_370_MEM, V_370_ME, &trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id)); if (sctx->log) u_log_flush(sctx->log); } void si_init_draw_functions(struct si_context *sctx) { sctx->b.draw_vbo = si_draw_vbo; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev