From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_cp_dma.c      | 25 +++++++++++++++++++
 src/gallium/drivers/radeonsi/si_descriptors.c | 10 ++------
 src/gallium/drivers/radeonsi/si_fence.c       | 21 ++++++----------
 src/gallium/drivers/radeonsi/si_pipe.c        | 13 ++--------
 src/gallium/drivers/radeonsi/si_pipe.h        |  3 +++
 src/gallium/drivers/radeonsi/si_state_draw.c  | 12 +++------
 6 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 80673f3f5f2..59360c0d4aa 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -574,10 +574,35 @@ void si_test_gds(struct si_context *sctx)
 
        pipe_buffer_read(ctx, dst, 0, sizeof(r), r);
        printf("GDS clear = %08x %08x %08x %08x -> %s\n", r[0], r[1], r[2], r[3],
                        r[0] == 0xc1ea4146 && r[1] == 0xc1ea4146 &&
                        r[2] == 0xc1ea4146 && r[3] == 0xc1ea4146 ? "pass" : "fail");
 
        pipe_resource_reference(&src, NULL);
        pipe_resource_reference(&dst, NULL);
        exit(0);
 }
+
+void si_cp_write_data(struct si_context *sctx, struct r600_resource *buf,
+                     unsigned offset, unsigned size, unsigned dst_sel,
+                     unsigned engine, const void *data)
+{
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
+
+       assert(offset % 4 == 0);
+       assert(size % 4 == 0);
+
+       if (sctx->chip_class == SI && dst_sel == V_370_MEM)
+               dst_sel = V_370_MEM_GRBM;
+
+       radeon_add_to_buffer_list(sctx, cs, buf,
+                                 RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
+       uint64_t va = buf->gpu_address + offset;
+
+       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + size/4, 0));
+       radeon_emit(cs, S_370_DST_SEL(dst_sel) |
+                   S_370_WR_CONFIRM(1) |
+                   S_370_ENGINE_SEL(engine));
+       radeon_emit(cs, va);
+       radeon_emit(cs, va >> 32);
+       radeon_emit_array(cs, (const uint32_t*)data, size/4);
+}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 71ae00c53cb..ca62848296b 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1814,35 +1814,29 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
                        }
                }
        }
 }
 
 static void si_upload_bindless_descriptor(struct si_context *sctx,
                                          unsigned desc_slot,
                                          unsigned num_dwords)
 {
        struct si_descriptors *desc = &sctx->bindless_descriptors;
-       struct radeon_cmdbuf *cs = sctx->gfx_cs;
        unsigned desc_slot_offset = desc_slot * 16;
        uint32_t *data;
        uint64_t va;
 
        data = desc->list + desc_slot_offset;
        va = desc->gpu_address + desc_slot_offset * 4;
 
-       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0));
-       radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) |
-                   S_370_WR_CONFIRM(1) |
-                   S_370_ENGINE_SEL(V_370_ME));
-       radeon_emit(cs, va);
-       radeon_emit(cs, va >> 32);
-       radeon_emit_array(cs, data, num_dwords);
+       si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address,
+                        num_dwords * 4, V_370_TC_L2, V_370_ME, data);
 }
 
 static void si_upload_bindless_descriptors(struct si_context *sctx)
 {
        if (!sctx->bindless_descriptors_dirty)
                return;
 
        /* Wait for graphics/compute to be idle before updating the resident
         * descriptors directly in memory, in case the GPU is using them.
         */
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index 46d0289c90b..84bf4d10c20 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -252,35 +252,30 @@ static void si_fine_fence_set(struct si_context *ctx,
        assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);
 
        /* Use uncached system memory for the fence. */
        u_upload_alloc(ctx->cached_gtt_allocator, 0, 4, 4,
                       &fine->offset, (struct pipe_resource **)&fine->buf, (void **)&fence_ptr);
        if (!fine->buf)
                return;
 
        *fence_ptr = 0;
 
-       uint64_t fence_va = fine->buf->gpu_address + fine->offset;
-
-       radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf,
-                                 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
        if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
-               struct radeon_cmdbuf *cs = ctx->gfx_cs;
-               radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-               radeon_emit(cs, S_370_DST_SEL(ctx->chip_class >= CIK ? V_370_MEM
-                                                                    : V_370_MEM_GRBM) |
-                       S_370_WR_CONFIRM(1) |
-                       S_370_ENGINE_SEL(V_370_PFP));
-               radeon_emit(cs, fence_va);
-               radeon_emit(cs, fence_va >> 32);
-               radeon_emit(cs, 0x80000000);
+               uint32_t value = 0x80000000;
+
+               si_cp_write_data(ctx, fine->buf, fine->offset, 4,
+                                V_370_MEM, V_370_PFP, &value);
        } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
+               uint64_t fence_va = fine->buf->gpu_address + fine->offset;
+
+               radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf,
+                                         RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
                si_cp_release_mem(ctx,
                                  V_028A90_BOTTOM_OF_PIPE_TS, 0,
                                  EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
                                  EOP_DATA_SEL_VALUE_32BIT,
                                  NULL, fence_va, 0x80000000,
                                  PIPE_QUERY_GPU_FINISHED);
        } else {
                assert(false);
        }
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index f68ef3f67ce..3bb8e04e4ad 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -523,31 +523,22 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
        sctx->sample_mask = 0xffff;
 
        if (sctx->chip_class >= GFX9) {
                sctx->wait_mem_scratch = r600_resource(
                        pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4));
                if (!sctx->wait_mem_scratch)
                        goto fail;
 
                /* Initialize the memory. */
-               struct radeon_cmdbuf *cs = sctx->gfx_cs;
-               radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-               radeon_emit(cs, S_370_DST_SEL(sctx->chip_class >= CIK ? V_370_MEM
-                                                                     : V_370_MEM_GRBM) |
-                           S_370_WR_CONFIRM(1) |
-                           S_370_ENGINE_SEL(V_370_ME));
-               radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
-               radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
-               radeon_emit(cs, sctx->wait_mem_number);
-               radeon_add_to_buffer_list(sctx, cs, sctx->wait_mem_scratch,
-                                         RADEON_USAGE_WRITE, RADEON_PRIO_FENCE);
+               si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
+                                V_370_MEM, V_370_ME, &sctx->wait_mem_number);
        }
 
        /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
         * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
        if (sctx->chip_class == CIK) {
                sctx->null_const_buf.buffer =
                        pipe_aligned_buffer_create(screen,
                                                   SI_RESOURCE_FLAG_32BIT,
                                                   PIPE_USAGE_DEFAULT, 16,
                                                   sctx->screen->info.tcc_cache_line_size);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 9943998a707..d874f215a21 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1178,20 +1178,23 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
                            enum si_coherency coher, enum si_cache_policy cache_policy);
 void si_cp_dma_copy_buffer(struct si_context *sctx,
                           struct pipe_resource *dst, struct pipe_resource *src,
                           uint64_t dst_offset, uint64_t src_offset, unsigned size,
                           unsigned user_flags, enum si_coherency coher,
                           enum si_cache_policy cache_policy);
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
                              uint64_t offset, unsigned size);
 void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
 void si_test_gds(struct si_context *sctx);
+void si_cp_write_data(struct si_context *sctx, struct r600_resource *buf,
+                     unsigned offset, unsigned size, unsigned dst_sel,
+                     unsigned engine, const void *data);
 
 /* si_debug.c */
 void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
                struct radeon_saved_cs *saved, bool get_buffer_list);
 void si_clear_saved_cs(struct radeon_saved_cs *saved);
 void si_destroy_saved_cs(struct si_saved_cs *scs);
 void si_auto_log_cs(void *data, struct u_log_context *log);
 void si_log_hw_flush(struct si_context *sctx);
 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log);
 void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 9a80bd81327..1ff74e77433 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1585,31 +1585,25 @@ si_draw_rectangle(struct blitter_context *blitter,
        /* Don't set per-stage shader pointers for VS. */
        sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(VERTEX);
        sctx->vertex_buffer_pointer_dirty = false;
 
        si_draw_vbo(pipe, &info);
 }
 
 void si_trace_emit(struct si_context *sctx)
 {
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
-       uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
        uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
 
-       radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-       radeon_emit(cs, S_370_DST_SEL(sctx->chip_class >= CIK ? V_370_MEM
-                                                             : V_370_MEM_GRBM) |
-                   S_370_WR_CONFIRM(1) |
-                   S_370_ENGINE_SEL(V_370_ME));
-       radeon_emit(cs, va);
-       radeon_emit(cs, va >> 32);
-       radeon_emit(cs, trace_id);
+       si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf,
+                        0, 4, V_370_MEM, V_370_ME, &trace_id);
+
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
        radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
 
        if (sctx->log)
                u_log_flush(sctx->log);
 }
 
 void si_init_draw_functions(struct si_context *sctx)
 {
        sctx->b.draw_vbo = si_draw_vbo;
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to