From: Marek Olšák <marek.ol...@amd.com>

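Split si_copy_buffer into si_cp_dma_copy_buffer, which holds the CP DMA
implementation and takes explicit coherency and cache-policy arguments,
and a thin si_copy_buffer wrapper around it.

Compute and SDMA will be added into the si_copy_buffer wrapper.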
---
 src/gallium/drivers/radeonsi/si_blit.c        |  2 +-
 src/gallium/drivers/radeonsi/si_cp_dma.c      | 33 ++++++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.c        |  3 +-
 src/gallium/drivers/radeonsi/si_pipe.h        |  8 +++--
 .../drivers/radeonsi/si_test_dma_perf.c       |  4 +--
 5 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index fcaff80125c..8f7aa0815b9 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -903,21 +903,21 @@ void si_resource_copy_region(struct pipe_context *ctx,
        struct si_context *sctx = (struct si_context *)ctx;
        struct si_texture *ssrc = (struct si_texture*)src;
        struct pipe_surface *dst_view, dst_templ;
        struct pipe_sampler_view src_templ, *src_view;
        unsigned dst_width, dst_height, src_width0, src_height0;
        unsigned dst_width0, dst_height0, src_force_level = 0;
        struct pipe_box sbox, dstbox;
 
        /* Handle buffers first. */
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-               si_copy_buffer(sctx, dst, src, dstx, src_box->x, 
src_box->width, 0, -1);
+               si_copy_buffer(sctx, dst, src, dstx, src_box->x, 
src_box->width);
                return;
        }
 
        assert(u_max_sample(dst) == u_max_sample(src));
 
        /* The driver doesn't decompress resources automatically while
         * u_blitter is rendering. */
        si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level,
                                  src_box->z, src_box->z + src_box->depth - 1);
 
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index ad53682b1b2..e85bb9b1acf 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -426,36 +426,32 @@ static void si_cp_dma_realign_engine(struct si_context 
*sctx, unsigned size,
        va = sctx->scratch_buffer->gpu_address;
        si_emit_cp_dma(sctx, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags,
                       cache_policy);
 }
 
 /**
  * Do memcpy between buffers using CP DMA.
  *
  * \param user_flags   bitmask of SI_CPDMA_*
  */
-void si_copy_buffer(struct si_context *sctx,
-                   struct pipe_resource *dst, struct pipe_resource *src,
-                   uint64_t dst_offset, uint64_t src_offset, unsigned size,
-                   unsigned user_flags, enum si_cache_policy cache_policy)
+void si_cp_dma_copy_buffer(struct si_context *sctx,
+                          struct pipe_resource *dst, struct pipe_resource *src,
+                          uint64_t dst_offset, uint64_t src_offset, unsigned 
size,
+                          unsigned user_flags, enum si_coherency coher,
+                          enum si_cache_policy cache_policy)
 {
        uint64_t main_dst_offset, main_src_offset;
        unsigned skipped_size = 0;
        unsigned realign_size = 0;
-       enum si_coherency coher = SI_COHERENCY_SHADER;
        bool is_first = true;
 
-       if (!size)
-               return;
-
-       if (cache_policy == -1)
-               cache_policy = get_cache_policy(sctx, coher);
+       assert(size);
 
        if (dst != src || dst_offset != src_offset) {
                /* Mark the buffer range of destination as valid (initialized),
                 * so that transfer_map knows it should wait for the GPU when 
mapping
                 * that range. */
                util_range_add(&r600_resource(dst)->valid_buffer_range, 
dst_offset,
                               dst_offset + size);
        }
 
        dst_offset += r600_resource(dst)->gpu_address;
@@ -520,35 +516,50 @@ void si_copy_buffer(struct si_context *sctx,
 
                si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size,
                               dma_flags, cache_policy);
        }
 
        /* Finally, realign the engine if the size wasn't aligned. */
        if (realign_size) {
                si_cp_dma_realign_engine(sctx, realign_size, user_flags, coher,
                                         cache_policy, &is_first);
        }
+}
+
+void si_copy_buffer(struct si_context *sctx,
+                   struct pipe_resource *dst, struct pipe_resource *src,
+                   uint64_t dst_offset, uint64_t src_offset, unsigned size)
+{
+       enum si_coherency coher = SI_COHERENCY_SHADER;
+       enum si_cache_policy cache_policy = get_cache_policy(sctx, coher);
+
+       if (!size)
+               return;
+
+       si_cp_dma_copy_buffer(sctx, dst, src, dst_offset, src_offset, size,
+                             0, coher, cache_policy);
 
        if (cache_policy != L2_BYPASS)
                r600_resource(dst)->TC_L2_dirty = true;
 
        /* If it's not a prefetch... */
        if (dst_offset != src_offset)
                sctx->num_cp_dma_calls++;
 }
 
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource 
*buf,
                              uint64_t offset, unsigned size)
 {
        assert(sctx->chip_class >= CIK);
 
-       si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL, 
L2_LRU);
+       si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size,
+                             SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU);
 }
 
 static void cik_prefetch_shader_async(struct si_context *sctx,
                                      struct si_pm4_state *state)
 {
        struct pipe_resource *bo = &state->bo[0]->b.b;
        assert(state->nbo == 1);
 
        cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index bce7b3f550e..8ef29c25df2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -723,21 +723,22 @@ static void si_test_vmfault(struct si_screen *sscreen)
                pipe_buffer_create_const0(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 
64);
 
        if (!buf) {
                puts("Buffer allocation failed.");
                exit(1);
        }
 
        r600_resource(buf)->gpu_address = 0; /* cause a VM fault */
 
        if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) {
-               si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, -1);
+               si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0,
+                                     SI_COHERENCY_NONE, L2_BYPASS);
                ctx->flush(ctx, NULL, 0);
                puts("VM fault test: CP - done.");
        }
        if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) {
                si_sdma_clear_buffer(sctx, buf, 0, 4, 0);
                ctx->flush(ctx, NULL, 0);
                puts("VM fault test: SDMA - done.");
        }
        if (sscreen->debug_flags & DBG(TEST_VMFAULT_SHADER)) {
                util_test_constant_buffer(ctx, buf);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index a6f09b65f74..29d7e555a0c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1120,24 +1120,28 @@ enum si_coherency {
 };
 
 void si_cp_dma_wait_for_idle(struct si_context *sctx);
 void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                            uint64_t offset, uint64_t size, unsigned value,
                            enum si_coherency coher,
                            enum si_cache_policy cache_policy);
 void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                     uint64_t offset, uint64_t size, unsigned value,
                     enum si_coherency coher);
+void si_cp_dma_copy_buffer(struct si_context *sctx,
+                          struct pipe_resource *dst, struct pipe_resource *src,
+                          uint64_t dst_offset, uint64_t src_offset, unsigned 
size,
+                          unsigned user_flags, enum si_coherency coher,
+                          enum si_cache_policy cache_policy);
 void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
-                   uint64_t dst_offset, uint64_t src_offset, unsigned size,
-                   unsigned user_flags, enum si_cache_policy cache_policy);
+                   uint64_t dst_offset, uint64_t src_offset, unsigned size);
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource 
*buf,
                              uint64_t offset, unsigned size);
 void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
 void si_init_cp_dma_functions(struct si_context *sctx);
 
 /* si_debug.c */
 void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
                struct radeon_saved_cs *saved, bool get_buffer_list);
 void si_clear_saved_cs(struct radeon_saved_cs *saved);
 void si_destroy_saved_cs(struct si_saved_cs *scs);
diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c 
b/src/gallium/drivers/radeonsi/si_test_dma_perf.c
index f097a642999..6c04720e963 100644
--- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c
+++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c
@@ -171,22 +171,22 @@ void si_test_dma_perf(struct si_screen *sscreen)
                                src = is_copy ? pipe_buffer_create(screen, 0, 
src_usage, size) : NULL;
 
                                /* Run tests. */
                                for (unsigned iter = 0; iter < NUM_RUNS; 
iter++) {
                                        q[iter] = ctx->create_query(ctx, 
query_type, 0);
                                        ctx->begin_query(ctx, q[iter]);
 
                                        if (test_cp) {
                                                /* CP DMA */
                                                if (is_copy) {
-                                                       si_copy_buffer(sctx, 
dst, src, 0, 0, size, 0,
-                                                                      
cache_policy);
+                                                       
si_cp_dma_copy_buffer(sctx, dst, src, 0, 0, size, 0,
+                                                                             
SI_COHERENCY_NONE, cache_policy);
                                                } else {
                                                        
si_cp_dma_clear_buffer(sctx, dst, 0, size, clear_value,
                                                                               
SI_COHERENCY_NONE, cache_policy);
                                                }
                                        } else if (test_sdma) {
                                                /* SDMA */
                                                if (is_copy) {
                                                        struct pipe_box box;
                                                        u_box_1d(0, size, &box);
                                                        sctx->dma_copy(ctx, 
dst, 0, 0, 0, 0, src, 0, &box);
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to