From: Tvrtko Ursulin <[email protected]>

Convert the GFX v10.0 ring helpers to use the variadic
amdgpu_ring_write().

Also const-qualify the source buffer of amdgpu_ring_write_multiple()
and make small cleanups in gfx_v10_0_cp_gfx_start(),
gfx_v10_0_ring_emit_ce_meta() and gfx_v10_0_ring_emit_de_meta().

Signed-off-by: Tvrtko Ursulin <[email protected]>
Cc: Christian König <[email protected]>
Cc: Sunil Khatri <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 399 ++++++++++++-----------
 2 files changed, 204 insertions(+), 197 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4f467864ed09..1b428dda706a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -611,7 +611,7 @@ amdgpu_ring_write11(struct amdgpu_ring *ring,
                          NULL)(__VA_ARGS__)
 
 static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
-                                             void *src, int count_dw)
+                                             const void *src, int count_dw)
 {
        unsigned occupied, chunk1, chunk2;
        u32 buf_mask = ring->buf_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 003522c2d902..63fc94c5d989 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3683,15 +3683,16 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring 
*kiq_ring, uint64_t queue
        /* Cleaner shader MC address */
        shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
 
-       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
-       amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
-                         PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 
queue_type:0 (KIQ) */
-       amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask 
lo */
-       amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask 
hi */
-       amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner 
shader addr lo */
-       amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner 
shader addr hi */
-       amdgpu_ring_write(kiq_ring, 0); /* oac mask */
-       amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3(PACKET3_SET_RESOURCES, 6),
+                         PACKET3_SET_RESOURCES_VMID_MASK(0) |
+                         PACKET3_SET_RESOURCES_QUEUE_TYPE(0),  /* vmid_mask:0 
queue_type:0 (KIQ) */
+                         lower_32_bits(queue_mask),    /* queue mask lo */
+                         upper_32_bits(queue_mask),    /* queue mask hi */
+                         lower_32_bits(shader_mc_addr), /* cleaner shader addr 
lo */
+                         upper_32_bits(shader_mc_addr), /* cleaner shader addr 
hi */
+                         0,    /* oac mask */
+                         0);   /* gds heap base:0, gds heap size:0 */
 }
 
 static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
@@ -3715,10 +3716,9 @@ static void gfx10_kiq_map_queues(struct amdgpu_ring 
*kiq_ring,
                WARN_ON(1);
        }
 
-       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
-       /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-       amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
-                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3(PACKET3_MAP_QUEUES, 5),
+                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Q_sel: 0, vmid: 
0, engine: 0, num_Q: 1 */
                          PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                          PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                          PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
@@ -3726,12 +3726,12 @@ static void gfx10_kiq_map_queues(struct amdgpu_ring 
*kiq_ring,
                          PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: 
normal compute queue */
                          PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: 
all_on_one_pipe */
                          PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
-                         PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: 
must be 1 */
-       amdgpu_ring_write(kiq_ring, 
PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
-       amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
-       amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
-       amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
-       amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+                         PACKET3_MAP_QUEUES_NUM_QUEUES(1), /* num_queues: must 
be 1 */
+                         
PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index),
+                         lower_32_bits(mqd_addr),
+                         upper_32_bits(mqd_addr),
+                         lower_32_bits(wptr_addr),
+                         upper_32_bits(wptr_addr));
 }
 
 static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
@@ -3741,23 +3741,21 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring 
*kiq_ring,
 {
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 
-       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
-       amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+       amdgpu_ring_write(kiq_ring,
+                         PACKET3(PACKET3_UNMAP_QUEUES, 4),
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
-                         PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
-       amdgpu_ring_write(kiq_ring,
-                 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+                         PACKET3_UNMAP_QUEUES_NUM_QUEUES(1) /* Q_sel: 0, vmid: 
0, engine: 0, num_Q: 1 */,
+                         
PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
        if (action == PREEMPT_QUEUES_NO_UNMAP) {
-               amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-               amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-               amdgpu_ring_write(kiq_ring, seq);
+               amdgpu_ring_write(kiq_ring,
+                                 lower_32_bits(gpu_addr),
+                                 upper_32_bits(gpu_addr),
+                                 seq);
        } else {
-               amdgpu_ring_write(kiq_ring, 0);
-               amdgpu_ring_write(kiq_ring, 0);
-               amdgpu_ring_write(kiq_ring, 0);
+               amdgpu_ring_fill(kiq_ring, 0, 3);
        }
 }
 
@@ -3768,18 +3766,17 @@ static void gfx10_kiq_query_status(struct amdgpu_ring 
*kiq_ring,
 {
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 
-       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
+                         PACKET3(PACKET3_QUERY_STATUS, 5),
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
-                         PACKET3_QUERY_STATUS_COMMAND(2));
-       amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+                         PACKET3_QUERY_STATUS_COMMAND(2), /* Q_sel: 0, vmid: 
0, engine: 0, num_Q: 1 */
                          
PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
-                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
-       amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
-       amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
-       amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
-       amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+                         PACKET3_QUERY_STATUS_ENG_SEL(eng_sel),
+                         lower_32_bits(addr),
+                         upper_32_bits(addr),
+                         lower_32_bits(seq),
+                         upper_32_bits(seq));
 }
 
 static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
@@ -3918,12 +3915,13 @@ static void gfx_v10_0_init_golden_registers(struct 
amdgpu_device *adev)
 static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
 {
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
-                         WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
-       amdgpu_ring_write(ring, reg);
-       amdgpu_ring_write(ring, 0);
-       amdgpu_ring_write(ring, val);
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_WRITE_DATA, 3),
+                         WRITE_DATA_ENGINE_SEL(eng_sel) |
+                         WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0),
+                         reg,
+                         0,
+                         val);
 }
 
 static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
@@ -3931,21 +3929,21 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring 
*ring, int eng_sel,
                                  uint32_t addr1, uint32_t ref, uint32_t mask,
                                  uint32_t inv)
 {
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+       if (mem_space)
+               BUG_ON(addr0 & 0x3); /* Dword align */
+
        amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_WAIT_REG_MEM, 5),
                          /* memory (1) or register (0) */
                          (WAIT_REG_MEM_MEM_SPACE(mem_space) |
                           WAIT_REG_MEM_OPERATION(opt) | /* wait */
                           WAIT_REG_MEM_FUNCTION(3) |  /* equal */
-                          WAIT_REG_MEM_ENGINE(eng_sel)));
-
-       if (mem_space)
-               BUG_ON(addr0 & 0x3); /* Dword align */
-       amdgpu_ring_write(ring, addr0);
-       amdgpu_ring_write(ring, addr1);
-       amdgpu_ring_write(ring, ref);
-       amdgpu_ring_write(ring, mask);
-       amdgpu_ring_write(ring, inv); /* poll interval */
+                          WAIT_REG_MEM_ENGINE(eng_sel)),
+                         addr0,
+                         addr1,
+                         ref,
+                         mask,
+                         inv); /* poll interval */
 }
 
 static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
@@ -3964,10 +3962,11 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring 
*ring)
                return r;
        }
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-       amdgpu_ring_write(ring, scratch -
-                         PACKET3_SET_UCONFIG_REG_START);
-       amdgpu_ring_write(ring, 0xDEADBEEF);
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_SET_UCONFIG_REG, 1),
+                         scratch - PACKET3_SET_UCONFIG_REG_START,
+                         0xDEADBEEF);
+
        amdgpu_ring_commit(ring);
 
        for (i = 0; i < adev->usec_timeout; i++) {
@@ -6239,8 +6238,8 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device 
*adev)
        struct amdgpu_ring *ring;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
-       int r, i;
        int ctx_reg_offset;
+       int r;
 
        /* init the CP */
        WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
@@ -6256,43 +6255,46 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device 
*adev)
                return r;
        }
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-       amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_PREAMBLE_CNTL, 0),
+                         PACKET3_PREAMBLE_BEGIN_CLEAR_STATE,
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
-       amdgpu_ring_write(ring, 0x80000000);
-       amdgpu_ring_write(ring, 0x80000000);
+                         PACKET3(PACKET3_CONTEXT_CONTROL, 1),
+                         0x80000000,
+                         0x80000000);
 
        for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                                  
PACKET3(PACKET3_SET_CONTEXT_REG,
-                                                         ext->reg_count));
-                               amdgpu_ring_write(ring, ext->reg_index -
+                                                         ext->reg_count),
+                                                 ext->reg_index -
                                                  
PACKET3_SET_CONTEXT_REG_START);
-                               for (i = 0; i < ext->reg_count; i++)
-                                       amdgpu_ring_write(ring, ext->extent[i]);
+                               amdgpu_ring_write_multiple(ring, ext->extent,
+                                                          ext->reg_count);
                        }
                }
        }
 
        ctx_reg_offset =
                SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - 
PACKET3_SET_CONTEXT_REG_START;
-       amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-       amdgpu_ring_write(ring, ctx_reg_offset);
-       amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
-       amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_SET_CONTEXT_REG, 1),
+                          ctx_reg_offset,
+                          adev->gfx.config.pa_sc_tile_steering_override,
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
-       amdgpu_ring_write(ring, 0);
+                          PACKET3(PACKET3_PREAMBLE_CNTL, 0),
+                          PACKET3_PREAMBLE_END_CLEAR_STATE,
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
-       amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
-       amdgpu_ring_write(ring, 0x8000);
-       amdgpu_ring_write(ring, 0x8000);
+                          PACKET3(PACKET3_CLEAR_STATE, 0),
+                          0,
+
+                          PACKET3(PACKET3_SET_BASE, 2),
+                          PACKET3_BASE_INDEX(CE_PARTITION_BASE),
+                          0x8000,
+                          0x8000);
 
        amdgpu_ring_commit(ring);
 
@@ -6306,9 +6308,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device 
*adev)
                        return r;
                }
 
-               amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
-               amdgpu_ring_write(ring, 0);
-
+               amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0), 0);
                amdgpu_ring_commit(ring);
        }
        return 0;
@@ -8564,6 +8564,8 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
        unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
        u32 header, control = 0;
 
+       BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+
        if (ib->flags & AMDGPU_IB_FLAG_CE)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
        else
@@ -8582,15 +8584,14 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct 
amdgpu_ring *ring,
                                    (!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
        }
 
-       amdgpu_ring_write(ring, header);
-       BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
        amdgpu_ring_write(ring,
+                         header,
 #ifdef __BIG_ENDIAN
-               (2 << 0) |
+                         (2 << 0) |
 #endif
-               lower_32_bits(ib->gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-       amdgpu_ring_write(ring, control);
+                         lower_32_bits(ib->gpu_addr),
+                         upper_32_bits(ib->gpu_addr),
+                         control);
 }
 
 static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
@@ -8601,6 +8602,8 @@ static void gfx_v10_0_ring_emit_ib_compute(struct 
amdgpu_ring *ring,
        unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+       BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+
        /* Currently, there is a high possibility to get wave ID mismatch
         * between ME and GDS, leading to a hw deadlock, because ME generates
         * different wave IDs than the GDS expects. This situation happens
@@ -8612,20 +8615,20 @@ static void gfx_v10_0_ring_emit_ib_compute(struct 
amdgpu_ring *ring,
         * GDS to 0 for this ring (me/pipe).
         */
        if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
-               amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
-               amdgpu_ring_write(ring, 
ring->adev->gds.gds_compute_max_wave_id);
+               amdgpu_ring_write(ring,
+                                 PACKET3(PACKET3_SET_CONFIG_REG, 1),
+                                 mmGDS_COMPUTE_MAX_WAVE_ID,
+                                 ring->adev->gds.gds_compute_max_wave_id);
        }
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-       BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
        amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_INDIRECT_BUFFER, 2),
 #ifdef __BIG_ENDIAN
-                               (2 << 0) |
+                         (2 << 0) |
 #endif
-                               lower_32_bits(ib->gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
-       amdgpu_ring_write(ring, control);
+                         lower_32_bits(ib->gpu_addr),
+                         upper_32_bits(ib->gpu_addr),
+                         control);
 }
 
 static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
@@ -8634,18 +8637,6 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring 
*ring, u64 addr,
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
-       /* RELEASE_MEM - flush caches, send int */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-       amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
-                                PACKET3_RELEASE_MEM_GCR_GL2_WB |
-                                PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be 
set with GLM_WB */
-                                PACKET3_RELEASE_MEM_GCR_GLM_WB |
-                                PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
-                                
PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-                                PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
-       amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 
1) |
-                                PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
-
        /*
         * the address should be Qword aligned if 64bit write, Dword
         * aligned if only send 32bit data low (discard data high)
@@ -8654,11 +8645,24 @@ static void gfx_v10_0_ring_emit_fence(struct 
amdgpu_ring *ring, u64 addr,
                BUG_ON(addr & 0x7);
        else
                BUG_ON(addr & 0x3);
-       amdgpu_ring_write(ring, lower_32_bits(addr));
-       amdgpu_ring_write(ring, upper_32_bits(addr));
-       amdgpu_ring_write(ring, lower_32_bits(seq));
-       amdgpu_ring_write(ring, upper_32_bits(seq));
-       amdgpu_ring_write(ring, 0);
+
+       amdgpu_ring_write(ring,
+                         /* RELEASE_MEM - flush caches, send int */
+                         PACKET3(PACKET3_RELEASE_MEM, 6),
+                         (PACKET3_RELEASE_MEM_GCR_SEQ |
+                          PACKET3_RELEASE_MEM_GCR_GL2_WB |
+                          PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set 
with GLM_WB */
+                          PACKET3_RELEASE_MEM_GCR_GLM_WB |
+                          PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+                          
PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+                          PACKET3_RELEASE_MEM_EVENT_INDEX(5)),
+                         (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+                          PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)),
+                         lower_32_bits(addr),
+                         upper_32_bits(addr),
+                         lower_32_bits(seq),
+                         upper_32_bits(seq),
+                         0);
 }
 
 static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -8675,8 +8679,8 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct 
amdgpu_ring *ring,
                                   uint16_t pasid, uint32_t flush_type,
                                   bool all_hub, uint8_t dst_sel)
 {
-       amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_INVALIDATE_TLBS, 0),
                          PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
                          PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                          PACKET3_INVALIDATE_TLBS_PASID(pasid) |
@@ -8691,8 +8695,7 @@ static void gfx_v10_0_ring_emit_vm_flush(struct 
amdgpu_ring *ring,
        /* compute doesn't have PFP */
        if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
-               amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
-               amdgpu_ring_write(ring, 0x0);
+               amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0), 0);
        }
 }
 
@@ -8705,40 +8708,42 @@ static void gfx_v10_0_ring_emit_fence_kiq(struct 
amdgpu_ring *ring, u64 addr,
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
        /* write fence seq to the "addr" */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
-       amdgpu_ring_write(ring, lower_32_bits(addr));
-       amdgpu_ring_write(ring, upper_32_bits(addr));
-       amdgpu_ring_write(ring, lower_32_bits(seq));
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_WRITE_DATA, 3),
+                         (WRITE_DATA_ENGINE_SEL(0) | WRITE_DATA_DST_SEL(5) |
+                          WR_CONFIRM),
+                         lower_32_bits(addr),
+                         upper_32_bits(addr),
+                         lower_32_bits(seq));
 
        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
-               amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-               amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-                                        WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
-               amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, 
mmCPC_INT_STATUS));
-               amdgpu_ring_write(ring, 0);
-               amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+               amdgpu_ring_write(ring,
+                                 PACKET3(PACKET3_WRITE_DATA, 3),
+                                 (WRITE_DATA_ENGINE_SEL(0) |
+                                  WRITE_DATA_DST_SEL(0) |
+                                  WR_CONFIRM),
+                                 SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS),
+                                 0,
+                                 0x20000000); /* src_id is 178 */
        }
 }
 
 static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
 {
-       amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-       amdgpu_ring_write(ring, 0);
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0), 0);
 }
 
 static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
                                         uint32_t flags)
 {
-       uint32_t dw2 = 0;
+       uint32_t dw2;
 
        if (ring->adev->gfx.mcbp)
                gfx_v10_0_ring_emit_ce_meta(ring,
                                    (!amdgpu_sriov_vf(ring->adev) && flags & 
AMDGPU_IB_PREEMPTED) ? true : false);
 
-       dw2 |= 0x80000000; /* set load_enable otherwise this package is just 
NOPs */
+       dw2 = 0x80000000; /* set load_enable otherwise this package is just 
NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
@@ -8758,9 +8763,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct 
amdgpu_ring *ring,
                        dw2 |= 0x10000000;
        }
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
-       amdgpu_ring_write(ring, dw2);
-       amdgpu_ring_write(ring, 0);
+       amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1), dw2, 0);
 }
 
 static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring 
*ring,
@@ -8768,11 +8771,12 @@ static unsigned int 
gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
 {
        unsigned int ret;
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-       amdgpu_ring_write(ring, lower_32_bits(addr));
-       amdgpu_ring_write(ring, upper_32_bits(addr));
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_COND_EXEC, 3),
+                         lower_32_bits(addr),
+                         upper_32_bits(addr),
        /* discard following DWs if *cond_exec_gpu_addr==0 */
-       amdgpu_ring_write(ring, 0);
+                         0);
        ret = ring->wptr & ring->buf_mask;
        /* patch dummy value later */
        amdgpu_ring_write(ring, 0);
@@ -8839,22 +8843,21 @@ static void gfx_v10_0_ring_emit_ce_meta(struct 
amdgpu_ring *ring, bool resume)
 
        offset = offsetof(struct v10_gfx_meta_data, ce_payload);
        ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
-       ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
-
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
-       amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-                                WRITE_DATA_DST_SEL(8) |
-                                WR_CONFIRM) |
-                                WRITE_DATA_CACHE_POLICY(0));
-       amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
 
        if (resume)
-               amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
-                                          sizeof(ce_payload) >> 2);
+               ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
        else
-               amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
-                                          sizeof(ce_payload) >> 2);
+               ce_payload_cpu_addr = (void *)&ce_payload;
+
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_WRITE_DATA, cnt),
+                         (WRITE_DATA_ENGINE_SEL(2) | WRITE_DATA_DST_SEL(8) |
+                          WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0),
+                         lower_32_bits(ce_payload_gpu_addr),
+                         upper_32_bits(ce_payload_gpu_addr));
+
+       amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+                                  sizeof(ce_payload) >> 2);
 }
 
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
@@ -8867,7 +8870,10 @@ static void gfx_v10_0_ring_emit_de_meta(struct 
amdgpu_ring *ring, bool resume)
 
        offset = offsetof(struct v10_gfx_meta_data, de_payload);
        de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
-       de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+       if (resume)
+               de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+       else
+               de_payload_cpu_addr = (void *)&de_payload;
 
        gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
                         AMDGPU_CSA_SIZE - adev->gds.gds_size,
@@ -8877,20 +8883,15 @@ static void gfx_v10_0_ring_emit_de_meta(struct 
amdgpu_ring *ring, bool resume)
        de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 
        cnt = (sizeof(de_payload) >> 2) + 4 - 2;
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
-       amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
-                                WRITE_DATA_DST_SEL(8) |
-                                WR_CONFIRM) |
-                                WRITE_DATA_CACHE_POLICY(0));
-       amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_WRITE_DATA, cnt),
+                         (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(8) |
+                          WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0),
+                         lower_32_bits(de_payload_gpu_addr),
+                         upper_32_bits(de_payload_gpu_addr));
 
-       if (resume)
-               amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
-                                          sizeof(de_payload) >> 2);
-       else
-               amdgpu_ring_write_multiple(ring, (void *)&de_payload,
-                                          sizeof(de_payload) >> 2);
+       amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+                                  sizeof(de_payload) >> 2);
 }
 
 static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool 
start,
@@ -8898,31 +8899,32 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct 
amdgpu_ring *ring, bool start,
 {
        uint32_t v = secure ? FRAME_TMZ : 0;
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-       amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_FRAME_CONTROL, 0),
+                         v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
                                     uint32_t reg_val_offs)
 {
        struct amdgpu_device *adev = ring->adev;
+       u64 gpu_addr = adev->wb.gpu_addr + reg_val_offs * 4;
 
-       amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
-       amdgpu_ring_write(ring, 0 |     /* src: register*/
-                               (5 << 8) |      /* dst: memory */
-                               (1 << 20));     /* write confirm */
-       amdgpu_ring_write(ring, reg);
-       amdgpu_ring_write(ring, 0);
-       amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
-                               reg_val_offs * 4));
-       amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
-                               reg_val_offs * 4));
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_COPY_DATA, 4),
+                         0 |           /* src: register*/
+                         (5 << 8) |    /* dst: memory */
+                         (1 << 20),    /* write confirm */
+                         reg,
+                         0,
+                         lower_32_bits(gpu_addr),
+                         upper_32_bits(gpu_addr));
 }
 
 static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val)
 {
-       uint32_t cmd = 0;
+       uint32_t cmd;
 
        switch (ring->funcs->type) {
        case AMDGPU_RING_TYPE_GFX:
@@ -8935,11 +8937,13 @@ static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring 
*ring, uint32_t reg,
                cmd = WR_CONFIRM;
                break;
        }
-       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-       amdgpu_ring_write(ring, cmd);
-       amdgpu_ring_write(ring, reg);
-       amdgpu_ring_write(ring, 0);
-       amdgpu_ring_write(ring, val);
+
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_WRITE_DATA, 3),
+                         cmd,
+                         reg,
+                         0,
+                         val);
 }
 
 static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t 
reg,
@@ -9416,15 +9420,17 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring 
*ring)
                        PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
                        PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
 
-       /* ACQUIRE_MEM - make one or more surfaces valid for use by the 
subsequent operations */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
-       amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
-       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
-       amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
-       amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
-       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
-       amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
-       amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+
+       amdgpu_ring_write(ring,
+                         /* ACQUIRE_MEM - make one or more surfaces valid for 
use by the subsequent operations */
+                         PACKET3(PACKET3_ACQUIRE_MEM, 6),
+                         0, /* CP_COHER_CNTL */
+                         0xffffffff,  /* CP_COHER_SIZE */
+                         0xffffff,  /* CP_COHER_SIZE_HI */
+                         0, /* CP_COHER_BASE */
+                         0,  /* CP_COHER_BASE_HI */
+                         0x0000000A, /* POLL_INTERVAL */
+                         gcr_cntl); /* GCR_CNTL */
 }
 
 static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
@@ -9726,8 +9732,9 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block 
*ip_block)
 static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
 {
        /* Emit the cleaner shader */
-       amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
-       amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
+       amdgpu_ring_write(ring,
+                         PACKET3(PACKET3_RUN_CLEANER_SHADER, 0),
+                         0);  /* RESERVED field, programmed to zero */
 }
 
 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
-- 
2.47.1

Reply via email to