amdgpu: implement ring set_priority for gfx_v8 compute v9

Andres Rodriguez Thu, 08 Jun 2017 15:06:48 -0700

Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.


Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
        0x2: CS_H
        0x1: CS_M
        0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

In order to prevent getting stuck in loops of resources bouncing between
GFX and high priority compute and introducing further latency, we
statically reserve a portion of the pipe.

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: switch int to enum amd_sched_priority
v5: corresponding changes for srbm_lock
v6: change CU reservation to PIPE_PERCENT allocation
v7: use kiq instead of MMIO
v8: back to MMIO, and make the implementation sleep safe.
v9: corresponding changes for splitting HIGH into _HW/_SW

Acked-by: Christian König <[email protected]>
Signed-off-by: Andres Rodriguez <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 105 +++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3a0561c..04ea1b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1097,30 +1097,34 @@ struct amdgpu_gfx {
        /* gfx status */
        uint32_t                        gfx_current_status;
        /* ce ram size*/
        unsigned                        ce_ram_size;
        struct amdgpu_cu_info           cu_info;
        const struct amdgpu_gfx_funcs   *funcs;
 
        /* reset mask */
        uint32_t                        grbm_soft_reset;
        uint32_t                        srbm_soft_reset;
        bool                            in_reset;
        /* s3/s4 mask */
        bool                            in_suspend;
        /* NGG */
        struct amdgpu_ngg               ngg;
+
+       /* pipe reservation */
+       struct mutex                    pipe_reserve_mutex;
+       DECLARE_BITMAP                  (pipe_reserve_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                  unsigned size, struct amdgpu_ib *ib);
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
                    struct dma_fence *f);
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                       struct amdgpu_ib *ibs, struct amdgpu_job *job,
                       struct dma_fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
 
 /*
  * CS.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0296c9e..424ac3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2012,58 +2012,60 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        adev->dev = &pdev->dev;
        adev->ddev = ddev;
        adev->pdev = pdev;
        adev->flags = flags;
        adev->asic_type = flags & AMD_ASIC_MASK;
        adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
        adev->mc.gtt_size = 512 * 1024 * 1024;
        adev->accel_working = false;
        adev->num_rings = 0;
        adev->mman.buffer_funcs = NULL;
        adev->mman.buffer_funcs_ring = NULL;
        adev->vm_manager.vm_pte_funcs = NULL;
        adev->vm_manager.vm_pte_num_rings = 0;
        adev->gart.gart_funcs = NULL;
        adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
+       bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
        adev->smc_rreg = &amdgpu_invalid_rreg;
        adev->smc_wreg = &amdgpu_invalid_wreg;
        adev->pcie_rreg = &amdgpu_invalid_rreg;
        adev->pcie_wreg = &amdgpu_invalid_wreg;
        adev->pciep_rreg = &amdgpu_invalid_rreg;
        adev->pciep_wreg = &amdgpu_invalid_wreg;
        adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
        adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
        adev->didt_rreg = &amdgpu_invalid_rreg;
        adev->didt_wreg = &amdgpu_invalid_wreg;
        adev->gc_cac_rreg = &amdgpu_invalid_rreg;
        adev->gc_cac_wreg = &amdgpu_invalid_wreg;
        adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
        adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
 
 
        DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 
0x%04X:0x%04X 0x%02X).\n",
                 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
                 pdev->subsystem_vendor, pdev->subsystem_device, 
pdev->revision);
 
        /* mutex initialization are all done here so we
         * can recall function without having locking issues */
        atomic_set(&adev->irq.ih.lock, 0);
        mutex_init(&adev->firmware.mutex);
        mutex_init(&adev->pm.mutex);
        mutex_init(&adev->gfx.gpu_clock_mutex);
        mutex_init(&adev->srbm_mutex);
+       mutex_init(&adev->gfx.pipe_reserve_mutex);
        mutex_init(&adev->grbm_idx_mutex);
        mutex_init(&adev->mn_lock);
        hash_init(adev->mn_hash);
 
        amdgpu_check_arguments(adev);
 
        /* Registers mapping */
        /* TODO: block userspace mapping of io register */
        spin_lock_init(&adev->mmio_idx_lock);
        spin_lock_init(&adev->smc_idx_lock);
        spin_lock_init(&adev->pcie_idx_lock);
        spin_lock_init(&adev->uvd_ctx_idx_lock);
        spin_lock_init(&adev->didt_idx_lock);
        spin_lock_init(&adev->gc_cac_idx_lock);
        spin_lock_init(&adev->audio_endpt_idx_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6e541af..db00816 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6449,30 +6449,134 @@ static void gfx_v8_0_ring_emit_vm_flush(struct 
amdgpu_ring *ring,
 
 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
 {
        return ring->adev->wb.wb[ring->wptr_offs];
 }
 
 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
 
        /* XXX check if swapping is necessary on BE */
        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 }
 
+static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
+                                          bool acquire)
+{
+       struct amdgpu_device *adev = ring->adev;
+       int pipe_num, tmp, reg;
+       int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
+
+       pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
+
+       /* first me only has 2 entries, GFX and HP3D */
+       if (ring->me > 0)
+               pipe_num -= 2;
+
+       /* There is a bug in the GFX pipes that results in a HS
+        * deadlock if the pipe is restricted to a percentage
+        * lower than 17 */
+       if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE && pipe_percent < 17)
+               pipe_percent = 17;
+
+       reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
+       tmp = RREG32(reg);
+       tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
+       WREG32(reg, tmp);
+}
+
+static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
+                                           struct amdgpu_ring *ring,
+                                           bool acquire)
+{
+       int i, pipe;
+       bool reserve;
+       struct amdgpu_ring *iring;
+
+       mutex_lock(&adev->gfx.pipe_reserve_mutex);
+       pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+       if (acquire)
+               set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+       else
+               clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+
+       if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES)) {
+               /* Clear all reservations - everyone reacquires all resources */
+               for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
+                       gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
+                                                      true);
+
+               for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+                       
gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
+                                                      true);
+       } else {
+               /* Lower all pipes without a current reservation */
+               for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+                       iring = &adev->gfx.gfx_ring[i];
+                       pipe = amdgpu_gfx_queue_to_bit(adev,
+                                                      iring->me,
+                                                      iring->pipe,
+                                                      0);
+                       reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+                       gfx_v8_0_ring_set_pipe_percent(iring, reserve);
+               }
+
+               for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+                       iring = &adev->gfx.compute_ring[i];
+                       pipe = amdgpu_gfx_queue_to_bit(adev,
+                                                      iring->me,
+                                                      iring->pipe,
+                                                      0);
+                       reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+                       gfx_v8_0_ring_set_pipe_percent(iring, reserve);
+               }
+       }
+
+       mutex_unlock(&adev->gfx.pipe_reserve_mutex);
+}
+
+static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
+                                     struct amdgpu_ring *ring,
+                                     bool acquire)
+{
+       uint32_t pipe_priority = acquire ? 0x2 : 0x0;
+       uint32_t queue_priority = acquire ? 0xf : 0x0;
+
+       mutex_lock(&adev->srbm_mutex);
+       vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+       WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+       WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+
+       vi_srbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+                                              enum amd_sched_priority priority)
+{
+       struct amdgpu_device *adev = ring->adev;
+       bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
+
+       if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+               return;
+
+       gfx_v8_0_hqd_set_priority(adev, ring, acquire);
+       gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
+}
+
 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
 {
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel 
? 2 : 0));
@@ -6869,30 +6973,31 @@ static const struct amdgpu_ring_funcs 
gfx_v8_0_ring_funcs_compute = {
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                17 + /* gfx_v8_0_ring_emit_vm_flush */
                7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user 
fence, vm fence */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
        .emit_ib = gfx_v8_0_ring_emit_ib_compute,
        .emit_fence = gfx_v8_0_ring_emit_fence_compute,
        .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
        .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
        .test_ring = gfx_v8_0_ring_test_ring,
        .test_ib = gfx_v8_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
+       .set_priority = gfx_v8_0_ring_set_priority_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = false,
        .get_rptr = gfx_v8_0_ring_get_rptr,
        .get_wptr = gfx_v8_0_ring_get_wptr_compute,
        .set_wptr = gfx_v8_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v8_0_ring_emit_gds_switch */
                7 + /* gfx_v8_0_ring_emit_hdp_flush */
                5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/8] drm/amdgpu: implement ring set_priority for gfx_v8 compute v9

Reply via email to