When suspend_all_gangs is issued to pipe0 MES during
system suspend or runtime PM, pipe0 can only suspend and resume queues
it has tracked. KCQs registered with a non-zero pipe slot may not be
correctly handled, leaving them in an inconsistent state after resume.

Suggested-by: Shaoyun Liu <[email protected]>
Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index a418ae609c36..d99757e06738 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -1007,8 +1007,19 @@ static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
 
        /* mec0 is me1 */
        ring->me = mec + 1;
-       ring->pipe = pipe;
-       ring->queue = queue;
+
+       /*
+        * Assign all KCQs to pipe0 slots sequentially by ring_id so there
+        * are no slot conflicts and pipe0 MES can correctly suspend and
+        * resume all KCQs via suspend_all_gangs.
+        */
+       if (ring_id < adev->gfx.mec.num_queue_per_pipe) {
+               ring->pipe  = AMDGPU_MES_SCHED_PIPE;
+               ring->queue = ring_id;
+       } else {
+               ring->pipe = pipe;
+               ring->queue = queue;
+       }
 
        ring->ring_obj = NULL;
        ring->use_doorbell = true;
@@ -1018,9 +1029,14 @@ static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
        ring->vm_hub = AMDGPU_GFXHUB(0);
        sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 
+       /*
+        * The EOP interrupt is wired to the physical hardware pipe, not the
+        * MES pipe slot. Use the original 'pipe' argument for irq_type, as
+        * ring->pipe may have been remapped to AMDGPU_MES_SCHED_PIPE above.
+        */
        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-               + ring->pipe;
+               + pipe;
        hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
                        AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
        /* type-2 packets are deprecated on MEC, use type-3 instead */
-- 
2.49.0

Reply via email to