Instead of resetting/recovering all rings, we can work only
on the particular ring that was detected as hung.

Change-Id: Ie9de78819e1567e9f001d3593c9c52f749137c32
Signed-off-by: Monk Liu <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 35 ++++++++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  6 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  1 +
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 157d023..4dbd121 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2551,19 +2551,26 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, b
        /* block TTM */
        resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
-       /* block scheduler */
-       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-               ring = adev->rings[i];
+       /* we start from the ring trigger GPU hang */
+       j = job ? job->ring->idx : 0;
 
+       /* block scheduler */
+       for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
+               ring = adev->rings[i % AMDGPU_MAX_RINGS];
                if (!ring || !ring->sched.thread)
                        continue;
 
                kthread_park(ring->sched.thread);
+
+               if (job && job->ring->idx != i)
+                       continue;
+
+               /* only do job_reset on the hang ring if @job not NULL */
                amd_sched_hw_job_reset(&ring->sched);
-       }
 
-       /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
-       amdgpu_fence_driver_force_completion(adev);
+               /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+               amdgpu_fence_driver_force_completion_ring(ring);
+       }
 
        /* request to take full control of GPU before re-initialization  */
        if (voluntary)
@@ -2615,12 +2622,26 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job, b
        }
        fence_put(fence);
 
+       /* before recovery and unpark, kickout guilty for every rings */
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-               struct amdgpu_ring *ring = adev->rings[i];
+               ring = adev->rings[i];
+
                if (!ring || !ring->sched.thread)
                        continue;
 
                amd_sched_job_kickout_guilty(&ring->sched);
+       }
+
+       for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
+               ring = adev->rings[i % AMDGPU_MAX_RINGS];
+               if (!ring || !ring->sched.thread)
+                       continue;
+
+               if (job && job->ring->idx != i) {
+                       kthread_unpark(ring->sched.thread);
+                       continue;
+               }
+
                amd_sched_job_recovery(&ring->sched);
                kthread_unpark(ring->sched.thread);
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5772ef2..de4c851 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
        }
 }
 
+void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring)
+{
+       if (ring)
+               amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
+}
+
 /*
  * Common fence implementation
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 5786cc3..2acaac6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -76,6 +76,7 @@ struct amdgpu_fence_driver {
 int amdgpu_fence_driver_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
+void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
                                  unsigned num_hw_submission);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to