Why shouldn't we return an error code? Any particular reason?

If we wake up a thread waiting on some fence and return 0, the UMD won't be aware 
that a GPU hang occurred just now ...

For this strict mode reset, the policy is aligned with the Vulkan spec:
it defines that the VK runtime should return a VK_ERROR_DEVICE_LOST error to the app 
from those waiting functions, so the kernel should make the VK UMD aware of it — the 
kernel must return an error to the UMD in the cs_wait IOCTL.

The UMD cannot invoke amdgpu_query_ctx for no reason; it should be notified by 
some error on a fence wait such as cs_wait.

BR Monk

-----Original Message-----
From: Christian König [mailto:[email protected]] 
Sent: 2017年10月9日 16:20
To: Liu, Monk <[email protected]>; [email protected]
Subject: Re: [PATCH 07/12] drm/amdgpu/sriov:implement strict gpu reset

Am 30.09.2017 um 08:03 schrieb Monk Liu:
> changes:
> 1)implement strict mode sriov gpu reset 2)always call 
> sriov_gpu_reset_strict if hypervisor notify FLR 3)in strict reset 
> mode, set error to all fences.
> 4)change fence_wait/cs_wait functions to return -ENODEV if fence 
> signaled with error == -ETIME,
>
> Since after strict gpu reset we consider the VRAM were lost, and since 
> assuming VRAM lost there is little help to recover shadow BO because 
> all textures/resources/shaders cannot recovered (if they resident in 
> VRAM)

NAK, we shouldn't return an error code on the wait function and instead handle 
the fence error code in amdgpu_ctx_query().

Regards,
Christian.

>
> Change-Id: I50d9b8b5185ba92f137f07c9deeac19d740d753b
> Signed-off-by: Monk Liu <[email protected]>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 25 ++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 90 
> +++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  4 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  6 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  1 +
>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c         |  4 +-
>   drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c         |  4 +-
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 60 ++++++++++++++++++
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 +
>   10 files changed, 188 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index de11527..de9c164 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -123,6 +123,7 @@ extern int amdgpu_cntl_sb_buf_per_se;
>   extern int amdgpu_param_buf_per_se;
>   extern int amdgpu_job_hang_limit;
>   extern int amdgpu_lbpw;
> +extern int amdgpu_sriov_reset_level;
>   
>   #ifdef CONFIG_DRM_AMDGPU_SI
>   extern int amdgpu_si_support;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c6a214f..9467cf6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1262,6 +1262,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void 
> *data,
>       struct amdgpu_ctx *ctx;
>       struct dma_fence *fence;
>       long r;
> +     int fence_err = 0;
>   
>       if (amdgpu_kms_vram_lost(adev, fpriv))
>               return -ENODEV;
> @@ -1283,6 +1284,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void 
> *data,
>               r = PTR_ERR(fence);
>       else if (fence) {
>               r = dma_fence_wait_timeout(fence, true, timeout);
> +             /* gpu hang and this fence is signaled by gpu reset if 
> fence_err < 0 */
> +             fence_err = dma_fence_get_status(fence);
>               dma_fence_put(fence);
>       } else
>               r = 1;
> @@ -1292,7 +1295,10 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void 
> *data,
>               return r;
>   
>       memset(wait, 0, sizeof(*wait));
> -     wait->out.status = (r == 0);
> +     wait->out.status = (fence_err < 0);
> +
> +     if (fence_err < 0)
> +             return -ENODEV;
>   
>       return 0;
>   }
> @@ -1346,6 +1352,7 @@ static int amdgpu_cs_wait_all_fences(struct 
> amdgpu_device *adev,
>       uint32_t fence_count = wait->in.fence_count;
>       unsigned int i;
>       long r = 1;
> +     int fence_err = 0;
>   
>       for (i = 0; i < fence_count; i++) {
>               struct dma_fence *fence;
> @@ -1358,16 +1365,20 @@ static int amdgpu_cs_wait_all_fences(struct 
> amdgpu_device *adev,
>                       continue;
>   
>               r = dma_fence_wait_timeout(fence, true, timeout);
> +             fence_err = dma_fence_get_status(fence);
>               dma_fence_put(fence);
>               if (r < 0)
>                       return r;
>   
> -             if (r == 0)
> +             if (r == 0 || fence_err < 0)
>                       break;
>       }
>   
>       memset(wait, 0, sizeof(*wait));
> -     wait->out.status = (r > 0);
> +     wait->out.status = (r > 0 && fence_err == 0);
> +
> +     if (fence_err < 0)
> +             return -ENODEV;
>   
>       return 0;
>   }
> @@ -1391,6 +1402,7 @@ static int amdgpu_cs_wait_any_fence(struct 
> amdgpu_device *adev,
>       struct dma_fence **array;
>       unsigned int i;
>       long r;
> +     int fence_err = 0;
>   
>       /* Prepare the fence array */
>       array = kcalloc(fence_count, sizeof(struct dma_fence *), 
> GFP_KERNEL); @@ -1418,10 +1430,12 @@ static int 
> amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
>                                      &first);
>       if (r < 0)
>               goto err_free_fence_array;
> +     else
> +             fence_err = dma_fence_get_status(array[first]);
>   
>   out:
>       memset(wait, 0, sizeof(*wait));
> -     wait->out.status = (r > 0);
> +     wait->out.status = (r > 0 && fence_err == 0);
>       wait->out.first_signaled = first;
>       /* set return value 0 to indicate success */
>       r = 0;
> @@ -1431,6 +1445,9 @@ static int amdgpu_cs_wait_any_fence(struct 
> amdgpu_device *adev,
>               dma_fence_put(array[i]);
>       kfree(array);
>   
> +     if (fence_err < 0)
> +             return -ENODEV;
> +
>       return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 9efbb33..122e2e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2734,6 +2734,96 @@ static int amdgpu_recover_vram_from_shadow(struct 
> amdgpu_device *adev,
>   }
>   
>   /**
> + * amdgpu_sriov_gpu_reset_strict - reset the asic under strict mode
> + *
> + * @adev: amdgpu device pointer
> + * @job: which job trigger hang
> + *
> + * Attempt the reset the GPU if it has hung (all asics).
> + * for SRIOV case.
> + * Returns 0 for success or an error on failure.
> + *
> + * this function will deny all process/fence created before this 
> +reset,
> + * and drop all jobs unfinished during this reset.
> + *
> + * Application should take the responsibility to re-open the FD to 
> +re-create
> + * the VM page table and recover all resources as well
> + *
> + **/
> +int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct 
> +amdgpu_job *job) {
> +     int i, r = 0;
> +     int resched;
> +     struct amdgpu_ring *ring;
> +
> +     /* other thread is already into the gpu reset so just quit and come 
> later */
> +     if (!atomic_add_unless(&adev->in_sriov_reset, 1, 1))
> +             return -EAGAIN;
> +
> +     atomic_inc(&adev->gpu_reset_counter);
> +
> +     /* block TTM */
> +     resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
> +
> +     /* fake signal jobs already scheduled  */
> +     for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> +             ring = adev->rings[i];
> +
> +             if (!ring || !ring->sched.thread)
> +                     continue;
> +
> +             kthread_park(ring->sched.thread);
> +             amd_sched_set_sched_hang(&ring->sched);
> +             amdgpu_fence_driver_force_completion_ring(ring);
> +             amd_sched_set_queue_hang(&ring->sched);
> +     }
> +
> +     /* request to take full control of GPU before re-initialization  */
> +     if (job)
> +             amdgpu_virt_reset_gpu(adev);
> +     else
> +             amdgpu_virt_request_full_gpu(adev, true);
> +
> +     /* Resume IP prior to SMC */
> +     amdgpu_sriov_reinit_early(adev);
> +
> +     /* we need recover gart prior to run SMC/CP/SDMA resume */
> +     amdgpu_ttm_recover_gart(adev);
> +
> +     /* now we are okay to resume SMC/CP/SDMA */
> +     amdgpu_sriov_reinit_late(adev);
> +
> +     /* resume IRQ status */
> +     amdgpu_irq_gpu_reset_resume_helper(adev);
> +
> +     if (amdgpu_ib_ring_tests(adev))
> +             dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", 
> r);
> +
> +     /* release full control of GPU after ib test */
> +     amdgpu_virt_release_full_gpu(adev, true);
> +
> +     for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> +             ring = adev->rings[i];
> +
> +             if (!ring || !ring->sched.thread)
> +                     continue;
> +
> +             kthread_unpark(ring->sched.thread);
> +     }
> +
> +     drm_helper_resume_force_mode(adev->ddev);
> +
> +     ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
> +     if (r)
> +             dev_info(adev->dev, "Strict mode GPU reset failed\n");
> +     else
> +             dev_info(adev->dev, "Strict mode GPU reset successed!\n");
> +
> +     atomic_set(&adev->in_sriov_reset, 0);
> +     return 0;
> +}
> +
> +/**
>    * amdgpu_sriov_gpu_reset - reset the asic
>    *
>    * @adev: amdgpu device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 8f5211c..eee67dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -123,6 +123,7 @@ int amdgpu_cntl_sb_buf_per_se = 0;
>   int amdgpu_param_buf_per_se = 0;
>   int amdgpu_job_hang_limit = 0;
>   int amdgpu_lbpw = -1;
> +int amdgpu_sriov_reset_level = 0;
>   
>   MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
>   module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ 
> -269,6 +270,9 @@ module_param_named(job_hang_limit, amdgpu_job_hang_limit, 
> int ,0444);
>   MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 
> 0 = disable, -1 = auto)");
>   module_param_named(lbpw, amdgpu_lbpw, int, 0444);
>   
> +MODULE_PARM_DESC(sriov_reset_level, "what level will gpu reset on, 0: 
> +loose, 1:strict, other:disable (default 0))"); 
> +module_param_named(sriov_reset_level, amdgpu_sriov_reset_level, int 
> +,0444);
> +
>   #ifdef CONFIG_DRM_AMDGPU_SI
>   
>   #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 0db81a4..933823a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -41,7 +41,11 @@ static void amdgpu_job_timedout(struct amd_sched_job 
> *s_job)
>               int r;
>   
>   try_again:
> -             r = amdgpu_sriov_gpu_reset(job->adev, job);
> +             if (amdgpu_sriov_reset_level == 1)
> +                     r = amdgpu_sriov_gpu_reset_strict(job->adev, job);
> +             else
> +                     r = amdgpu_sriov_gpu_reset(job->adev, job);
> +
>               if (r == -EAGAIN) {
>                       /* maye two different schedulers all have hang job, try 
> later */
>                       schedule();
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index a3cbd5a..5664a10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -100,5 +100,6 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
>   int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job 
> *job);
>   int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
>   void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
> +int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct 
> +amdgpu_job *job);
>   
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 2812d88..00a9629 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -247,8 +247,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
> *work)
>               return;
>       }
>   
> -     /* Trigger recovery due to world switch failure */
> -     amdgpu_sriov_gpu_reset(adev, NULL);
> +     /* use strict mode if FLR triggered from hypervisor */
> +     amdgpu_sriov_gpu_reset_strict(adev, NULL);
>   }
>   
>   static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c 
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> index c25a831..c94b6e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> @@ -513,8 +513,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct 
> *work)
>               return;
>       }
>   
> -     /* Trigger recovery due to world switch failure */
> -     amdgpu_sriov_gpu_reset(adev, NULL);
> +     /* use strict mode if FLR triggered from hypervisor */
> +     amdgpu_sriov_gpu_reset_strict(adev, NULL);
>   }
>   
>   static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, 
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 97c94f9..12c3092 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -430,6 +430,66 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler 
> *sched)
>       spin_unlock(&sched->job_list_lock);
>   }
>   
> +/**
> + * amd_sched_set_sched_hang
> + * @sched: the scheduler need to set all pending jobs hang
> + *
> + * this routine set all unfinished jobs pending in the sched to
> + * an error -ETIME statues
> + *
> + **/
> +void amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched) {
> +     struct amd_sched_job *s_job;
> +
> +     spin_lock(&sched->job_list_lock);
> +     list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node)
> +             dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
> +
> +     spin_unlock(&sched->job_list_lock);
> +}
> +
> +/**
> + * amd_sched_set_queue_hang
> + * @sched: the scheduler need to set all job in kfifo hang
> + *
> + * this routine set all jobs in the KFIFO of @sched to an error
> + * -ETIME status and signal those jobs.
> + *
> + **/
> +
> +void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched) {
> +     struct amd_sched_entity *entity, *tmp;
> +     struct amd_sched_job *s_job;
> +     struct amd_sched_rq *rq;
> +     int i;
> +
> +     /* set HANG status on all jobs queued and fake signal them */
> +     for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++) {
> +             rq = &sched->sched_rq[i];
> +
> +             spin_lock(&rq->lock);
> +             list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
> +                     if (entity->dependency) {
> +                             dma_fence_remove_callback(entity->dependency, 
> &entity->cb);
> +                             dma_fence_put(entity->dependency);
> +                             entity->dependency = NULL;
> +                     }
> +
> +                     spin_lock(&entity->queue_lock);
> +                     while(kfifo_out(&entity->job_queue, &s_job, 
> sizeof(s_job)) == sizeof(s_job)) {
> +                             dma_fence_set_error(&s_job->s_fence->finished, 
> -ETIME);
> +                             amd_sched_fence_scheduled(s_job->s_fence);
> +                             amd_sched_fence_finished(s_job->s_fence);
> +                     }
> +                     spin_unlock(&entity->queue_lock);
> +             }
> +             spin_unlock(&rq->lock);
> +     }
> +     wake_up(&sched->job_scheduled);
> +}
> +
>   void amd_sched_job_kickout(struct amd_sched_job *s_job)
>   {
>       struct amd_gpu_scheduler *sched = s_job->sched; diff --git 
> a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h 
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index f9d8f28..f0242aa 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -167,4 +167,6 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler 
> *sched);
>   bool amd_sched_dependency_optimized(struct dma_fence* fence,
>                                   struct amd_sched_entity *entity);
>   void amd_sched_job_kickout(struct amd_sched_job *s_job);
> +void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched); void 
> +amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched);
>   #endif


_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to