>
> - /* block scheduler */
> - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> - ring = adev->rings[i];
> + /* we start from the ring trigger GPU hang */
> + j = job ? job->ring->idx : 0;
> +
> + if (job)
> + if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit))
> + amd_sched_job_kickout(&job->base);
Well, that looks like the wrong order to me. We should probably stop the
scheduler before trying to mess with the job.
[ML] It's really not necessary; we hold a spin_lock to protect the mirror-list,
so nothing will get messed up ...
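To illustrate the point being argued here, below is a minimal user-space model
of that claim. Everything in it is invented for illustration (pthread_mutex
stands in for the kernel spinlock, the model_* names and main() are made up);
only the mirror list / job_list_lock / list_del_init() idea comes from the
patch. The guilty job is unlinked from the mirror list under the same lock that
every other user of the list takes, so the deletion itself does not depend on
whether the scheduler thread has already been parked.

/*
 * Illustrative user-space model only, not the driver code.
 */
#include <pthread.h>
#include <stdio.h>

struct model_job {
	struct model_job *prev, *next;	/* node in the mirror list */
	int id;
};

struct model_sched {
	struct model_job head;		/* sentinel for the mirror list */
	pthread_mutex_t job_list_lock;	/* stands in for the spin_lock */
};

static void model_sched_init(struct model_sched *s)
{
	s->head.prev = s->head.next = &s->head;
	pthread_mutex_init(&s->job_list_lock, NULL);
}

static void model_job_add(struct model_sched *s, struct model_job *j)
{
	pthread_mutex_lock(&s->job_list_lock);
	j->prev = s->head.prev;
	j->next = &s->head;
	s->head.prev->next = j;
	s->head.prev = j;
	pthread_mutex_unlock(&s->job_list_lock);
}

/* the "kickout": unlink the guilty job under the same lock */
static void model_job_kickout(struct model_sched *s, struct model_job *j)
{
	pthread_mutex_lock(&s->job_list_lock);
	j->prev->next = j->next;
	j->next->prev = j->prev;
	j->prev = j->next = j;		/* like list_del_init() */
	pthread_mutex_unlock(&s->job_list_lock);
}

int main(void)
{
	struct model_sched sched;
	struct model_job guilty = { .id = 42 };

	model_sched_init(&sched);
	model_job_add(&sched, &guilty);
	model_job_kickout(&sched, &guilty);
	printf("job %d removed, list empty: %d\n",
	       guilty.id, sched.head.next == &sched.head);
	return 0;
}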
>
> +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring
> +*ring) {
> + if (ring)
> + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); }
> +
The coding style is completely off.
[ML] I don't know why the coding style looks wrong on the email side, but I'm
sure it is correct on my side; check here:
void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring)
{
if (ring)
amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
}
In addition to that, I don't think this is a good idea. We should probably just
signal all the scheduler fences instead and not touch the hardware fence at
all.
[ML] Why not touch the hardware fence at all? The original/bare-metal GPU reset
also signals all rings' hardware fences first; I just follow the original logic
...
The scheduler fences will be signaled automatically after the hw fences signal.
Any problem with that? What's the concern?
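For readers following along, here is a rough user-space model of the flow
described above. Everything in it is invented for illustration; the comments
only name the driver helpers it loosely mirrors (amdgpu_fence_write() and
amdgpu_fence_process()), and sync_seq comes from the patch. Writing the last
emitted sequence number into the fence memory makes every outstanding hardware
fence look completed, and the scheduler fences waiting on them then signal
through the normal callback path.

/* Illustrative model only, not the driver code. */
#include <stdio.h>
#include <stdint.h>

#define MAX_PENDING 8

struct model_fence_drv {
	uint32_t sync_seq;	/* last sequence number emitted */
	uint32_t *cpu_addr;	/* fence memory the GPU normally writes */
};

struct model_sched_fence {
	uint32_t hw_seq;	/* hardware fence this sched fence waits on */
	int signaled;
};

/* like amdgpu_fence_write(): just store the value into fence memory */
static void model_fence_write(struct model_fence_drv *drv, uint32_t seq)
{
	*drv->cpu_addr = seq;
}

/* like amdgpu_fence_process(): signal every fence <= the completed seq */
static void model_fence_process(struct model_fence_drv *drv,
				struct model_sched_fence *pending, int n)
{
	uint32_t completed = *drv->cpu_addr;

	for (int i = 0; i < n; i++)
		if (!pending[i].signaled && pending[i].hw_seq <= completed)
			pending[i].signaled = 1;
}

int main(void)
{
	uint32_t fence_mem = 2;	/* GPU got as far as seq 2, then hung */
	struct model_fence_drv drv = { .sync_seq = 5, .cpu_addr = &fence_mem };
	struct model_sched_fence pending[MAX_PENDING] = {
		{ .hw_seq = 3 }, { .hw_seq = 4 }, { .hw_seq = 5 },
	};

	/* force completion: pretend the GPU reached sync_seq */
	model_fence_write(&drv, drv.sync_seq);
	model_fence_process(&drv, pending, 3);

	for (int i = 0; i < 3; i++)
		printf("seq %u signaled: %d\n",
		       (unsigned)pending[i].hw_seq, pending[i].signaled);
	return 0;
}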
> - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> - struct amdgpu_ring *ring = adev->rings[i];
> + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
> + ring = adev->rings[i % AMDGPU_MAX_RINGS];
> if (!ring || !ring->sched.thread)
> continue;
>
> + if (job && j != i) {
> + kthread_unpark(ring->sched.thread);
> + continue;
> + }
> +
Please split up that patch a bit further. E.g. first the handling to only
hw_job_reset the ring in question, then the kickout handling.
[ML] No, I don't think so. The kickout must happen prior to the hw_job_reset,
otherwise the scheduler fence callback will be removed and you would need to
manually install it later, which is not correct:
For the guilty job, we just kick it out before the job reset. In job_reset we
only reset the other, innocent jobs (and unbind the scheduler fence callback
for them). After the hw fence is forcibly set to drv_seq, all hw fences are
signaled (this is the way the original logic works, I didn't change that).
When we go to sched_recovery(), it will recover all innocent jobs and hook
their scheduler fences to new hw fences. That way only the guilty job is
dropped forever.
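Taken together, the ordering defended above looks roughly like the outline
below. It is a simplified sketch only: the four step functions are invented
stubs whose comments merely trace what the corresponding helpers in the patch
(amd_sched_invalidate_job/amd_sched_job_kickout, amd_sched_hw_job_reset,
amdgpu_fence_driver_force_completion_ring, amd_sched_job_recovery) are expected
to do, and the hardware re-init steps in between are omitted.

/*
 * Simplified outline only; the step functions are stubs that just trace
 * what the real helpers in the patch are expected to do.
 */
#include <stdio.h>
#include <stdbool.h>

static void kickout_guilty_job(void)
{
	/* amd_sched_invalidate_job() passed the karma threshold: drop the
	 * guilty job from the mirror list before anything else, so
	 * hw_job_reset() never detaches its fence callback. */
	printf("1. kick guilty job out of the mirror list\n");
}

static void park_and_reset_hang_ring(void)
{
	/* park every scheduler thread, but only hw_job_reset() the ring
	 * that actually hung: its remaining (innocent) jobs get their
	 * hardware fence callback unbound. */
	printf("2. park schedulers, hw_job_reset() the hang ring only\n");
}

static void force_complete_hw_fences(void)
{
	/* write sync_seq into the ring's fence memory so every stale
	 * hardware fence signals; the scheduler fences follow. */
	printf("3. force-complete the hang ring's hardware fences\n");
}

static void recover_and_unpark(void)
{
	/* amd_sched_job_recovery(): resubmit the innocent jobs and hook
	 * their scheduler fences to new hardware fences, then unpark. */
	printf("4. recover innocent jobs, unpark all schedulers\n");
}

int main(void)
{
	bool job_known = true;	/* a guilty job was identified */

	if (job_known)
		kickout_guilty_job();
	park_and_reset_hang_ring();
	force_complete_hw_fences();
	recover_and_unpark();
	return 0;
}

The key property is that step 1 removes the guilty job before step 2 unbinds
any fence callbacks, so nothing ever has to be re-installed for it; only the
guilty job is dropped forever.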
-----Original Message-----
From: Christian König [mailto:[email protected]]
Sent: Monday, May 08, 2017 9:12 PM
To: Liu, Monk <[email protected]>; [email protected]
Cc: Koenig, Christian <[email protected]>
Subject: Re: [PATCH 4/4] drm/amdgpu/SRIOV:implement guilty job TDR (V2)
On 08.05.2017 at 09:01, Liu, Monk wrote:
> @Christian
>
> This one is changed to the guilty job scheme in accordance with your
> response
>
> BR Monk
>
> -----Original Message-----
> From: Monk Liu [mailto:[email protected]]
> Sent: Monday, May 08, 2017 3:00 PM
> To: [email protected]
> Cc: Liu, Monk <[email protected]>
> Subject: [PATCH] drm/amdgpu/SRIOV:implement guilty job TDR (V2)
>
> 1, TDR will kick out the guilty job if it hangs more times than the threshold
> given by the kernel parameter "job_hang_limit"; that way a bad command stream
> will not cause GPU hangs indefinitely.
>
> by default this threshold is 1, so a job will be kicked out after it hangs.
>
> 2, if a job times out, the TDR routine will not reset all scheds/rings;
> instead it will only reset the given one, indicated by @job of
> amdgpu_sriov_gpu_reset. That way we don't need to reset and recover each
> sched/ring if we already know which job caused the GPU hang.
>
> 3, unblock sriov_gpu_reset for the AI family.
> 4, don't init the entity for KIQ.
>
> TODO:
> when a job is considered guilty, we should set a flag in its fence status
> and let the UMD side be aware that this fence signaled not because the job
> completed, but because it hung.
>
> Change-Id: I7b89c19a3de93249db570d0a80522176b1525a09
> Signed-off-by: Monk Liu <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 36 ++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 6 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
> drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 11 +++++++-
> drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 7 ++++++
> 8 files changed, 60 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 90a69bf..93bcea2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -111,6 +111,7 @@ extern int amdgpu_prim_buf_per_se; extern int
> amdgpu_pos_buf_per_se; extern int amdgpu_cntl_sb_buf_per_se; extern
> int amdgpu_param_buf_per_se;
> +extern int amdgpu_job_hang_limit;
>
> #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
> #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index b4bbbb3..23afc58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -52,6 +52,10 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
> struct amdgpu_ctx *ctx)
> struct amd_sched_rq *rq;
>
> rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
> +
> + if (ring == &adev->gfx.kiq.ring)
> + continue;
> +
That looks like a bug fix and should probably go into a separate patch.
> r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
> rq, amdgpu_sched_jobs);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 0e5f314..f3990fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2537,7 +2537,7 @@ static int amdgpu_recover_vram_from_shadow(struct
> amdgpu_device *adev,
> */
> int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job
> *job) {
> - int i, r = 0;
> + int i, j, r = 0;
> int resched;
> struct amdgpu_bo *bo, *tmp;
> struct amdgpu_ring *ring;
> @@ -2550,19 +2550,30 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device
> *adev, struct amdgpu_job *job)
> /* block TTM */
> resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
>
> - /* block scheduler */
> - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> - ring = adev->rings[i];
> + /* we start from the ring trigger GPU hang */
> + j = job ? job->ring->idx : 0;
> +
> + if (job)
> + if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit))
> + amd_sched_job_kickout(&job->base);
Well, that looks like the wrong order to me. We should probably stop the
scheduler before trying to mess with the job.
>
> + /* block scheduler */
> + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
> + ring = adev->rings[i % AMDGPU_MAX_RINGS];
> if (!ring || !ring->sched.thread)
> continue;
>
> kthread_park(ring->sched.thread);
> +
> + if (job && j != i)
> + continue;
> +
> + /* only do job_reset on the hang ring if @job not NULL */
> amd_sched_hw_job_reset(&ring->sched);
> - }
>
> - /* after all hw jobs are reset, hw fence is meaningless, so
> force_completion */
> - amdgpu_fence_driver_force_completion(adev);
> + /* after all hw jobs are reset, hw fence is meaningless, so
> force_completion */
> + amdgpu_fence_driver_force_completion_ring(ring);
> + }
>
> /* request to take full control of GPU before re-initialization */
> if (job)
> @@ -2615,11 +2626,16 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device
> *adev, struct amdgpu_job *job)
> }
> fence_put(fence);
>
> - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> - struct amdgpu_ring *ring = adev->rings[i];
> + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) {
> + ring = adev->rings[i % AMDGPU_MAX_RINGS];
> if (!ring || !ring->sched.thread)
> continue;
>
> + if (job && j != i) {
> + kthread_unpark(ring->sched.thread);
> + continue;
> + }
> +
Please split up that patch a bit further. E.g. first the handling to only
hw_job_reset the ring in question, then the kickout handling.
> amd_sched_job_recovery(&ring->sched);
> kthread_unpark(ring->sched.thread);
> }
> @@ -2629,6 +2645,8 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev,
> struct amdgpu_job *job)
> if (r) {
> /* bad news, how to tell it to userspace ? */
> dev_info(adev->dev, "GPU reset failed\n");
> + } else {
> + dev_info(adev->dev, "GPU reset successed!\n");
> }
>
> adev->gfx.in_reset = false;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 416908a..fd3691a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -112,6 +112,7 @@ int amdgpu_prim_buf_per_se = 0; int
> amdgpu_pos_buf_per_se = 0; int amdgpu_cntl_sb_buf_per_se = 0; int
> amdgpu_param_buf_per_se = 0;
> +int amdgpu_job_hang_limit = 0;
>
> MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in
> megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int,
> 0600); @@ -237,6 +238,9 @@ module_param_named(cntl_sb_buf_per_se,
> amdgpu_cntl_sb_buf_per_se, int, 0444);
> MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater
> Cache per Shader Engine (default depending on gfx)");
> module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int,
> 0444);
>
> +MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and
> +not drop it (default 0)"); module_param_named(job_hang_limit,
> +amdgpu_job_hang_limit, int ,0444);
> +
>
> static const struct pci_device_id pciidlist[] = { #ifdef
> CONFIG_DRM_AMDGPU_SI diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index d7523d1..8de3bd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct
> amdgpu_device *adev)
> }
> }
>
> +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring
> +*ring) {
> + if (ring)
> + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); }
> +
The coding style is completely off.
In addition to that, I don't think this is a good idea. We should probably just
signal all the scheduler fences instead and not touch the hardware fence at
all.
> /*
> * Common fence implementation
> */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 981ef08..03e88c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -76,6 +76,7 @@ struct amdgpu_fence_driver { int
> amdgpu_fence_driver_init(struct amdgpu_device *adev); void
> amdgpu_fence_driver_fini(struct amdgpu_device *adev); void
> amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
> +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring
> +*ring);
>
> int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
> unsigned num_hw_submission);
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 6f4e31f..4e97e6d 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -390,9 +390,18 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler
> *sched)
> &s_job->s_fence->cb)) {
> fence_put(s_job->s_fence->parent);
> s_job->s_fence->parent = NULL;
> + atomic_dec(&sched->hw_rq_count);
> }
> }
> - atomic_set(&sched->hw_rq_count, 0);
> + spin_unlock(&sched->job_list_lock);
> +}
> +
> +void amd_sched_job_kickout(struct amd_sched_job *s_job) {
> + struct amd_gpu_scheduler *sched = s_job->sched;
> +
> + spin_lock(&sched->job_list_lock);
> + list_del_init(&s_job->node);
> spin_unlock(&sched->job_list_lock);
> }
>
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index 8cb41d3..59694f3 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -81,6 +81,7 @@ struct amd_sched_job {
> struct list_head node;
> struct delayed_work work_tdr;
> uint64_t id;
> + atomic_t karma;
> };
>
> extern const struct fence_ops amd_sched_fence_ops_scheduled; @@ -96,6
> +97,11 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct
> fence *f)
> return NULL;
> }
>
> +static inline bool amd_sched_invalidate_job(struct amd_sched_job
> +*s_job, int threshold) {
> + return (s_job && atomic_inc_return(&s_job->karma) > threshold); }
> +
Again, the coding style is completely off.
Christian.
> /**
> * Define the backend operations called by the scheduler,
> * these functions should be implemented in driver side @@ -158,4 +164,5 @@
> int amd_sched_job_init(struct amd_sched_job *job,
> void *owner);
> void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched); void
> amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
> +void amd_sched_job_kickout(struct amd_sched_job *s_job);
> #endif
> --
> 2.7.4
>
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx