The guilty handling tried to establish a second way of signaling problems with the GPU back to userspace. This caused quite a number of issues we had to work around, especially lifetime issues with the drm_sched_entity.
Just drop the handling altogether and use the dma_fence based approach instead. Signed-off-by: Christian König <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 25 ++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 +------- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 10d8dcc3a972..d67c87fbf371 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -60,11 +60,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, if (!p->ctx) return -EINVAL; - if (atomic_read(&p->ctx->guilty)) { - amdgpu_ctx_put(p->ctx); - return -ECANCELED; - } - amdgpu_sync_create(&p->sync); drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c273557fb1ae..cfb24e5d065e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -256,7 +256,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, } r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds, - &ctx->guilty); + NULL); if (r) goto error_free_entity; @@ -580,6 +580,27 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, #define AMDGPU_RAS_COUNTE_DELAY_MS 3000 +static bool amdgpu_ctx_guilty(struct amdgpu_ctx *ctx) +{ /* Returns true if any populated entity of @ctx reports -ETIME, false otherwise. */ + int i, j, r; + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + struct amdgpu_ctx_entity *ctx_entity; + + ctx_entity = ctx->entities[i][j]; + if (!ctx_entity) /* empty slot, nothing to query */ + continue; + + r = drm_sched_entity_error(&ctx_entity->entity); + if (r == -ETIME) /* NOTE(review): presumably set when a job of this entity timed out - confirm */ + return true; + } + } + + return false; +} + static int amdgpu_ctx_query2(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, 
uint32_t id, union drm_amdgpu_ctx_out *out) @@ -608,7 +629,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; - if (atomic_read(&ctx->guilty)) + if (amdgpu_ctx_guilty(ctx)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; if (amdgpu_in_reset(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index cf8d700a22fe..e444b2088d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -50,7 +50,6 @@ struct amdgpu_ctx { int32_t init_priority; int32_t override_priority; uint32_t stable_pstate; - atomic_t guilty; bool preamble_presented; uint64_t generation; unsigned long ras_counter_ce; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 073f632f295a..1536d40bb362 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5092,14 +5092,10 @@ int amdgpu_device_link_reset(struct amdgpu_device *adev) int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, r = 0; - struct amdgpu_job *job = NULL; struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); - - if (reset_context->reset_req_dev == adev) - job = reset_context->job; + int i, r; if (amdgpu_sriov_vf(adev)) amdgpu_virt_pre_reset(adev); @@ -5119,9 +5115,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_fence_driver_isr_toggle(adev, false); - if (job && job->vm) - drm_sched_increase_karma(&job->base); - r = amdgpu_reset_prepare_hwcontext(adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ if (r == -EOPNOTSUPP) -- 2.43.0
