From: Andrzej Kacprowski <[email protected]> The driver was returning ABORTED for all errors that trigger engine reset. It is better to distinguish between different error types by returning the actual error code reported by firmware. This allows userspace to take different actions based on the error type and improves debuggability.
Refactor ivpu_job_signal_and_destroy() by extracting engine error handling logic into a new function ivpu_job_handle_engine_error(). This simplifies engine error handling logic by removing necessity of calling ivpu_job_singal_and_destroy() multiple times by a single job changing it's behavior based on job status. Signed-off-by: Andrzej Kacprowski <[email protected]> Signed-off-by: Karol Wachowski <[email protected]> --- Changes between v1 and v2: Improved commit message describing that returning a single status code for all different errors was not desirable. --- drivers/accel/ivpu/ivpu_job.c | 49 ++++++++++++++++++++++++----------- drivers/accel/ivpu/ivpu_job.h | 3 +++ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index fd548028f1d6..ba4535a75aa7 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -564,25 +564,26 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device * return job; } -static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) +bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status) { - struct ivpu_job *job; - lockdep_assert_held(&vdev->submitted_jobs_lock); - job = xa_load(&vdev->submitted_jobs_xa, job_id); - if (!job) - return -ENOENT; - switch (job_status) { case VPU_JSM_STATUS_PROCESSING_ERR: case VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MIN ... VPU_JSM_STATUS_ENGINE_RESET_REQUIRED_MAX: { + struct ivpu_job *job = xa_load(&vdev->submitted_jobs_xa, job_id); + + if (!job) + return false; + /* Trigger an engine reset */ guard(mutex)(&job->file_priv->lock); + job->job_status = job_status; + if (job->file_priv->has_mmu_faults) - return 0; + return false; /* * Mark context as faulty and defer destruction of the job to jobs abort thread @@ -591,26 +592,42 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 */ job->file_priv->has_mmu_faults = true; queue_work(system_wq, &vdev->context_abort_work); - return 0; + return true; } default: /* Complete job with error status, engine reset not required */ break; } - job = ivpu_job_remove_from_submitted_jobs(vdev, job_id); + return false; +} + +static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status) +{ + struct ivpu_job *job; + + lockdep_assert_held(&vdev->submitted_jobs_lock); + + job = xa_load(&vdev->submitted_jobs_xa, job_id); if (!job) return -ENOENT; - if (job->file_priv->has_mmu_faults) - job_status = DRM_IVPU_JOB_STATUS_ABORTED; + ivpu_job_remove_from_submitted_jobs(vdev, job_id); + + if (job->job_status == VPU_JSM_STATUS_SUCCESS) { + if (job->file_priv->has_mmu_faults) + job->job_status = DRM_IVPU_JOB_STATUS_ABORTED; + else + job->job_status = job_status; + } - job->bos[CMD_BUF_IDX]->job_status = job_status; + job->bos[CMD_BUF_IDX]->job_status = job->job_status; dma_fence_signal(job->done_fence); trace_job("done", job); ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", - job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, + job->job_status); ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); @@ -1030,7 +1047,9 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; mutex_lock(&vdev->submitted_jobs_lock); - ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); + if (!ivpu_job_handle_engine_error(vdev, payload->job_id, payload->job_status)) + /* No engine error, complete the job normally */ + ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status); mutex_unlock(&vdev->submitted_jobs_lock); } diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index d2fc4c151614..3ab61e6a5616 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -51,6 +51,7 @@ struct ivpu_cmdq { * @cmdq_id: Command queue ID used for submission * @job_id: Unique job ID for tracking and status reporting * @engine_idx: Engine index for job execution + * @job_status: Status reported by firmware for this job * @primary_preempt_buf: Primary preemption buffer for job * @secondary_preempt_buf: Secondary preemption buffer for job (optional) * @bo_count: Number of buffer objects associated with this job @@ -64,6 +65,7 @@ struct ivpu_job { u32 cmdq_id; u32 job_id; u32 engine_idx; + u32 job_status; struct ivpu_bo *primary_preempt_buf; struct ivpu_bo *secondary_preempt_buf; size_t bo_count; @@ -83,6 +85,7 @@ void ivpu_cmdq_abort_all_jobs(struct ivpu_device *vdev, u32 ctx_id, u32 cmdq_id) void ivpu_job_done_consumer_init(struct ivpu_device *vdev); void ivpu_job_done_consumer_fini(struct ivpu_device *vdev); +bool ivpu_job_handle_engine_error(struct ivpu_device *vdev, u32 job_id, u32 job_status); void ivpu_context_abort_work_fn(struct work_struct *work); void ivpu_jobs_abort_all(struct ivpu_device *vdev); -- 2.43.0
