Release profiler_lock before calling kfd_ptl_disable_request and kfd_ptl_disable_release to resolve potential deadlock Introduce a dedicated adev->psp.ptl_mutex to protect PTL state instead of relying on the KFD process mutex. This optimization ensures thread safety independent of the process lock hierarchy.
Signed-off-by: Perry Yuan <[email protected]> Reviewed-by: Yifan Zhang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 +++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 ++++++++---- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6e422da88b7e..aae2f850b044 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4442,6 +4442,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->psp.mutex); + mutex_init(&adev->psp.ptl_mutex); mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2b197cdefe31..682a0e4adafd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1260,6 +1260,15 @@ int psp_performance_monitor_hw(struct psp_context *psp, u32 req_code, psp_ptl_fmt_verify(psp, *fmt2, &ptl_fmt2)) return -EINVAL; + /* + * Add check to skip if state and formats are identical to current ones + */ + if (req_code == PSP_PTL_PERF_MON_SET && + psp->ptl_enabled == *ptl_state && + psp->ptl_fmt1 == ptl_fmt1 && + psp->ptl_fmt2 == ptl_fmt2) + return 0; + cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_PERF_HW; @@ -1334,19 +1343,24 @@ static ssize_t ptl_enable_store(struct device *dev, else return -EINVAL; + mutex_lock(&psp->ptl_mutex); fmt1 = psp->ptl_fmt1; fmt2 = psp->ptl_fmt2; ptl_state = enable ? 1 : 0; cur_enabled = READ_ONCE(psp->ptl_enabled); - if (cur_enabled == enable) + if (cur_enabled == enable) { + mutex_unlock(&psp->ptl_mutex); return count; + } ret = psp_performance_monitor_hw(psp, PSP_PTL_PERF_MON_SET, &ptl_state, &fmt1, &fmt2); if (ret) { dev_err(adev->dev, "Failed to set PTL err = %d\n", ret); + mutex_unlock(&psp->ptl_mutex); return ret; } + mutex_unlock(&psp->ptl_mutex); return count; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 1af641ae9a02..1ab7255718df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -478,6 +478,7 @@ struct psp_context { bool ptl_hw_supported; /* PTL disable reference counting */ atomic_t ptl_disable_ref; + struct mutex ptl_mutex; }; struct amdgpu_psp_funcs { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 5fda0efe5469..bccb857c81c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1802,7 +1802,7 @@ int kfd_ptl_disable_request(struct kfd_process_device *pdd, return -ENODEV; adev = pdd->dev->adev; - mutex_lock(&p->mutex); + mutex_lock(&adev->psp.ptl_mutex); if (pdd->ptl_disable_req) goto out; @@ -1819,7 +1819,7 @@ int kfd_ptl_disable_request(struct kfd_process_device *pdd, pdd->ptl_disable_req = true; out: - mutex_unlock(&p->mutex); + mutex_unlock(&adev->psp.ptl_mutex); return ret; } @@ -1833,7 +1833,7 @@ int kfd_ptl_disable_release(struct kfd_process_device *pdd, return -ENODEV; adev = pdd->dev->adev; - mutex_lock(&p->mutex); + mutex_lock(&adev->psp.ptl_mutex); if (!pdd->ptl_disable_req) goto out; @@ -1850,7 +1850,7 @@ int kfd_ptl_disable_release(struct kfd_process_device *pdd, pdd->ptl_disable_req = false; out: - mutex_unlock(&p->mutex); + mutex_unlock(&adev->psp.ptl_mutex); return ret; } @@ -3340,6 +3340,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, if (!kfd->profiler_process) { kfd->profiler_process = p; status = 0; + mutex_unlock(&kfd->profiler_lock); if (pdd->dev->adev->psp.ptl_hw_supported) { status = kfd_ptl_disable_request(pdd, p); if (status != 0) @@ -3347,6 +3348,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, "Failed to lock device %d for profiling, error %d\n", gpu_id, status); } + return status; } else if (kfd->profiler_process == p) { status = -EALREADY; } else { @@ -3355,6 +3357,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, } else if (op == 0 && kfd->profiler_process == p) { kfd->profiler_process = NULL; status = 0; + mutex_unlock(&kfd->profiler_lock); if (pdd->dev->adev->psp.ptl_hw_supported) { status = kfd_ptl_disable_release(pdd, p); @@ -3363,6 +3366,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, "Failed to unlock device %d for profiling, error %d\n", gpu_id, status); } + return status; } mutex_unlock(&kfd->profiler_lock); -- 2.34.1
