Move the scheduler suspension and resumption logic directly into
kfd_ptl_control(). This ensures the KFD scheduler is always stopped while
a PTL switching command is executing, which is required by the updated
PTL control flow.

Additionally, update profile_lock_device() to check ptl_hw_supported
before attempting to enable or disable PTL restrictions, and to capture
and log the status returned by kfd_ptl_disable_request() and
kfd_ptl_disable_release().

Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 27 ++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6fa8cff55a32..5fda0efe5469 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1778,10 +1778,17 @@ int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
        if (!pdd->dev->kfd2kgd || !pdd->dev->kfd2kgd->ptl_ctrl)
                return -EOPNOTSUPP;
 
+       if (adev->kfd.init_complete)
+               amdgpu_amdkfd_stop_sched(adev, pdd->dev->node_id);
+
        ret = pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET,
                                          &ptl_state,
                                          &pref_format1,
                                          &pref_format2);
+
+       if (adev->kfd.init_complete)
+               amdgpu_amdkfd_start_sched(adev, pdd->dev->node_id);
+
        return ret;
 }
 
@@ -1831,11 +1838,7 @@ int kfd_ptl_disable_release(struct kfd_process_device *pdd,
                goto out;
 
        if (atomic_dec_return(&adev->psp.ptl_disable_ref) == 0) {
-               if (adev->kfd.init_complete)
-                       amdgpu_amdkfd_stop_sched(adev, pdd->dev->node_id);
                ret = kfd_ptl_control(pdd, true);
-               if (adev->kfd.init_complete)
-                       amdgpu_amdkfd_start_sched(adev, pdd->dev->node_id);
                if (ret) {
                        atomic_inc(&adev->psp.ptl_disable_ref);
                        dev_warn(pdd->dev->adev->dev,
@@ -3337,7 +3340,13 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
                if (!kfd->profiler_process) {
                        kfd->profiler_process = p;
                        status = 0;
-                       kfd_ptl_disable_request(pdd, p);
+                       if (pdd->dev->adev->psp.ptl_hw_supported) {
+                               status = kfd_ptl_disable_request(pdd, p);
+                               if (status != 0)
+                                       dev_err(kfd_device,
+                                               "Failed to lock device %d for profiling, error %d\n",
+                                               gpu_id, status);
+                       }
                } else if (kfd->profiler_process == p) {
                        status = -EALREADY;
                } else {
@@ -3346,8 +3355,14 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
        } else if (op == 0 && kfd->profiler_process == p) {
                kfd->profiler_process = NULL;
                status = 0;
-               kfd_ptl_disable_release(pdd, p);
 
+               if (pdd->dev->adev->psp.ptl_hw_supported) {
+                       status = kfd_ptl_disable_release(pdd, p);
+                       if (status)
+                               dev_err(kfd_device,
+                                               "Failed to unlock device %d for profiling, error %d\n",
+                                               gpu_id, status);
+               }
        }
        mutex_unlock(&kfd->profiler_lock);
 
-- 
2.34.1

Reply via email to