Introduce a new profiler IOCTL operation, KFD_IOC_PROFILER_PTL_CONTROL, that
gives userspace explicit control over the Peak Tops Limiter (PTL) state for
profiling.

Link: https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk
Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Yifan Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 102 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |   9 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   4 +
 drivers/gpu/drm/amd/include/amdgpu_ptl.h |   2 +
 include/uapi/linux/kfd_ioctl.h           |   7 ++
 6 files changed, 126 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index aa9307d88fde..4e04eba9879a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2396,6 +2396,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool ena
 
        ptl->hw_supported = true;
 
+       atomic_set(&ptl->disable_ref, 0);
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9c37e8248540..9a23621542fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1765,6 +1765,104 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 }
 #endif
 
+/*
+ * Set the PTL state through kfd2kgd->ptl_ctrl(PSP_PTL_PERF_MON_SET) using the
+ * device's current fmt1/fmt2. Returns -EOPNOTSUPP without PTL or the hook.
+ * Not static: kfd_priv.h declares this function with external linkage.
+ */
+int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
+{
+       struct amdgpu_device *adev = pdd->dev->adev;
+       struct amdgpu_ptl *ptl = &adev->psp.ptl;
+       enum amdgpu_ptl_fmt pref_format1 = ptl->fmt1;
+       enum amdgpu_ptl_fmt pref_format2 = ptl->fmt2;
+       uint32_t ptl_state = enable ? 1 : 0;
+
+       if (!ptl->hw_supported || !pdd->dev->kfd2kgd ||
+           !pdd->dev->kfd2kgd->ptl_ctrl)
+               return -EOPNOTSUPP;
+
+       return pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET,
+                                          &ptl_state, &pref_format1,
+                                          &pref_format2);
+}
+
+/*
+ * Record a PTL-disable request for @pdd; a pdd that already holds one is a
+ * no-op. The first request per device (disable_ref 0 -> 1) turns PTL off.
+ * Returns 0, or the kfd_ptl_control() error (the ref is then dropped again).
+ */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+               struct kfd_process *p)
+{
+       struct amdgpu_device *adev = pdd->dev->adev;
+       struct amdgpu_ptl *ptl = &adev->psp.ptl;
+       int ret = 0;
+
+       mutex_lock(&ptl->mutex);
+       if (pdd->ptl_disable_req)
+               goto unlock;
+       if (atomic_inc_return(&ptl->disable_ref) == 1)
+               ret = kfd_ptl_control(pdd, false);
+       if (ret) {
+               atomic_dec(&ptl->disable_ref);
+               dev_warn(adev->dev, "failed to disable PTL\n");
+       } else {
+               pdd->ptl_disable_req = true;
+       }
+unlock:
+       mutex_unlock(&ptl->mutex);
+       return ret;
+}
+
+/*
+ * Drop @pdd's PTL-disable request (no-op if none is held). The last release
+ * per device (disable_ref 1 -> 0) re-enables PTL; on failure state is kept.
+ */
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+               struct kfd_process *p)
+{
+       struct amdgpu_device *adev = pdd->dev->adev;
+       struct amdgpu_ptl *ptl = &adev->psp.ptl;
+       int ret = 0;
+
+       mutex_lock(&ptl->mutex);
+       if (!pdd->ptl_disable_req)
+               goto out;
+       if (atomic_dec_return(&ptl->disable_ref) == 0)
+               ret = kfd_ptl_control(pdd, true);
+       if (ret) {
+               atomic_inc(&ptl->disable_ref);
+               dev_warn(adev->dev, "Failed to enable PTL on release: %d\n", ret);
+       } else {
+               pdd->ptl_disable_req = false;
+       }
+out:
+       mutex_unlock(&ptl->mutex);
+       return ret;
+}
+
+/*
+ * KFD_IOC_PROFILER_PTL_CONTROL: enable == 0 takes a PTL-disable request for
+ * the GPU named by gpu_id, any other value drops it. -EINVAL on a bad lookup.
+ */
+static int kfd_profiler_ptl_control(struct kfd_process *p,
+               struct kfd_ioctl_ptl_control *args)
+{
+       struct kfd_process_device *pdd;
+
+       mutex_lock(&p->mutex);
+       pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+       mutex_unlock(&p->mutex);
+
+       if (!(pdd && pdd->dev && pdd->dev->kfd))
+               return -EINVAL;
+
+       if (args->enable)
+               return kfd_ptl_disable_release(pdd, p);
+       return kfd_ptl_disable_request(pdd, p);
+}
+
 static int criu_checkpoint_process(struct kfd_process *p,
                             uint8_t __user *user_priv_data,
                             uint64_t *priv_offset)
@@ -3230,6 +3328,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
                if (!kfd->profiler_process) {
                        kfd->profiler_process = p;
                        status = 0;
+                       kfd_ptl_disable_request(pdd, p);
                } else if (kfd->profiler_process == p) {
                        status = -EALREADY;
                } else {
@@ -3238,6 +3337,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
        } else if (op == 0 && kfd->profiler_process == p) {
                kfd->profiler_process = NULL;
                status = 0;
+               kfd_ptl_disable_release(pdd, p);
        }
        mutex_unlock(&kfd->profiler_lock);
 
@@ -3280,6 +3380,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d
                return 0;
        case KFD_IOC_PROFILER_PMC:
                return kfd_profiler_pmc(p, &args->pmc);
+       case KFD_IOC_PROFILER_PTL_CONTROL:
+               return kfd_profiler_ptl_control(p, &args->ptl);
        }
        return -EINVAL;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8983065645fa..48347065b9cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -872,6 +872,8 @@ struct kfd_process_device {
        bool has_reset_queue;
 
        u32 pasid;
+       /* Indicates this process has requested PTL stay disabled */
+       bool ptl_disable_req;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1607,6 +1609,13 @@ static inline bool kfd_is_first_node(struct kfd_node *node)
        return (node == node->kfd->nodes[0]);
 }
 
+/* PTL support (implemented in kfd_chardev.c) */
+int kfd_ptl_control(struct kfd_process_device *pdd, bool enable);
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+               struct kfd_process *p);
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+               struct kfd_process *p);
+
 /* Debugfs */
 #if defined(CONFIG_DEBUG_FS)
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index deca19b478d0..882080dc4925 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1132,6 +1132,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
                pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
                        pdd->dev->id, p->lead_thread->pid);
                kfd_process_profiler_release(p, pdd);
+
+               if (pdd->ptl_disable_req)
+                       kfd_ptl_disable_release(pdd, p);
+
                kfd_process_device_destroy_cwsr_dgpu(pdd);
                kfd_process_device_destroy_ib_mem(pdd);
 
diff --git a/drivers/gpu/drm/amd/include/amdgpu_ptl.h b/drivers/gpu/drm/amd/include/amdgpu_ptl.h
index e5ea1084bb09..f944ab45d1ea 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_ptl.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_ptl.h
@@ -39,6 +39,8 @@ struct amdgpu_ptl {
        enum amdgpu_ptl_fmt             fmt2;
        bool                            enabled;
        bool                            hw_supported;
+       /* PTL disable reference counting */
+       atomic_t                        disable_ref;
        struct mutex                    mutex;
 };
 
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index a8b2a18d07cf..da93daa3283c 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -1562,6 +1562,7 @@ struct kfd_ioctl_dbg_trap_args {
 enum kfd_profiler_ops {
        KFD_IOC_PROFILER_PMC = 0,
        KFD_IOC_PROFILER_VERSION = 2,
+       KFD_IOC_PROFILER_PTL_CONTROL = 3,       /* payload: union member ptl */
 };
 
 /**
@@ -1573,10 +1574,16 @@ struct kfd_ioctl_pmc_settings {
        __u32 perfcount_enable;   /* Force Perfcount Enable for queues on GPU */
 };
 
+struct kfd_ioctl_ptl_control {
+       __u32 gpu_id; /* user_gpu_id */
+       __u32 enable; /* 0: request PTL disabled; non-zero: drop that request */
+};
+
 struct kfd_ioctl_profiler_args {
        __u32 op;                                               /* kfd_profiler_op */
        union {
                struct kfd_ioctl_pmc_settings  pmc;
+               struct kfd_ioctl_ptl_control   ptl;
                __u32 version;                          /* KFD_IOC_PROFILER_VERSION_NUM */
        };
 };
-- 
2.34.1

Reply via email to