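Introduce a new profiler IOCTL operation, KFD_IOC_PROFILER_PTL_CONTROL,
that gives userspace explicit control over the Peak Tops Limiter (PTL)
state for profiling.

Disable requests are reference-counted per device, so PTL is re-enabled
only when the last requesting process releases its request or is torn
down.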

Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Yifan Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h  |  2 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  |  2 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 89 +++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  5 +-
 include/uapi/linux/kfd_ioctl.h           |  7 ++
 6 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 0a2f8d33a0ad..1af641ae9a02 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -476,6 +476,8 @@ struct psp_context {
        enum amdgpu_ptl_fmt             ptl_fmt2;
        bool                            ptl_enabled;
        bool                            ptl_hw_supported;
+       /* PTL disable reference counting */
+       atomic_t                        ptl_disable_ref;
 };
 
 struct amdgpu_psp_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index d94bf21db281..472e2f41fed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2395,6 +2395,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool sta
 
        adev->psp.ptl_hw_supported = true;
 
+       atomic_set(&adev->psp.ptl_disable_ref, 0);
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 285219781939..6457c5703f99 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1785,6 +1785,88 @@ int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
        return ret;
 }
 
+/*
+ * Take @pdd's reference on the device-wide PTL-disable count. Only the 0 -> 1
+ * transition calls kfd_ptl_control(pdd, false); repeat requests are no-ops.
+ * Returns -ENODEV for a NULL @pdd, otherwise 0 or kfd_ptl_control()'s error.
+ * NOTE(review): p->mutex is per process, so the count update and the PTL call
+ * do not look serialized against other processes on the device -- confirm.
+ */
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+               struct kfd_process *p)
+{
+       struct amdgpu_device *adev;
+       int err = 0;
+
+       if (!pdd)
+               return -ENODEV;
+       adev = pdd->dev->adev;
+       mutex_lock(&p->mutex);
+       if (!pdd->ptl_disable_req) {
+               if (atomic_inc_return(&adev->psp.ptl_disable_ref) == 1)
+                       err = kfd_ptl_control(pdd, false);
+               if (!err) {
+                       pdd->ptl_disable_req = true;
+               } else {
+                       atomic_dec(&adev->psp.ptl_disable_ref);
+                       dev_warn(adev->dev, "failed to disable PTL\n");
+               }
+       }
+       mutex_unlock(&p->mutex);
+       return err;
+}
+
+/*
+ * Drop @pdd's reference on the device-wide PTL-disable count (no-op if none is
+ * held). Only the 1 -> 0 transition calls kfd_ptl_control(pdd, true); if that
+ * fails the reference is kept. Returns -ENODEV for a NULL @pdd, else 0 or the
+ * kfd_ptl_control() error.
+ */
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+               struct kfd_process *p)
+{
+       struct amdgpu_device *adev;
+       int err = 0;
+
+       if (!pdd)
+               return -ENODEV;
+       adev = pdd->dev->adev;
+       mutex_lock(&p->mutex);
+       if (pdd->ptl_disable_req) {
+               if (atomic_dec_return(&adev->psp.ptl_disable_ref) == 0)
+                       err = kfd_ptl_control(pdd, true);
+               if (!err) {
+                       pdd->ptl_disable_req = false;
+               } else {
+                       atomic_inc(&adev->psp.ptl_disable_ref);
+                       dev_warn(adev->dev, "failed to enable PTL\n");
+               }
+       }
+       mutex_unlock(&p->mutex);
+       return err;
+}
+
+/*
+ * PTL ioctl: enable == 0 takes a PTL-disable reference on args->gpu_id, 1 drops
+ * it. Other enable values get -EINVAL; a gpu_id with no pdd gets -ENODEV.
+ */
+static int kfd_profiler_ptl_control(struct kfd_process *p,
+               struct kfd_ioctl_ptl_control *args)
+{
+       struct kfd_process_device *pdd;
+
+       /* Only 0 and 1 are defined by the UAPI; keep the rest reserved. */
+       if (args->enable > 1)
+               return -EINVAL;
+       mutex_lock(&p->mutex);
+       pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+       mutex_unlock(&p->mutex);
+       if (!pdd)
+               return -ENODEV;
+       return args->enable ? kfd_ptl_disable_release(pdd, p) :
+                             kfd_ptl_disable_request(pdd, p);
+}
+
 static int criu_checkpoint_process(struct kfd_process *p,
                             uint8_t __user *user_priv_data,
                             uint64_t *priv_offset)
@@ -3250,7 +3332,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
                if (!kfd->profiler_process) {
                        kfd->profiler_process = p;
                        status = 0;
-                       kfd_ptl_control(pdd, false);
+                       kfd_ptl_disable_request(pdd, p);
                } else if (kfd->profiler_process == p) {
                        status = -EALREADY;
                } else {
@@ -3259,7 +3341,8 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
        } else if (op == 0 && kfd->profiler_process == p) {
                kfd->profiler_process = NULL;
                status = 0;
-               kfd_ptl_control(pdd, true);
+               kfd_ptl_disable_release(pdd, p);
+
        }
        mutex_unlock(&kfd->profiler_lock);
 
@@ -3302,6 +3385,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d
                return 0;
        case KFD_IOC_PROFILER_PMC:
                return kfd_profiler_pmc(p, &args->pmc);
+       case KFD_IOC_PROFILER_PTL_CONTROL:
+               return kfd_profiler_ptl_control(p, &args->ptl);
        }
        return -EINVAL;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 164f69924a3d..48347065b9cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -872,6 +872,8 @@ struct kfd_process_device {
        bool has_reset_queue;
 
        u32 pasid;
+       /* Indicates this process has requested PTL stay disabled */
+       bool ptl_disable_req;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -1609,6 +1611,10 @@ static inline bool kfd_is_first_node(struct kfd_node *node)
 
 /* PTL support */
 int kfd_ptl_control(struct kfd_process_device *pdd, bool enable);
+int kfd_ptl_disable_request(struct kfd_process_device *pdd,
+               struct kfd_process *p);
+int kfd_ptl_disable_release(struct kfd_process_device *pdd,
+               struct kfd_process *p);
 
 /* Debugfs */
 #if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 5114ac4da5b9..882080dc4925 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1115,7 +1115,6 @@ static void kfd_process_profiler_release(struct kfd_process *p, struct kfd_proce
        mutex_lock(&pdd->dev->kfd->profiler_lock);
        if (pdd->dev->kfd->profiler_process == p) {
                pdd->qpd.dqm->ops.set_perfcount(pdd->qpd.dqm, 0);
-               kfd_ptl_control(pdd, true);
                pdd->dev->kfd->profiler_process = NULL;
        }
        mutex_unlock(&pdd->dev->kfd->profiler_lock);
@@ -1133,6 +1132,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
                pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
                        pdd->dev->id, p->lead_thread->pid);
                kfd_process_profiler_release(p, pdd);
+
+               if (pdd->ptl_disable_req)
+                       kfd_ptl_disable_release(pdd, p);
+
                kfd_process_device_destroy_cwsr_dgpu(pdd);
                kfd_process_device_destroy_ib_mem(pdd);
 
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 8db0c474a769..d48c407e9ee5 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -1572,6 +1572,7 @@ enum kfd_profiler_ops {
        KFD_IOC_PROFILER_PMC = 0,
        KFD_IOC_PROFILER_PC_SAMPLE = 1,
        KFD_IOC_PROFILER_VERSION = 2,
+       KFD_IOC_PROFILER_PTL_CONTROL = 3,
 };
 
 /**
@@ -1583,10 +1584,16 @@ struct kfd_ioctl_pmc_settings {
        __u32 perfcount_enable;   /* Force Perfcount Enable for queues on GPU */
 };
 
+struct kfd_ioctl_ptl_control {
+       __u32 gpu_id; /* user_gpu_id of the GPU whose PTL state is controlled */
+       __u32 enable; /* 0: take a PTL-disable reference; 1: drop it */
+};
+
 struct kfd_ioctl_profiler_args {
        __u32 op;                                               /* kfd_profiler_op */
        union {
                struct kfd_ioctl_pmc_settings  pmc;
+               struct kfd_ioctl_ptl_control   ptl;     /* KFD_IOC_PROFILER_PTL_CONTROL */
                __u32 version;                          /* KFD_IOC_PROFILER_VERSION_NUM */
        };
 };
-- 
2.34.1

Reply via email to