Introduce a new IOCTL option that gives userspace explicit control over the Peak Tops Limiter (PTL) state for profiling.
Signed-off-by: Perry Yuan <[email protected]> Reviewed-by: Yifan Zhang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 89 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 5 +- include/uapi/linux/kfd_ioctl.h | 7 ++ 6 files changed, 108 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 0a2f8d33a0ad..1af641ae9a02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -476,6 +476,8 @@ struct psp_context { enum amdgpu_ptl_fmt ptl_fmt2; bool ptl_enabled; bool ptl_hw_supported; + /* PTL disable reference counting */ + atomic_t ptl_disable_ref; }; struct amdgpu_psp_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index d94bf21db281..472e2f41fed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2395,6 +2395,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool sta adev->psp.ptl_hw_supported = true; + atomic_set(&adev->psp.ptl_disable_ref, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 285219781939..6457c5703f99 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1785,6 +1785,88 @@ int kfd_ptl_control(struct kfd_process_device *pdd, bool enable) return ret; } +int kfd_ptl_disable_request(struct kfd_process_device *pdd, + struct kfd_process *p) +{ + struct amdgpu_device *adev; + int ret = 0; + + if (!pdd) + return -ENODEV; + + adev = pdd->dev->adev; + mutex_lock(&p->mutex); + + if (pdd->ptl_disable_req) + goto out; + + if (atomic_inc_return(&adev->psp.ptl_disable_ref) == 1) { + ret = kfd_ptl_control(pdd, false); + if 
(ret) { + atomic_dec(&adev->psp.ptl_disable_ref); + dev_warn(pdd->dev->adev->dev, + "failed to disable PTL\n"); + goto out; + } + } + pdd->ptl_disable_req = true; + +out: + mutex_unlock(&p->mutex); + return ret; +} + +int kfd_ptl_disable_release(struct kfd_process_device *pdd, + struct kfd_process *p) +{ + struct amdgpu_device *adev; + int ret = 0; + + if (!pdd) + return -ENODEV; + + adev = pdd->dev->adev; + mutex_lock(&p->mutex); + if (!pdd->ptl_disable_req) + goto out; + + if (atomic_dec_return(&adev->psp.ptl_disable_ref) == 0) { + ret = kfd_ptl_control(pdd, true); + if (ret) { + atomic_inc(&adev->psp.ptl_disable_ref); + dev_warn(pdd->dev->adev->dev, + "failed to enable PTL\n"); + goto out; + } + } + pdd->ptl_disable_req = false; + +out: + mutex_unlock(&p->mutex); + return ret; +} + +static int kfd_profiler_ptl_control(struct kfd_process *p, + struct kfd_ioctl_ptl_control *args) +{ + struct kfd_process_device *pdd; + int ret; + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, args->gpu_id); + mutex_unlock(&p->mutex); + + if (!pdd) + return -ENODEV; + + if (args->enable == 0) + ret = kfd_ptl_disable_request(pdd, p); + else + ret = kfd_ptl_disable_release(pdd, p); + + return ret; +} + static int criu_checkpoint_process(struct kfd_process *p, uint8_t __user *user_priv_data, uint64_t *priv_offset) @@ -3250,7 +3332,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, if (!kfd->profiler_process) { kfd->profiler_process = p; status = 0; - kfd_ptl_control(pdd, false); + kfd_ptl_disable_request(pdd, p); } else if (kfd->profiler_process == p) { status = -EALREADY; } else { @@ -3259,7 +3341,8 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, } else if (op == 0 && kfd->profiler_process == p) { kfd->profiler_process = NULL; status = 0; - kfd_ptl_control(pdd, true); + kfd_ptl_disable_release(pdd, p); + } mutex_unlock(&kfd->profiler_lock); @@ -3302,6 +3385,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct 
kfd_process *p, void *d return 0; case KFD_IOC_PROFILER_PMC: return kfd_profiler_pmc(p, &args->pmc); + case KFD_IOC_PROFILER_PTL_CONTROL: + return kfd_profiler_ptl_control(p, &args->ptl); } return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 164f69924a3d..48347065b9cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -872,6 +872,8 @@ struct kfd_process_device { bool has_reset_queue; u32 pasid; + /* Indicates this process has requested PTL stay disabled */ + bool ptl_disable_req; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -1609,6 +1611,10 @@ static inline bool kfd_is_first_node(struct kfd_node *node) /* PTL support */ int kfd_ptl_control(struct kfd_process_device *pdd, bool enable); +int kfd_ptl_disable_request(struct kfd_process_device *pdd, + struct kfd_process *p); +int kfd_ptl_disable_release(struct kfd_process_device *pdd, + struct kfd_process *p); /* Debugfs */ #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 5114ac4da5b9..882080dc4925 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1115,7 +1115,6 @@ static void kfd_process_profiler_release(struct kfd_process *p, struct kfd_proce mutex_lock(&pdd->dev->kfd->profiler_lock); if (pdd->dev->kfd->profiler_process == p) { pdd->qpd.dqm->ops.set_perfcount(pdd->qpd.dqm, 0); - kfd_ptl_control(pdd, true); pdd->dev->kfd->profiler_process = NULL; } mutex_unlock(&pdd->dev->kfd->profiler_lock); @@ -1133,6 +1132,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pr_debug("Releasing pdd (topology id %d, for pid %d)\n", pdd->dev->id, p->lead_thread->pid); kfd_process_profiler_release(p, pdd); + + if (pdd->ptl_disable_req) + kfd_ptl_disable_release(pdd, p); + kfd_process_device_destroy_cwsr_dgpu(pdd); 
kfd_process_device_destroy_ib_mem(pdd); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 8db0c474a769..d48c407e9ee5 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -1572,6 +1572,7 @@ enum kfd_profiler_ops { KFD_IOC_PROFILER_PMC = 0, KFD_IOC_PROFILER_PC_SAMPLE = 1, KFD_IOC_PROFILER_VERSION = 2, + KFD_IOC_PROFILER_PTL_CONTROL = 3, }; /** @@ -1583,10 +1584,16 @@ struct kfd_ioctl_pmc_settings { __u32 perfcount_enable; /* Force Perfcount Enable for queues on GPU */ }; +struct kfd_ioctl_ptl_control { + __u32 gpu_id; /* user_gpu_id */ + __u32 enable; /* set 1 to enable PTL, set 0 to disable PTL */ +}; + struct kfd_ioctl_profiler_args { __u32 op; /* kfd_profiler_op */ union { struct kfd_ioctl_pmc_settings pmc; + struct kfd_ioctl_ptl_control ptl; __u32 version; /* KFD_IOC_PROFILER_VERSION_NUM */ }; }; -- 2.34.1
