Introduce a new profiler IOCTL operation that gives userspace explicit control over the Peak Tops Limiter (PTL) state while profiling.
Link: https://github.com/ROCm/rocm-systems/tree/develop/projects/rocprofiler-sdk Signed-off-by: Perry Yuan <[email protected]> Reviewed-by: Yifan Zhang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 102 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 + drivers/gpu/drm/amd/include/amdgpu_ptl.h | 2 + include/uapi/linux/kfd_ioctl.h | 7 ++ 6 files changed, 126 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index aa9307d88fde..4e04eba9879a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2396,6 +2396,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool ena ptl->hw_supported = true; + atomic_set(&ptl->disable_ref, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 9c37e8248540..9a23621542fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1765,6 +1765,104 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) } #endif +static int kfd_ptl_control(struct kfd_process_device *pdd, bool enable) +{ + struct amdgpu_device *adev = pdd->dev->adev; + struct amdgpu_ptl *ptl = &adev->psp.ptl; + enum amdgpu_ptl_fmt pref_format1 = ptl->fmt1; + enum amdgpu_ptl_fmt pref_format2 = ptl->fmt2; + uint32_t ptl_state = enable ? 
1 : 0; + int ret; + + if (!ptl->hw_supported) + return -EOPNOTSUPP; + + if (!pdd->dev->kfd2kgd || !pdd->dev->kfd2kgd->ptl_ctrl) + return -EOPNOTSUPP; + + ret = pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET, + &ptl_state, + &pref_format1, + &pref_format2); + return ret; +} + +int kfd_ptl_disable_request(struct kfd_process_device *pdd, + struct kfd_process *p) +{ + struct amdgpu_device *adev = pdd->dev->adev; + struct amdgpu_ptl *ptl = &adev->psp.ptl; + int ret = 0; + + mutex_lock(&ptl->mutex); + + if (pdd->ptl_disable_req) + goto out; + + if (atomic_inc_return(&ptl->disable_ref) == 1) { + ret = kfd_ptl_control(pdd, false); + if (ret) { + atomic_dec(&ptl->disable_ref); + dev_warn(pdd->dev->adev->dev, + "failed to disable PTL\n"); + goto out; + } + } + pdd->ptl_disable_req = true; + +out: + mutex_unlock(&ptl->mutex); + return ret; +} + +int kfd_ptl_disable_release(struct kfd_process_device *pdd, + struct kfd_process *p) +{ + struct amdgpu_device *adev = pdd->dev->adev; + struct amdgpu_ptl *ptl = &adev->psp.ptl; + int ret = 0; + + mutex_lock(&ptl->mutex); + + if (!pdd->ptl_disable_req) + goto out; + + if (atomic_dec_return(&ptl->disable_ref) == 0) { + ret = kfd_ptl_control(pdd, true); + if (ret) { + atomic_inc(&ptl->disable_ref); + dev_warn(adev->dev, "Failed to enable PTL on release: %d\n", ret); + goto out; + } + } + pdd->ptl_disable_req = false; + +out: + mutex_unlock(&ptl->mutex); + return ret; +} + +static int kfd_profiler_ptl_control(struct kfd_process *p, + struct kfd_ioctl_ptl_control *args) +{ + struct kfd_process_device *pdd; + int ret; + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, args->gpu_id); + mutex_unlock(&p->mutex); + + if (!pdd || !pdd->dev || !pdd->dev->kfd) + return -EINVAL; + + if (args->enable == 0) + ret = kfd_ptl_disable_request(pdd, p); + else + ret = kfd_ptl_disable_release(pdd, p); + + return ret; +} + static int criu_checkpoint_process(struct kfd_process *p, uint8_t __user *user_priv_data, uint64_t 
*priv_offset) @@ -3230,6 +3328,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, if (!kfd->profiler_process) { kfd->profiler_process = p; status = 0; + kfd_ptl_disable_request(pdd, p); } else if (kfd->profiler_process == p) { status = -EALREADY; } else { @@ -3238,6 +3337,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, } else if (op == 0 && kfd->profiler_process == p) { kfd->profiler_process = NULL; status = 0; + kfd_ptl_disable_release(pdd, p); } mutex_unlock(&kfd->profiler_lock); @@ -3280,6 +3380,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d return 0; case KFD_IOC_PROFILER_PMC: return kfd_profiler_pmc(p, &args->pmc); + case KFD_IOC_PROFILER_PTL_CONTROL: + return kfd_profiler_ptl_control(p, &args->ptl); } return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8983065645fa..48347065b9cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -872,6 +872,8 @@ struct kfd_process_device { bool has_reset_queue; u32 pasid; + /* Indicates this process has requested PTL stay disabled */ + bool ptl_disable_req; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -1607,6 +1609,13 @@ static inline bool kfd_is_first_node(struct kfd_node *node) return (node == node->kfd->nodes[0]); } +/* PTL support */ +int kfd_ptl_control(struct kfd_process_device *pdd, bool enable); +int kfd_ptl_disable_request(struct kfd_process_device *pdd, + struct kfd_process *p); +int kfd_ptl_disable_release(struct kfd_process_device *pdd, + struct kfd_process *p); + /* Debugfs */ #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index deca19b478d0..882080dc4925 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1132,6 +1132,10 @@ static void 
kfd_process_destroy_pdds(struct kfd_process *p) pr_debug("Releasing pdd (topology id %d, for pid %d)\n", pdd->dev->id, p->lead_thread->pid); kfd_process_profiler_release(p, pdd); + + if (pdd->ptl_disable_req) + kfd_ptl_disable_release(pdd, p); + kfd_process_device_destroy_cwsr_dgpu(pdd); kfd_process_device_destroy_ib_mem(pdd); diff --git a/drivers/gpu/drm/amd/include/amdgpu_ptl.h b/drivers/gpu/drm/amd/include/amdgpu_ptl.h index e5ea1084bb09..f944ab45d1ea 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_ptl.h +++ b/drivers/gpu/drm/amd/include/amdgpu_ptl.h @@ -39,6 +39,8 @@ struct amdgpu_ptl { enum amdgpu_ptl_fmt fmt2; bool enabled; bool hw_supported; + /* PTL disable reference counting */ + atomic_t disable_ref; struct mutex mutex; }; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index a8b2a18d07cf..da93daa3283c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -1562,6 +1562,7 @@ struct kfd_ioctl_dbg_trap_args { enum kfd_profiler_ops { KFD_IOC_PROFILER_PMC = 0, KFD_IOC_PROFILER_VERSION = 2, + KFD_IOC_PROFILER_PTL_CONTROL = 3, }; /** @@ -1573,10 +1574,16 @@ struct kfd_ioctl_pmc_settings { __u32 perfcount_enable; /* Force Perfcount Enable for queues on GPU */ }; +struct kfd_ioctl_ptl_control { + __u32 gpu_id; /* user_gpu_id */ + __u32 enable; /* set 1 to enable PTL, set 0 to disable PTL */ +}; + struct kfd_ioctl_profiler_args { __u32 op; /* kfd_profiler_op */ union { struct kfd_ioctl_pmc_settings pmc; + struct kfd_ioctl_ptl_control ptl; __u32 version; /* KFD_IOC_PROFILER_VERSION_NUM */ }; }; -- 2.34.1
