Introduce a Peak Tops Limiter (PTL) driver that dynamically caps engine frequency to ensure delivered TOPS never exceeds a defined TOPS_limit. This initial implementation provides core data structures and kernel-space interfaces (set/get, enable/disable) to manage PTL state.
PTL performs a firmware handshake to initialize its state and update predefined format types. It supports updating these format types at runtime while user-space tools automatically switch PTL state, and also allows explicitly switching PTL state via newly added commands. Signed-off-by: Perry Yuan <[email protected]> Reviewed-by: Lijo Lazar <[email protected]> Acked-by: Alex Deucher <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 112 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 + drivers/gpu/drm/amd/include/amdgpu_ptl.h | 48 ++++++++ .../gpu/drm/amd/include/kgd_kfd_interface.h | 1 + 7 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/include/amdgpu_ptl.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cdbab7f8cee8..4fee011c2e26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -38,6 +38,8 @@ #include "amdgpu_vm.h" #include "amdgpu_xcp.h" #include "kfd_topology.h" +#include "amdgpu_ptl.h" + extern uint64_t amdgpu_amdkfd_total_mem_size; enum TLB_FLUSH_TYPE { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5e73b9d67325..a07fe386d275 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4440,6 +4440,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); mutex_init(&adev->psp.mutex); + mutex_init(&adev->psp.ptl.mutex); mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b0540b009e84..19b81f515374 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -679,6 +679,8 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) return "SPATIAL_PARTITION"; case GFX_CMD_ID_FB_NPS_MODE: return "NPS_MODE_CHANGE"; + case GFX_CMD_ID_PERF_HW: + return "PERF MONITORING HW"; default: return "UNKNOWN CMD"; } @@ -1197,6 +1199,116 @@ int psp_memory_partition(struct psp_context *psp, int mode) return ret; } +static int psp_ptl_fmt_verify(struct psp_context *psp, enum amdgpu_ptl_fmt fmt, + uint32_t *ptl_fmt) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) + return -EINVAL; + + switch (fmt) { + case AMDGPU_PTL_FMT_I8: + *ptl_fmt = GFX_FTYPE_I8; + break; + case AMDGPU_PTL_FMT_F16: + *ptl_fmt = GFX_FTYPE_F16; + break; + case AMDGPU_PTL_FMT_BF16: + *ptl_fmt = GFX_FTYPE_BF16; + break; + case AMDGPU_PTL_FMT_F32: + *ptl_fmt = GFX_FTYPE_F32; + break; + case AMDGPU_PTL_FMT_F64: + *ptl_fmt = GFX_FTYPE_F64; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int psp_ptl_invoke(struct psp_context *psp, u32 req_code, + uint32_t *ptl_state, uint32_t *fmt1, uint32_t *fmt2) +{ + struct psp_gfx_cmd_resp *cmd; + struct amdgpu_ptl *ptl = &psp->ptl; + int ret; + + cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = GFX_CMD_ID_PERF_HW; + cmd->cmd.cmd_req_perf_hw.req = req_code; + cmd->cmd.cmd_req_perf_hw.ptl_state = *ptl_state; + cmd->cmd.cmd_req_perf_hw.pref_format1 = *fmt1; + cmd->cmd.cmd_req_perf_hw.pref_format2 = *fmt2; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (ret) + goto out; + + /* Parse response */ + switch (req_code) { + case PSP_PTL_PERF_MON_QUERY: + *ptl_state = cmd->resp.uresp.perf_hw_info.ptl_state; + *fmt1 = cmd->resp.uresp.perf_hw_info.pref_format1; + *fmt2 = cmd->resp.uresp.perf_hw_info.pref_format2; + break; + case PSP_PTL_PERF_MON_SET: + /* Update cached state only on success */ + ptl->enabled = *ptl_state; + ptl->fmt1 = *fmt1; + ptl->fmt2 = *fmt2; + break; + } + +out: + release_psp_cmd_buf(psp); + return ret; +} + +int amdgpu_ptl_perf_monitor_ctrl(struct amdgpu_device *adev, u32 req_code, + uint32_t *ptl_state, + enum amdgpu_ptl_fmt *fmt1, + enum amdgpu_ptl_fmt *fmt2) +{ + uint32_t ptl_fmt1, ptl_fmt2; + struct psp_context *psp; + struct amdgpu_ptl *ptl; + + if (!adev || !ptl_state || !fmt1 || !fmt2) + return -EINVAL; + + if (amdgpu_sriov_vf(adev)) + return 0; + + psp = &adev->psp; + ptl = &psp->ptl; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4) || + psp->sos.fw_version < 0x0036081a) + return -EOPNOTSUPP; + + /* Verify formats */ + if (psp_ptl_fmt_verify(psp, *fmt1, &ptl_fmt1) || + psp_ptl_fmt_verify(psp, *fmt2, &ptl_fmt2)) + return -EINVAL; + + /* + * Add check to skip if state and formats are identical to current ones + */ + if (req_code == PSP_PTL_PERF_MON_SET && + ptl->enabled == *ptl_state && + ptl->fmt1 == ptl_fmt1 && + ptl->fmt2 == ptl_fmt2) + return 0; + + return psp_ptl_invoke(psp, req_code, ptl_state, &ptl_fmt1, &ptl_fmt2); +} +} + int psp_spatial_partition(struct psp_context *psp, int mode) { struct psp_gfx_cmd_resp *cmd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 58f691db2e97..332633f6f4be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -31,6 +31,7 @@ #include "ta_ras_if.h" #include "ta_rap_if.h" #include "ta_secureDisplay_if.h" +#include "amdgpu_ptl.h" #define PSP_FENCE_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000 @@ -470,6 +471,7 @@ struct psp_context { #if defined(CONFIG_DEBUG_FS) struct spirom_bo *spirom_dump_trip; #endif + struct amdgpu_ptl ptl; }; struct amdgpu_psp_funcs { @@ -653,5 +655,4 @@ void amdgpu_psp_debugfs_init(struct amdgpu_device *adev); int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type); - #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dbb111a33678..9c37e8248540 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -44,6 +44,7 @@ #include "kfd_smi_events.h" #include "amdgpu_dma_buf.h" #include "kfd_debug.h" +#include "amdgpu_ptl.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); diff --git a/drivers/gpu/drm/amd/include/amdgpu_ptl.h b/drivers/gpu/drm/amd/include/amdgpu_ptl.h new file mode 100644 index 000000000000..12c9e0b4645a --- /dev/null +++ b/drivers/gpu/drm/amd/include/amdgpu_ptl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2026 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __AMDGPU_PTL_H__ +#define __AMDGPU_PTL_H__ + +enum amdgpu_ptl_fmt { + AMDGPU_PTL_FMT_I8 = 0, + AMDGPU_PTL_FMT_F16 = 1, + AMDGPU_PTL_FMT_BF16 = 2, + AMDGPU_PTL_FMT_F32 = 3, + AMDGPU_PTL_FMT_F64 = 4, + AMDGPU_PTL_FMT_F8 = 5, + AMDGPU_PTL_FMT_VECTOR = 6, + AMDGPU_PTL_FMT_INVALID = 7, +}; + +struct amdgpu_ptl { + enum amdgpu_ptl_fmt fmt1; + enum amdgpu_ptl_fmt fmt2; + bool enabled; + bool hw_supported; + struct mutex mutex; +}; + +int amdgpu_ptl_perf_monitor_ctrl(struct amdgpu_device *adev, u32 req_code, + u32 *ptl_state, u32 *fmt1, u32 *fmt2); + +#endif /* __AMDGPU_PTL_H__ */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 9aba8596faa7..6df5afb242ae 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -33,6 +33,7 @@ #include <linux/dma-fence.h> #include "amdgpu_irq.h" #include "amdgpu_gfx.h" +#include "amdgpu_ptl.h" struct pci_dev; struct amdgpu_device; -- 2.34.1
