Introduce a Peak TOPS Limiter (PTL) driver that dynamically caps the engine frequency so that the delivered TOPS never exceeds a defined limit (TOPS_limit). This initial implementation provides the core data structures and the kernel-space interfaces (set/get, enable/disable) used to manage PTL state.
PTL performs a firmware handshake to initialize its state and update predefined format types. It supports updating these format types at runtime while user-space tools automatically switch PTL state, and also allows explicitly switching PTL state via newly added commands. Signed-off-by: Perry Yuan <[email protected]> Reviewed-by: Lijo Lazar <[email protected]> Acked-by: Alex Deucher <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 80 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 6 ++ include/uapi/linux/kfd_ioctl.h | 9 +++ 3 files changed, 95 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b0540b009e84..2fbc3f95fedd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -679,6 +679,8 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) return "SPATIAL_PARTITION"; case GFX_CMD_ID_FB_NPS_MODE: return "NPS_MODE_CHANGE"; + case GFX_CMD_ID_PERF_HW: + return "PERF MONITORING HW"; default: return "UNKNOWN CMD"; } @@ -1197,6 +1199,84 @@ int psp_memory_partition(struct psp_context *psp, int mode) return ret; } +static int psp_ptl_fmt_verify(struct psp_context *psp, enum amdgpu_ptl_fmt fmt, + uint32_t *ptl_fmt) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) + return -EINVAL; + + switch (fmt) { + case AMDGPU_PTL_FMT_I8: + *ptl_fmt = GFX_FTYPE_I8; + break; + case AMDGPU_PTL_FMT_F16: + *ptl_fmt = GFX_FTYPE_F16; + break; + case AMDGPU_PTL_FMT_BF16: + *ptl_fmt = GFX_FTYPE_BF16; + break; + case AMDGPU_PTL_FMT_F32: + *ptl_fmt = GFX_FTYPE_F32; + break; + case AMDGPU_PTL_FMT_F64: + *ptl_fmt = GFX_FTYPE_F64; + break; + default: + return -EINVAL; + } + + return 0; +} + +int psp_performance_monitor_hw(struct psp_context *psp, u32 req_code, + uint32_t *ptl_state, uint32_t *fmt1, uint32_t *fmt2) +{ + struct psp_gfx_cmd_resp *cmd; + uint32_t ptl_fmt1, ptl_fmt2; 
+ int ret; + + if (!psp || !ptl_state || !fmt1 || !fmt2) + return -EINVAL; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (psp_ptl_fmt_verify(psp, *fmt1, &ptl_fmt1) || + psp_ptl_fmt_verify(psp, *fmt2, &ptl_fmt2)) + return -EINVAL; + + cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = GFX_CMD_ID_PERF_HW; + cmd->cmd.cmd_req_perf_hw.req = req_code; + cmd->cmd.cmd_req_perf_hw.ptl_state = *ptl_state; + cmd->cmd.cmd_req_perf_hw.pref_format1 = ptl_fmt1; + cmd->cmd.cmd_req_perf_hw.pref_format2 = ptl_fmt2; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + if (ret) + goto out; + + switch (req_code) { + case PSP_PTL_PERF_MON_QUERY: + *ptl_state = cmd->resp.uresp.perf_hw_info.ptl_state; + *fmt1 = cmd->resp.uresp.perf_hw_info.pref_format1; + *fmt2 = cmd->resp.uresp.perf_hw_info.pref_format2; + break; + case PSP_PTL_PERF_MON_SET: + psp->ptl_enabled = *ptl_state; + psp->ptl_fmt1 = ptl_fmt1; + psp->ptl_fmt2 = ptl_fmt2; + break; + } + +out: + release_psp_cmd_buf(psp); + return ret; +} + int psp_spatial_partition(struct psp_context *psp, int mode) { struct psp_gfx_cmd_resp *cmd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 90df8e29f532..47c8becbf710 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -471,6 +471,10 @@ struct psp_context { #if defined(CONFIG_DEBUG_FS) struct spirom_bo *spirom_dump_trip; #endif + enum amdgpu_ptl_fmt ptl_fmt1; + enum amdgpu_ptl_fmt ptl_fmt2; + bool ptl_enabled; + bool ptl_hw_supported; }; struct amdgpu_psp_funcs { @@ -654,5 +658,7 @@ void amdgpu_psp_debugfs_init(struct amdgpu_device *adev); int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type); +int psp_performance_monitor_hw(struct psp_context *psp, u32 req_code, + u32 *ptl_state, u32 *fmt1, u32 *fmt2); #endif diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index abb526c915c3..8db0c474a769 100644 --- 
a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -1558,6 +1558,15 @@ struct kfd_ioctl_dbg_trap_args { }; }; +enum amdgpu_ptl_fmt { + AMDGPU_PTL_FMT_I8 = 0, + AMDGPU_PTL_FMT_F16 = 1, + AMDGPU_PTL_FMT_BF16 = 2, + AMDGPU_PTL_FMT_F32 = 3, + AMDGPU_PTL_FMT_F64 = 4, + AMDGPU_PTL_FMT_INVALID = 5, +}; + #define KFD_IOC_PROFILER_VERSION_NUM 1 enum kfd_profiler_ops { KFD_IOC_PROFILER_PMC = 0, -- 2.34.1
