Introduce a Peak Tops Limiter (PTL) driver that dynamically caps
engine frequency to ensure delivered TOPS never exceeds a defined
TOPS_limit. This initial implementation provides core data structures
and kernel-space interfaces (set/get, enable/disable) to manage PTL state.

PTL performs a handshake with the firmware to initialize its state and to
update the predefined format types. The format types can be updated at
runtime, user-space tools can switch the PTL state automatically, and the
newly added commands allow the PTL state to be switched explicitly.

Signed-off-by: Perry Yuan <[email protected]>
Reviewed-by: Lijo Lazar <[email protected]>
Acked-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 80 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  6 ++
 include/uapi/linux/kfd_ioctl.h          |  9 +++
 3 files changed, 95 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index b0540b009e84..2fbc3f95fedd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -679,6 +679,8 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
                return "SPATIAL_PARTITION";
        case GFX_CMD_ID_FB_NPS_MODE:
                return "NPS_MODE_CHANGE";
+       case GFX_CMD_ID_PERF_HW:
+               return "PERF MONITORING HW";
        default:
                return "UNKNOWN CMD";
        }
@@ -1197,6 +1199,84 @@ int psp_memory_partition(struct psp_context *psp, int mode)
        return ret;
 }
 
+/*
+ * Translate @fmt into the matching GFX_FTYPE_* code and store it in @ptl_fmt.
+ *
+ * Returns 0 on success.  Returns -EINVAL, leaving @ptl_fmt untouched, when the
+ * GC IP version is not 9.4.4 or @fmt is outside AMDGPU_PTL_FMT_I8..F64.
+ */
+static int psp_ptl_fmt_verify(struct psp_context *psp, enum amdgpu_ptl_fmt fmt,
+                             uint32_t *ptl_fmt)
+{
+       /* Indexed by enum amdgpu_ptl_fmt; AMDGPU_PTL_FMT_INVALID has no entry. */
+       static const uint32_t fw_ftype[] = {
+               [AMDGPU_PTL_FMT_I8]   = GFX_FTYPE_I8,
+               [AMDGPU_PTL_FMT_F16]  = GFX_FTYPE_F16,
+               [AMDGPU_PTL_FMT_BF16] = GFX_FTYPE_BF16,
+               [AMDGPU_PTL_FMT_F32]  = GFX_FTYPE_F32,
+               [AMDGPU_PTL_FMT_F64]  = GFX_FTYPE_F64,
+       };
+       /* Unsigned copy so one comparison rejects every out-of-range value. */
+       unsigned int idx = fmt;
+
+       if (amdgpu_ip_version(psp->adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4))
+               return -EINVAL;
+
+       if (idx > AMDGPU_PTL_FMT_F64)
+               return -EINVAL;
+
+       *ptl_fmt = fw_ftype[idx];
+
+       return 0;
+}
+
+/*
+ * Submit a GFX_CMD_ID_PERF_HW request (@req_code) to the PSP.  @fmt1/@fmt2 are
+ * validated with psp_ptl_fmt_verify() for every request code.  On a successful
+ * PSP_PTL_PERF_MON_QUERY the three outputs are overwritten verbatim from the
+ * response; on a successful PSP_PTL_PERF_MON_SET the state and the translated
+ * formats are cached in @psp.  Returns -EINVAL for NULL arguments or rejected
+ * formats, 0 without submitting anything when amdgpu_sriov_vf() is true, and
+ * otherwise the psp_cmd_submit_buf() result.
+ */
+int psp_performance_monitor_hw(struct psp_context *psp, u32 req_code,
+                              uint32_t *ptl_state, uint32_t *fmt1, uint32_t *fmt2)
+{
+       struct psp_gfx_cmd_resp *cmd;
+       uint32_t fw_fmt1, fw_fmt2;
+       int ret;
+
+       if (!psp || !ptl_state || !fmt1 || !fmt2)
+               return -EINVAL;
+       if (amdgpu_sriov_vf(psp->adev))
+               return 0;
+       if (psp_ptl_fmt_verify(psp, *fmt1, &fw_fmt1))
+               return -EINVAL;
+       if (psp_ptl_fmt_verify(psp, *fmt2, &fw_fmt2))
+               return -EINVAL;
+
+       cmd = acquire_psp_cmd_buf(psp);
+       cmd->cmd_id = GFX_CMD_ID_PERF_HW;
+       cmd->cmd.cmd_req_perf_hw.req = req_code;
+       cmd->cmd.cmd_req_perf_hw.ptl_state = *ptl_state;
+       cmd->cmd.cmd_req_perf_hw.pref_format1 = fw_fmt1;
+       cmd->cmd.cmd_req_perf_hw.pref_format2 = fw_fmt2;
+
+       ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+       if (!ret && req_code == PSP_PTL_PERF_MON_QUERY) {
+               *ptl_state = cmd->resp.uresp.perf_hw_info.ptl_state;
+               *fmt1 = cmd->resp.uresp.perf_hw_info.pref_format1;
+               *fmt2 = cmd->resp.uresp.perf_hw_info.pref_format2;
+       } else if (!ret && req_code == PSP_PTL_PERF_MON_SET) {
+               psp->ptl_enabled = *ptl_state;
+               psp->ptl_fmt1 = fw_fmt1;
+               psp->ptl_fmt2 = fw_fmt2;
+       }
+
+       release_psp_cmd_buf(psp);
+       return ret;
+}
+
 int psp_spatial_partition(struct psp_context *psp, int mode)
 {
        struct psp_gfx_cmd_resp *cmd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 90df8e29f532..47c8becbf710 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -471,6 +471,10 @@ struct psp_context {
 #if defined(CONFIG_DEBUG_FS)
        struct spirom_bo *spirom_dump_trip;
 #endif
+       enum amdgpu_ptl_fmt             ptl_fmt1;
+       enum amdgpu_ptl_fmt             ptl_fmt2;
+       bool                            ptl_enabled;
+       bool                            ptl_hw_supported;
 };
 
 struct amdgpu_psp_funcs {
@@ -654,5 +658,7 @@ void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
 int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode,
                           enum psp_gfx_fw_type *type);
 
+int psp_performance_monitor_hw(struct psp_context *psp, u32 req_code,
+                              u32 *ptl_state, u32 *fmt1, u32 *fmt2);
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index abb526c915c3..8db0c474a769 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -1558,6 +1558,15 @@ struct kfd_ioctl_dbg_trap_args {
        };
 };
 
+enum amdgpu_ptl_fmt { /* Peak Tops Limiter (PTL) format types */
+       AMDGPU_PTL_FMT_I8   = 0, /* sent to the PSP as GFX_FTYPE_I8 */
+       AMDGPU_PTL_FMT_F16  = 1, /* sent to the PSP as GFX_FTYPE_F16 */
+       AMDGPU_PTL_FMT_BF16 = 2, /* sent to the PSP as GFX_FTYPE_BF16 */
+       AMDGPU_PTL_FMT_F32  = 3, /* sent to the PSP as GFX_FTYPE_F32 */
+       AMDGPU_PTL_FMT_F64  = 4, /* sent to the PSP as GFX_FTYPE_F64 */
+       AMDGPU_PTL_FMT_INVALID = 5, /* rejected with -EINVAL by the PSP driver */
+};
+
 #define KFD_IOC_PROFILER_VERSION_NUM 1
 enum kfd_profiler_ops {
        KFD_IOC_PROFILER_PMC = 0,
-- 
2.34.1

Reply via email to