[AMD Official Use Only - AMD Internal Distribution Only]

-----Original Message-----
From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> On Behalf Of Lijo Lazar
Sent: Tuesday, September 9, 2025 13:48
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <hawking.zh...@amd.com>; Deucher, Alexander 
<alexander.deuc...@amd.com>; Kamal, Asad <asad.ka...@amd.com>
Subject: [PATCH v3 1/7] drm/amdgpu/pm: Add definition for gpu_metrics v1.9

Add gpu metrics definition which is only a set of gpu metrics attributes. A 
field is encoded by its id, type and number of instances.

Signed-off-by: Lijo Lazar <lijo.la...@amd.com>
---
v2: Add unit field and change encoding to 64-bit.
v3: Fix unit encoding

 .../gpu/drm/amd/include/kgd_pp_interface.h    | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2f7e4b5bebf3..5538c7c671d3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -528,6 +528,110 @@ struct metrics_table_header {
        uint8_t                         content_revision;
 };

+enum amdgpu_metrics_attr_id {
+       AMDGPU_METRICS_ATTR_TEMPERATURE_HOTSPOT,
+       AMDGPU_METRICS_ATTR_TEMPERATURE_MEM,
+       AMDGPU_METRICS_ATTR_TEMPERATURE_VRSOC,
+       AMDGPU_METRICS_ATTR_CURR_SOCKET_POWER,
+       AMDGPU_METRICS_ATTR_AVERAGE_GFX_ACTIVITY,
+       AMDGPU_METRICS_ATTR_AVERAGE_UMC_ACTIVITY,
+       AMDGPU_METRICS_ATTR_MEM_MAX_BANDWIDTH,
+       AMDGPU_METRICS_ATTR_ENERGY_ACCUMULATOR,
+       AMDGPU_METRICS_ATTR_SYSTEM_CLOCK_COUNTER,
+       AMDGPU_METRICS_ATTR_ACCUMULATION_COUNTER,
+       AMDGPU_METRICS_ATTR_PROCHOT_RESIDENCY_ACC,
+       AMDGPU_METRICS_ATTR_PPT_RESIDENCY_ACC,
+       AMDGPU_METRICS_ATTR_SOCKET_THM_RESIDENCY_ACC,
+       AMDGPU_METRICS_ATTR_VR_THM_RESIDENCY_ACC,
+       AMDGPU_METRICS_ATTR_HBM_THM_RESIDENCY_ACC,
+       AMDGPU_METRICS_ATTR_GFXCLK_LOCK_STATUS,
+       AMDGPU_METRICS_ATTR_PCIE_LINK_WIDTH,
+       AMDGPU_METRICS_ATTR_PCIE_LINK_SPEED,
+       AMDGPU_METRICS_ATTR_XGMI_LINK_WIDTH,
+       AMDGPU_METRICS_ATTR_XGMI_LINK_SPEED,
+       AMDGPU_METRICS_ATTR_GFX_ACTIVITY_ACC,
+       AMDGPU_METRICS_ATTR_MEM_ACTIVITY_ACC,
+       AMDGPU_METRICS_ATTR_PCIE_BANDWIDTH_ACC,
+       AMDGPU_METRICS_ATTR_PCIE_BANDWIDTH_INST,
+       AMDGPU_METRICS_ATTR_PCIE_L0_TO_RECOV_COUNT_ACC,
+       AMDGPU_METRICS_ATTR_PCIE_REPLAY_COUNT_ACC,
+       AMDGPU_METRICS_ATTR_PCIE_REPLAY_ROVER_COUNT_ACC,
+       AMDGPU_METRICS_ATTR_PCIE_NAK_SENT_COUNT_ACC,
+       AMDGPU_METRICS_ATTR_PCIE_NAK_RCVD_COUNT_ACC,
+       AMDGPU_METRICS_ATTR_XGMI_READ_DATA_ACC,
+       AMDGPU_METRICS_ATTR_XGMI_WRITE_DATA_ACC,
+       AMDGPU_METRICS_ATTR_XGMI_LINK_STATUS,
+       AMDGPU_METRICS_ATTR_FIRMWARE_TIMESTAMP,
+       AMDGPU_METRICS_ATTR_CURRENT_GFXCLK,
+       AMDGPU_METRICS_ATTR_CURRENT_SOCCLK,
+       AMDGPU_METRICS_ATTR_CURRENT_VCLK0,
+       AMDGPU_METRICS_ATTR_CURRENT_DCLK0,
+       AMDGPU_METRICS_ATTR_CURRENT_UCLK,
+       AMDGPU_METRICS_ATTR_NUM_PARTITION,
+       AMDGPU_METRICS_ATTR_PCIE_LC_PERF_OTHER_END_RECOVERY,
+       AMDGPU_METRICS_ATTR_GFX_BUSY_INST,
+       AMDGPU_METRICS_ATTR_JPEG_BUSY,
+       AMDGPU_METRICS_ATTR_VCN_BUSY,
+       AMDGPU_METRICS_ATTR_GFX_BUSY_ACC,
+       AMDGPU_METRICS_ATTR_GFX_BELOW_HOST_LIMIT_PPT_ACC,
+       AMDGPU_METRICS_ATTR_GFX_BELOW_HOST_LIMIT_THM_ACC,
+       AMDGPU_METRICS_ATTR_GFX_LOW_UTILIZATION_ACC,
+       AMDGPU_METRICS_ATTR_GFX_BELOW_HOST_LIMIT_TOTAL_ACC,
+       AMDGPU_METRICS_ATTR_MAX,
+};

[kevin]:
How about changing the name prefix to "AMDGPU_METRICS_ID" or
"AMDGPU_METRICS_ATTR_ID"?
+
+enum amdgpu_metrics_attr_type {
+       AMDGPU_METRICS_TYPE_U8,
+       AMDGPU_METRICS_TYPE_S8,
+       AMDGPU_METRICS_TYPE_U16,
+       AMDGPU_METRICS_TYPE_S16,
+       AMDGPU_METRICS_TYPE_U32,
+       AMDGPU_METRICS_TYPE_S32,
+       AMDGPU_METRICS_TYPE_U64,
+       AMDGPU_METRICS_TYPE_S64,
+       AMDGPU_METRICS_TYPE_MAX,
+};
+
+enum amdgpu_metrics_attr_unit {
+       /* None */
+       AMDGPU_METRICS_UNIT_NONE,
+       /* MHz*/
+       AMDGPU_METRICS_UNIT_CLOCK_1,
+       /* Degree Celsius*/
+       AMDGPU_METRICS_UNIT_TEMP_1,
+       /* Watts*/
+       AMDGPU_METRICS_UNIT_POWER_1,
+       /* In nanoseconds*/
+       AMDGPU_METRICS_UNIT_TIME_1,
+       /* In 10 nanoseconds*/
+       AMDGPU_METRICS_UNIT_TIME_2,
+       /* Speed in GT/s */
+       AMDGPU_METRICS_UNIT_SPEED_1,
+       /* Speed in 0.1 GT/s */
+       AMDGPU_METRICS_UNIT_SPEED_2,
+       /* Bandwidth GB/s */
+       AMDGPU_METRICS_UNIT_BW_1,
+       /* Data in KB */
+       AMDGPU_METRICS_UNIT_DATA_1,
+       /* Percentage */
+       AMDGPU_METRICS_UNIT_PERCENT,
+       AMDGPU_METRICS_UNIT_MAX,
+};
+
+#define AMDGPU_METRICS_ATTR_UNIT_MASK 0xFF000000
+#define AMDGPU_METRICS_ATTR_UNIT_SHIFT 24
+#define AMDGPU_METRICS_ATTR_TYPE_MASK 0x00F00000
+#define AMDGPU_METRICS_ATTR_TYPE_SHIFT 20
+#define AMDGPU_METRICS_ATTR_ID_MASK 0x000FFC00
+#define AMDGPU_METRICS_ATTR_ID_SHIFT 10
+#define AMDGPU_METRICS_ATTR_INST_MASK 0x000003FF
+#define AMDGPU_METRICS_ATTR_INST_SHIFT 0
+
+#define AMDGPU_METRICS_ENC_ATTR(unit, type, id, inst)      \
+       (((u64)(unit) << AMDGPU_METRICS_ATTR_UNIT_SHIFT) | \
+        ((u64)(type) << AMDGPU_METRICS_ATTR_TYPE_SHIFT) | \
+        ((u64)(id) << AMDGPU_METRICS_ATTR_ID_SHIFT) | (inst))
+
 /*
  * gpu_metrics_v1_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v1_1 or later instead.
@@ -1217,6 +1321,19 @@ struct gpu_metrics_v1_8 {
        uint32_t                        pcie_lc_perf_other_end_recovery;
 };

+struct gpu_metrics_attr {
+       /* Field type encoded with AMDGPU_METRICS_ENC_ATTR */
+       uint64_t attr_encoding;
+       /* Attribute value, depends on attr_encoding */
+       void *attr_value;
+};
[kevin]:

I don't think "void *" is a good idea for exporting data through an interface:
the pointer width depends on the OS/CPU word size (4 bytes on a 32-bit system
but 8 bytes on a 64-bit system).
So it would be better to change it to the uint64_t type.

Best Regards,
Kevin
+
+struct gpu_metrics_v1_9 {
+       struct metrics_table_header common_header;
+       int attr_count;
+       struct gpu_metrics_attr metrics_attrs[];
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
--
2.49.0

Reply via email to