[AMD Official Use Only - AMD Internal Distribution Only]

-----Original Message-----
From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> On Behalf Of Lijo Lazar
Sent: Tuesday, September 9, 2025 13:48
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <hawking.zh...@amd.com>; Deucher, Alexander <alexander.deuc...@amd.com>; Kamal, Asad <asad.ka...@amd.com>
Subject: [PATCH v3 1/7] drm/amdgpu/pm: Add definition for gpu_metrics v1.9
Add gpu metrics definition which is only a set of gpu metrics attributes.
A field is encoded by its id, type and number of instances.

Signed-off-by: Lijo Lazar <lijo.la...@amd.com>
---
v2: Add unit field and change encoding to 64-bit.
v3: Fix unit encoding

 .../gpu/drm/amd/include/kgd_pp_interface.h | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2f7e4b5bebf3..5538c7c671d3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -528,6 +528,110 @@ struct metrics_table_header {
 	uint8_t content_revision;
 };

+enum amdgpu_metrics_attr_id {
+	AMDGPU_METRICS_ATTR_TEMPERATURE_HOTSPOT,
+	AMDGPU_METRICS_ATTR_TEMPERATURE_MEM,
+	AMDGPU_METRICS_ATTR_TEMPERATURE_VRSOC,
+	AMDGPU_METRICS_ATTR_CURR_SOCKET_POWER,
+	AMDGPU_METRICS_ATTR_AVERAGE_GFX_ACTIVITY,
+	AMDGPU_METRICS_ATTR_AVERAGE_UMC_ACTIVITY,
+	AMDGPU_METRICS_ATTR_MEM_MAX_BANDWIDTH,
+	AMDGPU_METRICS_ATTR_ENERGY_ACCUMULATOR,
+	AMDGPU_METRICS_ATTR_SYSTEM_CLOCK_COUNTER,
+	AMDGPU_METRICS_ATTR_ACCUMULATION_COUNTER,
+	AMDGPU_METRICS_ATTR_PROCHOT_RESIDENCY_ACC,
+	AMDGPU_METRICS_ATTR_PPT_RESIDENCY_ACC,
+	AMDGPU_METRICS_ATTR_SOCKET_THM_RESIDENCY_ACC,
+	AMDGPU_METRICS_ATTR_VR_THM_RESIDENCY_ACC,
+	AMDGPU_METRICS_ATTR_HBM_THM_RESIDENCY_ACC,
+	AMDGPU_METRICS_ATTR_GFXCLK_LOCK_STATUS,
+	AMDGPU_METRICS_ATTR_PCIE_LINK_WIDTH,
+	AMDGPU_METRICS_ATTR_PCIE_LINK_SPEED,
+	AMDGPU_METRICS_ATTR_XGMI_LINK_WIDTH,
+	AMDGPU_METRICS_ATTR_XGMI_LINK_SPEED,
+	AMDGPU_METRICS_ATTR_GFX_ACTIVITY_ACC,
+	AMDGPU_METRICS_ATTR_MEM_ACTIVITY_ACC,
+	AMDGPU_METRICS_ATTR_PCIE_BANDWIDTH_ACC,
+	AMDGPU_METRICS_ATTR_PCIE_BANDWIDTH_INST,
+	AMDGPU_METRICS_ATTR_PCIE_L0_TO_RECOV_COUNT_ACC,
+	AMDGPU_METRICS_ATTR_PCIE_REPLAY_COUNT_ACC,
+	AMDGPU_METRICS_ATTR_PCIE_REPLAY_ROVER_COUNT_ACC,
+	AMDGPU_METRICS_ATTR_PCIE_NAK_SENT_COUNT_ACC,
+	AMDGPU_METRICS_ATTR_PCIE_NAK_RCVD_COUNT_ACC,
+	AMDGPU_METRICS_ATTR_XGMI_READ_DATA_ACC,
+	AMDGPU_METRICS_ATTR_XGMI_WRITE_DATA_ACC,
+	AMDGPU_METRICS_ATTR_XGMI_LINK_STATUS,
+	AMDGPU_METRICS_ATTR_FIRMWARE_TIMESTAMP,
+	AMDGPU_METRICS_ATTR_CURRENT_GFXCLK,
+	AMDGPU_METRICS_ATTR_CURRENT_SOCCLK,
+	AMDGPU_METRICS_ATTR_CURRENT_VCLK0,
+	AMDGPU_METRICS_ATTR_CURRENT_DCLK0,
+	AMDGPU_METRICS_ATTR_CURRENT_UCLK,
+	AMDGPU_METRICS_ATTR_NUM_PARTITION,
+	AMDGPU_METRICS_ATTR_PCIE_LC_PERF_OTHER_END_RECOVERY,
+	AMDGPU_METRICS_ATTR_GFX_BUSY_INST,
+	AMDGPU_METRICS_ATTR_JPEG_BUSY,
+	AMDGPU_METRICS_ATTR_VCN_BUSY,
+	AMDGPU_METRICS_ATTR_GFX_BUSY_ACC,
+	AMDGPU_METRICS_ATTR_GFX_BELOW_HOST_LIMIT_PPT_ACC,
+	AMDGPU_METRICS_ATTR_GFX_BELOW_HOST_LIMIT_THM_ACC,
+	AMDGPU_METRICS_ATTR_GFX_LOW_UTILIZATION_ACC,
+	AMDGPU_METRICS_ATTR_GFX_BELOW_HOST_LIMIT_TOTAL_ACC,
+	AMDGPU_METRICS_ATTR_MAX,
+};

[kevin]: How about modifying the name prefix to "AMDGPU_METRICS_ID" or "AMDGPU_METRICS_ATTR_ID"?

+
+enum amdgpu_metrics_attr_type {
+	AMDGPU_METRICS_TYPE_U8,
+	AMDGPU_METRICS_TYPE_S8,
+	AMDGPU_METRICS_TYPE_U16,
+	AMDGPU_METRICS_TYPE_S16,
+	AMDGPU_METRICS_TYPE_U32,
+	AMDGPU_METRICS_TYPE_S32,
+	AMDGPU_METRICS_TYPE_U64,
+	AMDGPU_METRICS_TYPE_S64,
+	AMDGPU_METRICS_TYPE_MAX,
+};
+
+enum amdgpu_metrics_attr_unit {
+	/* None */
+	AMDGPU_METRICS_UNIT_NONE,
+	/* MHz*/
+	AMDGPU_METRICS_UNIT_CLOCK_1,
+	/* Degree Celsius*/
+	AMDGPU_METRICS_UNIT_TEMP_1,
+	/* Watts*/
+	AMDGPU_METRICS_UNIT_POWER_1,
+	/* In nanoseconds*/
+	AMDGPU_METRICS_UNIT_TIME_1,
+	/* In 10 nanoseconds*/
+	AMDGPU_METRICS_UNIT_TIME_2,
+	/* Speed in GT/s */
+	AMDGPU_METRICS_UNIT_SPEED_1,
+	/* Speed in 0.1 GT/s */
+	AMDGPU_METRICS_UNIT_SPEED_2,
+	/* Bandwidth GB/s */
+	AMDGPU_METRICS_UNIT_BW_1,
+	/* Data in KB */
+	AMDGPU_METRICS_UNIT_DATA_1,
+	/* Percentage */
+	AMDGPU_METRICS_UNIT_PERCENT,
+	AMDGPU_METRICS_UNIT_MAX,
+};
+
+#define AMDGPU_METRICS_ATTR_UNIT_MASK 0xFF000000
+#define AMDGPU_METRICS_ATTR_UNIT_SHIFT 24
+#define AMDGPU_METRICS_ATTR_TYPE_MASK 0x00F00000
+#define AMDGPU_METRICS_ATTR_TYPE_SHIFT 20
+#define AMDGPU_METRICS_ATTR_ID_MASK 0x000FFC00
+#define AMDGPU_METRICS_ATTR_ID_SHIFT 10
+#define AMDGPU_METRICS_ATTR_INST_MASK 0x000003FF
+#define AMDGPU_METRICS_ATTR_INST_SHIFT 0
+
+#define AMDGPU_METRICS_ENC_ATTR(unit, type, id, inst) \
+	(((u64)(unit) << AMDGPU_METRICS_ATTR_UNIT_SHIFT) | \
+	((u64)(type) << AMDGPU_METRICS_ATTR_TYPE_SHIFT) | \
+	((u64)(id) << AMDGPU_METRICS_ATTR_ID_SHIFT) | (inst))
+
 /*
  * gpu_metrics_v1_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v1_1 or later instead.
@@ -1217,6 +1321,19 @@ struct gpu_metrics_v1_8 {
 	uint32_t pcie_lc_perf_other_end_recovery;
 };

+struct gpu_metrics_attr {
+	/* Field type encoded with AMDGPU_METRICS_ENC_ATTR */
+	uint64_t attr_encoding;
+	/* Attribute value, depends on attr_encoding */
+	void *attr_value;
+};

[kevin]: I think "void *" is not a good idea for exporting data as an interface, because the pointer width depends on the OS/CPU width (4 bytes on a 32-bit system but 8 bytes on a 64-bit system). So it is better to change it to the uint64_t type.

Best Regards,
Kevin

+
+struct gpu_metrics_v1_9 {
+	struct metrics_table_header common_header;
+	int attr_count;
+	struct gpu_metrics_attr metrics_attrs[];
+};
+
 /*
  * gpu_metrics_v2_0 is not recommended as it's not naturally aligned.
  * Use gpu_metrics_v2_1 or later instead.
--
2.49.0