From: Asad Kamal <[email protected]>

Add baseboard temperature metrics support via system metrics table for smu_v15_0_8
v4: Add separate function to fill baseboard temperature, use 16, remove casting v5: Optimize to use single switch case (Lijo) Signed-off-by: Asad Kamal <[email protected]> Reviewed-by: Lijo Lazar <[email protected]> Signed-off-by: Alex Deucher <[email protected]> --- .../gpu/drm/amd/include/kgd_pp_interface.h | 29 +++++ .../drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c | 110 +++++++++++++++--- .../drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.h | 58 +++++++++ 3 files changed, 184 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 04efa127a3dd9..83bd621691829 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -617,6 +617,29 @@ enum amdgpu_metrics_attr_id { AMDGPU_METRICS_ATTR_ID_VR_TEMP_VDDIO_065_UCIEAM_A, AMDGPU_METRICS_ATTR_ID_VR_TEMP_VDDIO_065_UCIEAM_C, AMDGPU_METRICS_ATTR_ID_VR_TEMP_VDDAN_075, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_FPGA, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_FRONT, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_BACK, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_OAM7, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_IBC, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_UFPGA, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_OAM1, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_OAM_0_1_HSC, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_OAM_2_3_HSC, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_OAM_4_5_HSC, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_OAM_6_7_HSC, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_FPGA_0V72_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_UBB_FPGA_3V3_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_RETIMER_0_1_0V9_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_RETIMER_4_5_0V9_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_RETIMER_2_3_0V9_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_RETIMER_6_7_0V9_VR, + 
AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_IBC_HSC, + AMDGPU_METRICS_ATTR_ID_SYSTEM_TEMP_IBC, AMDGPU_METRICS_ATTR_ID_MAX, }; @@ -1875,4 +1898,10 @@ struct amdgpu_gpuboard_temp_metrics_v1_1 { struct gpu_metrics_attr metrics_attrs[]; }; +struct amdgpu_baseboard_temp_metrics_v1_1 { + struct metrics_table_header common_header; + int attr_count; + struct gpu_metrics_attr metrics_attrs[]; +}; + #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c index e6895e03aa3a8..aec0b6872f924 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.c @@ -178,6 +178,7 @@ static size_t smu_v15_0_8_get_system_metrics_size(void) static int smu_v15_0_8_tables_init(struct smu_context *smu) { + struct smu_v15_0_8_baseboard_temp_metrics *baseboard_temp_metrics; struct smu_v15_0_8_gpuboard_temp_metrics *gpuboard_temp_metrics; struct smu_table_context *smu_table = &smu->smu_table; int ret, gpu_metrcs_size = sizeof(MetricsTable_t); @@ -223,11 +224,23 @@ static int smu_v15_0_8_tables_init(struct smu_context *smu) if (ret) return ret; + /* Initialize base board temperature metrics */ + ret = smu_driver_table_init(smu, + SMU_DRIVER_TABLE_BASEBOARD_TEMP_METRICS, + sizeof(*baseboard_temp_metrics), 50); + if (ret) + return ret; + baseboard_temp_metrics = (struct smu_v15_0_8_baseboard_temp_metrics *) + smu_driver_table_ptr(smu, + SMU_DRIVER_TABLE_BASEBOARD_TEMP_METRICS); + smu_v15_0_8_baseboard_temp_metrics_init(baseboard_temp_metrics, 1, 1); /* Initialize GPU board temperature metrics */ ret = smu_driver_table_init(smu, SMU_DRIVER_TABLE_GPUBOARD_TEMP_METRICS, sizeof(*gpuboard_temp_metrics), 50); if (ret) { smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); + smu_driver_table_fini(smu, + SMU_DRIVER_TABLE_BASEBOARD_TEMP_METRICS); return ret; } 
gpuboard_temp_metrics = (struct smu_v15_0_8_gpuboard_temp_metrics *) @@ -280,6 +293,7 @@ static int smu_v15_0_8_tables_fini(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; + smu_driver_table_fini(smu, SMU_DRIVER_TABLE_BASEBOARD_TEMP_METRICS); smu_driver_table_fini(smu, SMU_DRIVER_TABLE_GPUBOARD_TEMP_METRICS); smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); mutex_destroy(&smu_table->metrics_lock); @@ -1353,6 +1367,10 @@ static bool smu_v15_0_8_is_temp_metrics_supported(struct smu_context *smu, enum smu_temp_metric_type type) { switch (type) { + case SMU_TEMP_METRIC_BASEBOARD: + if (smu->adev->gmc.xgmi.physical_node_id == 0) + return true; + return false; case SMU_TEMP_METRIC_GPUBOARD: return true; default: @@ -1360,6 +1378,62 @@ static bool smu_v15_0_8_is_temp_metrics_supported(struct smu_context *smu, } } +static void smu_v15_0_8_fill_baseboard_temp_metrics( + struct smu_v15_0_8_baseboard_temp_metrics *baseboard_temp_metrics, + const SystemMetricsTable_t *metrics) +{ + baseboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter; + baseboard_temp_metrics->label_version = metrics->LabelVersion; + baseboard_temp_metrics->node_id = metrics->NodeIdentifier; + + baseboard_temp_metrics->system_temp_ubb_fpga = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_FPGA]; + baseboard_temp_metrics->system_temp_ubb_front = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_FRONT]; + baseboard_temp_metrics->system_temp_ubb_back = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_BACK]; + baseboard_temp_metrics->system_temp_ubb_oam7 = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_OAM7]; + baseboard_temp_metrics->system_temp_ubb_ibc = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_IBC]; + baseboard_temp_metrics->system_temp_ubb_ufpga = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_UFPGA]; + baseboard_temp_metrics->system_temp_ubb_oam1 = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_OAM1]; + baseboard_temp_metrics->system_temp_oam_0_1_hsc 
= + metrics->SystemTemperatures[SYSTEM_TEMP_OAM_0_1_HSC]; + baseboard_temp_metrics->system_temp_oam_2_3_hsc = + metrics->SystemTemperatures[SYSTEM_TEMP_OAM_2_3_HSC]; + baseboard_temp_metrics->system_temp_oam_4_5_hsc = + metrics->SystemTemperatures[SYSTEM_TEMP_OAM_4_5_HSC]; + baseboard_temp_metrics->system_temp_oam_6_7_hsc = + metrics->SystemTemperatures[SYSTEM_TEMP_OAM_6_7_HSC]; + baseboard_temp_metrics->system_temp_ubb_fpga_0v72_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_FPGA_0V72_VR]; + baseboard_temp_metrics->system_temp_ubb_fpga_3v3_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_UBB_FPGA_3V3_VR]; + baseboard_temp_metrics->system_temp_retimer_0_1_2_3_1v2_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR]; + baseboard_temp_metrics->system_temp_retimer_4_5_6_7_1v2_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR]; + baseboard_temp_metrics->system_temp_retimer_0_1_0v9_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_RETIMER_0_1_0V9_VR]; + baseboard_temp_metrics->system_temp_retimer_4_5_0v9_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_RETIMER_4_5_0V9_VR]; + baseboard_temp_metrics->system_temp_retimer_2_3_0v9_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_RETIMER_2_3_0V9_VR]; + baseboard_temp_metrics->system_temp_retimer_6_7_0v9_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_RETIMER_6_7_0V9_VR]; + baseboard_temp_metrics->system_temp_oam_0_1_2_3_3v3_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR]; + baseboard_temp_metrics->system_temp_oam_4_5_6_7_3v3_vr = + metrics->SystemTemperatures[SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR]; + baseboard_temp_metrics->system_temp_ibc_hsc = + metrics->SystemTemperatures[SYSTEM_TEMP_IBC_HSC]; + baseboard_temp_metrics->system_temp_ibc = + metrics->SystemTemperatures[SYSTEM_TEMP_IBC]; +} + static void smu_v15_0_8_fill_gpuboard_temp_metrics( struct smu_v15_0_8_gpuboard_temp_metrics *gpuboard_temp_metrics, const SystemMetricsTable_t *metrics) @@ -1429,33 +1503,43 @@ static ssize_t 
smu_v15_0_8_get_temp_metrics(struct smu_context *smu, enum smu_temp_metric_type type, void *table) { + struct smu_v15_0_8_baseboard_temp_metrics *baseboard_temp_metrics; struct smu_v15_0_8_gpuboard_temp_metrics *gpuboard_temp_metrics; struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; - enum smu_driver_table_id table_id; SystemMetricsTable_t *metrics; struct smu_table *sys_table; - ssize_t size; int ret; - table_id = SMU_DRIVER_TABLE_GPUBOARD_TEMP_METRICS; - gpuboard_temp_metrics = - (struct smu_v15_0_8_gpuboard_temp_metrics *) - smu_driver_table_ptr(smu, table_id); - size = sizeof(*gpuboard_temp_metrics); - ret = smu_v15_0_8_get_system_metrics_table(smu); if (ret) return ret; sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; metrics = (SystemMetricsTable_t *)sys_table->cache.buffer; - smu_driver_table_update_cache_time(smu, table_id); - smu_v15_0_8_fill_gpuboard_temp_metrics(gpuboard_temp_metrics, - metrics); - memcpy(table, gpuboard_temp_metrics, size); - return size; + switch (type) { + case SMU_TEMP_METRIC_GPUBOARD: + gpuboard_temp_metrics = + (struct smu_v15_0_8_gpuboard_temp_metrics *) + smu_driver_table_ptr(smu, SMU_DRIVER_TABLE_GPUBOARD_TEMP_METRICS); + smu_driver_table_update_cache_time(smu, SMU_DRIVER_TABLE_GPUBOARD_TEMP_METRICS); + smu_v15_0_8_fill_gpuboard_temp_metrics(gpuboard_temp_metrics, + metrics); + memcpy(table, gpuboard_temp_metrics, sizeof(*gpuboard_temp_metrics)); + return sizeof(*gpuboard_temp_metrics); + case SMU_TEMP_METRIC_BASEBOARD: + baseboard_temp_metrics = + (struct smu_v15_0_8_baseboard_temp_metrics *) + smu_driver_table_ptr(smu, SMU_DRIVER_TABLE_BASEBOARD_TEMP_METRICS); + smu_driver_table_update_cache_time(smu, SMU_DRIVER_TABLE_BASEBOARD_TEMP_METRICS); + smu_v15_0_8_fill_baseboard_temp_metrics(baseboard_temp_metrics, + metrics); + memcpy(table, baseboard_temp_metrics, sizeof(*baseboard_temp_metrics)); + return sizeof(*baseboard_temp_metrics); + default: + return -EINVAL; + } } 
static ssize_t smu_v15_0_8_get_gpu_metrics(struct smu_context *smu, void **table) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.h index 0856d11d8e55e..398ce44821741 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0_8_ppt.h @@ -182,6 +182,7 @@ typedef struct { DECLARE_SMU_METRICS_CLASS(smu_v15_0_8_gpu_metrics, SMU_15_0_8_METRICS_FIELDS); /* Maximum temperature sensor counts for system metrics */ +#define SMU_15_0_8_MAX_SYSTEM_TEMP_ENTRIES 32 #define SMU_15_0_8_MAX_NODE_TEMP_ENTRIES 12 #define SMU_15_0_8_MAX_VR_TEMP_ENTRIES 22 @@ -251,5 +252,62 @@ DECLARE_SMU_METRICS_CLASS(smu_v15_0_8_gpu_metrics, SMU_15_0_8_METRICS_FIELDS); DECLARE_SMU_METRICS_CLASS(smu_v15_0_8_gpuboard_temp_metrics, SMU_15_0_8_GPUBOARD_TEMP_METRICS_FIELDS); +/* SMUv 15.0.8 Baseboard temperature metrics - ID-based approach */ +#define SMU_15_0_8_BASEBOARD_TEMP_METRICS_FIELDS(SMU_SCALAR, SMU_ARRAY) \ + SMU_SCALAR(SMU_MATTR(ACCUMULATION_COUNTER), SMU_MUNIT(NONE), \ + SMU_MTYPE(U64), accumulation_counter); \ + SMU_SCALAR(SMU_MATTR(LABEL_VERSION), SMU_MUNIT(NONE), \ + SMU_MTYPE(U16), label_version); \ + SMU_SCALAR(SMU_MATTR(NODE_ID), SMU_MUNIT(NONE), \ + SMU_MTYPE(U16), node_id); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_FPGA), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_fpga); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_FRONT), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_front); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_BACK), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_back); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_OAM7), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_oam7); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_IBC), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_ibc); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_UFPGA), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_ufpga); \ + 
SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_OAM1), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_oam1); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_OAM_0_1_HSC), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_oam_0_1_hsc); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_OAM_2_3_HSC), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_oam_2_3_hsc); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_OAM_4_5_HSC), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_oam_4_5_hsc); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_OAM_6_7_HSC), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_oam_6_7_hsc); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_FPGA_0V72_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_fpga_0v72_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_UBB_FPGA_3V3_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ubb_fpga_3v3_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_retimer_0_1_2_3_1v2_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_retimer_4_5_6_7_1v2_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_RETIMER_0_1_0V9_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_retimer_0_1_0v9_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_RETIMER_4_5_0V9_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_retimer_4_5_0v9_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_RETIMER_2_3_0V9_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_retimer_2_3_0v9_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_RETIMER_6_7_0V9_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_retimer_6_7_0v9_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_oam_0_1_2_3_3v3_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_oam_4_5_6_7_3v3_vr); \ + SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_IBC_HSC), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ibc_hsc); \ + 
SMU_SCALAR(SMU_MATTR(SYSTEM_TEMP_IBC), SMU_MUNIT(TEMP_1), \ + SMU_MTYPE(S16), system_temp_ibc); + +DECLARE_SMU_METRICS_CLASS(smu_v15_0_8_baseboard_temp_metrics, + SMU_15_0_8_BASEBOARD_TEMP_METRICS_FIELDS); #endif #endif -- 2.53.0
