Fetch the system metrics table to fill in the gpuboard/baseboard
temperature metrics data for smu_v13_0_12.

v2: Remove unnecessary checks, use a separate metrics timestamp for
the temperature metrics table (Lijo)

Signed-off-by: Asad Kamal <asad.ka...@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |   1 +
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |   2 +
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h  |   3 +-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 248 +++++++++++++++++-
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c  |   6 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h  |   3 +
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h        |  26 ++
 7 files changed, 287 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 8b015107f761..dc48a1dd8be4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -766,6 +766,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
        case IP_VERSION(13, 0, 14):
        case IP_VERSION(13, 0, 12):
                smu_v13_0_6_set_ppt_funcs(smu);
+               smu_v13_0_6_set_temp_funcs(smu);
                /* Enable pp_od_clk_voltage node */
                smu->od_enabled = true;
                break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index b490c39e313e..611b381b9147 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -257,6 +257,7 @@ struct smu_table {
        void *cpu_addr;
        struct amdgpu_bo *bo;
        uint32_t version;
+       unsigned long  metrics_time;
 };
 
 enum smu_perf_level_designation {
@@ -322,6 +323,7 @@ enum smu_table_id {
        SMU_TABLE_ECCINFO,
        SMU_TABLE_COMBO_PPTABLE,
        SMU_TABLE_WIFIBAND,
+       SMU_TABLE_TEMP_METRICS,
        SMU_TABLE_COUNT,
 };
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index d7a9e41820fa..458c4ac2dfbc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -278,7 +278,8 @@
        __SMU_DUMMY_MAP(MALLPowerState), \
        __SMU_DUMMY_MAP(ResetSDMA), \
        __SMU_DUMMY_MAP(ResetVCN), \
-       __SMU_DUMMY_MAP(GetStaticMetricsTable),
+       __SMU_DUMMY_MAP(GetStaticMetricsTable), \
+       __SMU_DUMMY_MAP(GetSystemMetricsTable),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
index 02a455a31c25..d0dd84670361 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
@@ -138,6 +138,7 @@ const struct cmn2asic_msg_mapping 
smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] =
        MSG_MAP(SetThrottlingPolicy,                 
PPSMC_MSG_SetThrottlingPolicy,             0),
        MSG_MAP(ResetSDMA,                           PPSMC_MSG_ResetSDMA,       
                0),
        MSG_MAP(GetStaticMetricsTable,               
PPSMC_MSG_GetStaticMetricsTable,           1),
+       MSG_MAP(GetSystemMetricsTable,               
PPSMC_MSG_GetSystemMetricsTable,           0),
 };
 
 static int smu_v13_0_12_get_enabled_mask(struct smu_context *smu,
@@ -184,7 +185,8 @@ static int smu_v13_0_12_fru_get_product_info(struct 
smu_context *smu,
 
 int smu_v13_0_12_get_max_metrics_size(void)
 {
-       return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t));
+       return max3(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t),
+                  sizeof(SystemMetricsTable_t));
 }
 
 static void smu_v13_0_12_init_xgmi_data(struct smu_context *smu,
@@ -359,6 +361,245 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context 
*smu,
        return 0;
 }
 
+/*
+ * smu_v13_0_12_get_system_metrics_table - fetch the firmware system metrics
+ * @smu: SMU context
+ * @metrics_table: optional destination; when non-NULL it receives
+ *                 sizeof(SystemMetricsTable_t) bytes
+ * @bypass_cache: query the firmware even if the previous query is recent
+ *
+ * The firmware is queried when @bypass_cache is set, when no query has been
+ * recorded in the SMU_TABLE_TEMP_METRICS timestamp, or when that timestamp is
+ * more than 1 ms old. The driver table is then copied into
+ * smu_table->metrics_table.
+ *
+ * Return: 0 on success, otherwise the error from smu_cmn_send_smc_msg().
+ */
+static int smu_v13_0_12_get_system_metrics_table(struct smu_context *smu, void *metrics_table,
+                                                bool bypass_cache)
+{
+       struct smu_table_context *smu_table = &smu->smu_table;
+       struct smu_table *temp_table = &smu_table->tables[SMU_TABLE_TEMP_METRICS];
+       uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
+       struct smu_table *table = &smu_table->driver_table;
+       int ret;
+
+       if (bypass_cache || !temp_table->metrics_time ||
+           time_after(jiffies, temp_table->metrics_time + msecs_to_jiffies(1))) {
+               ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetSystemMetricsTable, NULL);
+               if (ret) {
+                       dev_info(smu->adev->dev,
+                                "Failed to export system metrics table!\n");
+                       return ret;
+               }
+
+               /* Make the firmware's write to the driver table visible to the CPU. */
+               amdgpu_asic_invalidate_hdp(smu->adev, NULL);
+               memcpy(smu_table->metrics_table, table->cpu_addr, table_size);
+
+               /*
+                * smu_table->metrics_table is not private to this table.
+                * NOTE(review): the regular metrics path is understood to
+                * cache in the same buffer, validated by
+                * smu_table->metrics_time - confirm. Clear that timestamp so
+                * the regular path re-fetches instead of reading system
+                * metrics as its own data. A cache hit in this function
+                * likewise trusts that nothing else rewrote the buffer.
+                */
+               smu_table->metrics_time = 0;
+               temp_table->metrics_time = jiffies;
+       }
+
+       if (metrics_table)
+               memcpy(metrics_table, smu_table->metrics_table, sizeof(SystemMetricsTable_t));
+
+       return 0;
+}
+
+/*
+ * Translate a firmware node temperature index into the matching
+ * amdgpu_node_temp identifier. Indices without a mapping yield -EINVAL,
+ * which is passed back through the enum return type.
+ */
+static enum amdgpu_node_temp smu_v13_0_12_get_node_sensor_type(NODE_TEMP_e type)
+{
+       enum amdgpu_node_temp sensor = -EINVAL;
+
+       switch (type) {
+       case NODE_TEMP_RETIMER:
+               sensor = AMDGPU_RETIMER_X_TEMP;
+               break;
+       case NODE_TEMP_IBC_TEMP:
+               sensor = AMDGPU_OAM_X_IBC_TEMP;
+               break;
+       case NODE_TEMP_IBC_2_TEMP:
+               sensor = AMDGPU_OAM_X_IBC_2_TEMP;
+               break;
+       case NODE_TEMP_VDD18_VR_TEMP:
+               sensor = AMDGPU_OAM_X_VDD18_VR_TEMP;
+               break;
+       case NODE_TEMP_04_HBM_B_VR_TEMP:
+               sensor = AMDGPU_OAM_X_04_HBM_B_VR_TEMP;
+               break;
+       case NODE_TEMP_04_HBM_D_VR_TEMP:
+               sensor = AMDGPU_OAM_X_04_HBM_D_VR_TEMP;
+               break;
+       default:
+               break;
+       }
+
+       return sensor;
+}
+
+/*
+ * Translate a firmware SVI (voltage regulator) temperature index into the
+ * matching amdgpu_vr_temp identifier. Indices without a mapping yield
+ * -EINVAL, which is passed back through the enum return type.
+ */
+static enum amdgpu_vr_temp smu_v13_0_12_get_vr_sensor_type(SVI_TEMP_e type)
+{
+       enum amdgpu_vr_temp sensor = -EINVAL;
+
+       switch (type) {
+       case SVI_VDDCR_VDD0_TEMP:
+               sensor = AMDGPU_VDDCR_VDD0_TEMP;
+               break;
+       case SVI_VDDCR_VDD1_TEMP:
+               sensor = AMDGPU_VDDCR_VDD1_TEMP;
+               break;
+       case SVI_VDDCR_VDD2_TEMP:
+               sensor = AMDGPU_VDDCR_VDD2_TEMP;
+               break;
+       case SVI_VDDCR_VDD3_TEMP:
+               sensor = AMDGPU_VDDCR_VDD3_TEMP;
+               break;
+       case SVI_VDDCR_SOC_A_TEMP:
+               sensor = AMDGPU_VDDCR_SOC_A_TEMP;
+               break;
+       case SVI_VDDCR_SOC_C_TEMP:
+               sensor = AMDGPU_VDDCR_SOC_C_TEMP;
+               break;
+       case SVI_VDDCR_SOCIO_A_TEMP:
+               sensor = AMDGPU_VDDCR_SOCIO_A_TEMP;
+               break;
+       case SVI_VDDCR_SOCIO_C_TEMP:
+               sensor = AMDGPU_VDDCR_SOCIO_C_TEMP;
+               break;
+       case SVI_VDD_085_HBM_TEMP:
+               sensor = AMDGPU_VDD_085_HBM_TEMP;
+               break;
+       case SVI_VDDCR_11_HBM_B_TEMP:
+               sensor = AMDGPU_VDDCR_11_HBM_B_TEMP;
+               break;
+       case SVI_VDDCR_11_HBM_D_TEMP:
+               sensor = AMDGPU_VDDCR_11_HBM_D_TEMP;
+               break;
+       case SVI_VDD_USR_TEMP:
+               sensor = AMDGPU_VDD_USR_TEMP;
+               break;
+       case SVI_VDDIO_11_E32_TEMP:
+               sensor = AMDGPU_VDDIO_11_E32_TEMP;
+               break;
+       default:
+               break;
+       }
+
+       return sensor;
+}
+
+/*
+ * Translate a firmware system (baseboard) temperature index into the
+ * matching amdgpu_system_temp identifier. Indices without a mapping yield
+ * -EINVAL, which is passed back through the enum return type.
+ */
+static enum amdgpu_system_temp smu_v13_0_12_get_system_sensor_type(SYSTEM_TEMP_e type)
+{
+       enum amdgpu_system_temp sensor = -EINVAL;
+
+       switch (type) {
+       case SYSTEM_TEMP_UBB_FPGA:
+               sensor = AMDGPU_UBB_FPGA_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_FRONT:
+               sensor = AMDGPU_UBB_FRONT_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_BACK:
+               sensor = AMDGPU_UBB_BACK_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_OAM7:
+               sensor = AMDGPU_UBB_OAM7_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_IBC:
+               sensor = AMDGPU_UBB_IBC_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_UFPGA:
+               sensor = AMDGPU_UBB_UFPGA_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_OAM1:
+               sensor = AMDGPU_UBB_OAM1_TEMP;
+               break;
+       case SYSTEM_TEMP_OAM_0_1_HSC:
+               sensor = AMDGPU_OAM_0_1_HSC_TEMP;
+               break;
+       case SYSTEM_TEMP_OAM_2_3_HSC:
+               sensor = AMDGPU_OAM_2_3_HSC_TEMP;
+               break;
+       case SYSTEM_TEMP_OAM_4_5_HSC:
+               sensor = AMDGPU_OAM_4_5_HSC_TEMP;
+               break;
+       case SYSTEM_TEMP_OAM_6_7_HSC:
+               sensor = AMDGPU_OAM_6_7_HSC_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_FPGA_0V72_VR:
+               sensor = AMDGPU_UBB_FPGA_0V72_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_UBB_FPGA_3V3_VR:
+               sensor = AMDGPU_UBB_FPGA_3V3_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR:
+               sensor = AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR:
+               sensor = AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_RETIMER_0_1_0V9_VR:
+               sensor = AMDGPU_RETIMER_0_1_0V9_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_RETIMER_4_5_0V9_VR:
+               sensor = AMDGPU_RETIMER_4_5_0V9_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_RETIMER_2_3_0V9_VR:
+               sensor = AMDGPU_RETIMER_2_3_0V9_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_RETIMER_6_7_0V9_VR:
+               sensor = AMDGPU_RETIMER_6_7_0V9_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR:
+               sensor = AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR:
+               sensor = AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP;
+               break;
+       case SYSTEM_TEMP_IBC_HSC:
+               sensor = AMDGPU_IBC_HSC_TEMP;
+               break;
+       case SYSTEM_TEMP_IBC:
+               sensor = AMDGPU_IBC_TEMP;
+               break;
+       default:
+               break;
+       }
+
+       return sensor;
+}
+
+/*
+ * Report whether temperature metrics of the given @type can be queried.
+ *
+ * GPU board metrics only need the TEMP_METRICS capability. Baseboard metrics
+ * additionally require this device to be XGMI physical node 0 in a hive of
+ * more than one physical node. Any other type is unsupported.
+ */
+static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu,
+                                                  enum smu_temp_metric_type type)
+{
+       struct amdgpu_device *adev = smu->adev;
+
+       if (type == SMU_TEMP_METRIC_GPUBOARD)
+               return smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS));
+
+       if (type != SMU_TEMP_METRIC_BASEBOARD)
+               return false;
+
+       /* Capability is checked last, as in the topology test's short-circuit order. */
+       if (adev->gmc.xgmi.physical_node_id != 0)
+               return false;
+       if (adev->gmc.xgmi.num_physical_nodes <= 1)
+               return false;
+
+       return smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS)) ? true : false;
+}
+
+/* Pack a sensor id into bits 31:24 and a temperature reading into bits 23:0. */
+static u32 smu_v13_0_12_encode_temp(int sensor_type, int temp)
+{
+       return ((u32)sensor_type << 24) | ((u32)temp & 0xFFFFFF);
+}
+
+/*
+ * smu_v13_0_12_get_temp_metrics - fill a gpuboard/baseboard temperature table
+ * @smu: SMU context
+ * @type: SMU_TEMP_METRIC_GPUBOARD or SMU_TEMP_METRIC_BASEBOARD
+ * @table: caller buffer for the matching amdgpu_*_temp_metrics_v1_0 struct,
+ *         or NULL to only query the required size
+ *
+ * The table is initialised first, then a fresh system metrics table is read
+ * from firmware (the cache is bypassed). Readings equal to -1 are skipped,
+ * so populated entries are packed from index 0. Sensors for which the mapping
+ * helper reports an error are skipped as well: shifting a negative id would
+ * be undefined behaviour and would store a meaningless entry.
+ *
+ * Return: size of the table struct selected by @type (the baseboard size for
+ * anything other than GPUBOARD), -ENOMEM if the scratch buffer cannot be
+ * allocated, or the error from the firmware query.
+ */
+static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu,
+                                            enum smu_temp_metric_type type, void *table)
+{
+       struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics;
+       struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics;
+       SystemMetricsTable_t *metrics;
+       u32 idx, sensors;
+       int ret, sensor_type;
+       ssize_t size;
+
+       size = (type == SMU_TEMP_METRIC_GPUBOARD) ?
+               sizeof(*gpuboard_temp_metrics) : sizeof(*baseboard_temp_metrics);
+
+       /* Size-only query. */
+       if (!table)
+               return size;
+
+       metrics = kzalloc(sizeof(*metrics), GFP_KERNEL);
+       if (!metrics)
+               return -ENOMEM;
+
+       gpuboard_temp_metrics = table;
+       baseboard_temp_metrics = table;
+       if (type == SMU_TEMP_METRIC_GPUBOARD)
+               smu_cmn_init_gpuboard_temp_metrics(gpuboard_temp_metrics, 1, 0);
+       else if (type == SMU_TEMP_METRIC_BASEBOARD)
+               smu_cmn_init_baseboard_temp_metrics(baseboard_temp_metrics, 1, 0);
+
+       ret = smu_v13_0_12_get_system_metrics_table(smu, metrics, true);
+       if (ret) {
+               size = ret;
+               goto out;
+       }
+
+       if (type == SMU_TEMP_METRIC_GPUBOARD) {
+               gpuboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter;
+               gpuboard_temp_metrics->label_version = metrics->LabelVersion;
+               gpuboard_temp_metrics->node_id = metrics->NodeIdentifier;
+
+               idx = 0;
+               for (sensors = 0; sensors < NODE_TEMP_MAX_TEMP_ENTRIES; sensors++) {
+                       if (metrics->NodeTemperatures[sensors] == -1)
+                               continue;
+                       sensor_type = smu_v13_0_12_get_node_sensor_type(sensors);
+                       if (sensor_type < 0)
+                               continue;
+                       gpuboard_temp_metrics->node_temp[idx++] =
+                               smu_v13_0_12_encode_temp(sensor_type,
+                                                        (int)metrics->NodeTemperatures[sensors]);
+               }
+
+               idx = 0;
+               for (sensors = 0; sensors < SVI_MAX_TEMP_ENTRIES; sensors++) {
+                       if (metrics->VrTemperatures[sensors] == -1)
+                               continue;
+                       sensor_type = smu_v13_0_12_get_vr_sensor_type(sensors);
+                       if (sensor_type < 0)
+                               continue;
+                       gpuboard_temp_metrics->vr_temp[idx++] =
+                               smu_v13_0_12_encode_temp(sensor_type,
+                                                        (int)metrics->VrTemperatures[sensors]);
+               }
+       } else if (type == SMU_TEMP_METRIC_BASEBOARD) {
+               baseboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter;
+               baseboard_temp_metrics->label_version = metrics->LabelVersion;
+               baseboard_temp_metrics->node_id = metrics->NodeIdentifier;
+
+               idx = 0;
+               for (sensors = 0; sensors < SYSTEM_TEMP_MAX_ENTRIES; sensors++) {
+                       if (metrics->SystemTemperatures[sensors] == -1)
+                               continue;
+                       sensor_type = smu_v13_0_12_get_system_sensor_type(sensors);
+                       if (sensor_type < 0)
+                               continue;
+                       baseboard_temp_metrics->system_temp[idx++] =
+                               smu_v13_0_12_encode_temp(sensor_type,
+                                                        (int)metrics->SystemTemperatures[sensors]);
+               }
+       }
+
+out:
+       kfree(metrics);
+       return size;
+}
+
 ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct 
amdgpu_xcp *xcp, void *table, void *smu_metrics)
 {
        const u8 num_jpeg_rings = NUM_JPEG_RINGS_FW;
@@ -572,3 +813,8 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context 
*smu, void **table, void
 
        return sizeof(*gpu_metrics);
 }
+
+/* Temperature-metrics callbacks implemented by smu_v13_0_12. */
+const struct smu_temp_funcs smu_v13_0_12_temp_funcs = {
+       .temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported,
+       .get_temp_metrics = smu_v13_0_12_get_temp_metrics,
+};
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 9cc294f4708b..c777c0e4ea11 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -3857,3 +3857,9 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
        amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs);
        amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs);
 }
+
+/*
+ * Install the temperature-metrics callbacks for this device. Only MP1 IP
+ * version 13.0.12 has an implementation; every other version gets NULL.
+ */
+void smu_v13_0_6_set_temp_funcs(struct smu_context *smu)
+{
+       if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12))
+               smu->smu_temp.temp_funcs = &smu_v13_0_12_temp_funcs;
+       else
+               smu->smu_temp.temp_funcs = NULL;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
index 67b30674fd31..ece04ad724fb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
@@ -68,10 +68,12 @@ enum smu_v13_0_6_caps {
        SMU_CAP(HST_LIMIT_METRICS),
        SMU_CAP(BOARD_VOLTAGE),
        SMU_CAP(PLDM_VERSION),
+       SMU_CAP(TEMP_METRICS),
        SMU_CAP(ALL),
 };
 
 extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu);
+extern void smu_v13_0_6_set_temp_funcs(struct smu_context *smu);
 bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps 
cap);
 int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu);
 int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table,
@@ -88,4 +90,5 @@ ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu,
                                     void *smu_metrics);
 extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[];
 extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[];
+extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs;
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h 
b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index a608cdbdada4..d588f74b98de 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -65,6 +65,32 @@
                header->structure_size = sizeof(*tmp);                     \
        } while (0)
 
+/*
+ * Initialise a baseboard temperature metrics table of format revision @fr and
+ * content revision @cr: every byte is set to 0xFF, then the leading
+ * metrics_table_header is filled with the revisions and the struct size.
+ * @ptr must be a pointer to the matching versioned struct.
+ */
+#define smu_cmn_init_baseboard_temp_metrics(ptr, fr, cr)                        \
+       do {                                                                    \
+               typecheck(struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *,  \
+                         (ptr));                                               \
+               struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *tbl = (ptr); \
+               struct metrics_table_header *hdr =                              \
+                       (struct metrics_table_header *)tbl;                     \
+               memset(hdr, 0xFF, sizeof(*tbl));                                \
+               hdr->format_revision = fr;                                      \
+               hdr->content_revision = cr;                                     \
+               hdr->structure_size = sizeof(*tbl);                             \
+       } while (0)
+
+/*
+ * Initialise a GPU board temperature metrics table of format revision @fr and
+ * content revision @cr: every byte is set to 0xFF, then the leading
+ * metrics_table_header is filled with the revisions and the struct size.
+ * @ptr must be a pointer to the matching versioned struct.
+ */
+#define smu_cmn_init_gpuboard_temp_metrics(ptr, fr, cr)                         \
+       do {                                                                    \
+               typecheck(struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *,   \
+                         (ptr));                                               \
+               struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *tbl = (ptr);  \
+               struct metrics_table_header *hdr =                              \
+                       (struct metrics_table_header *)tbl;                     \
+               memset(hdr, 0xFF, sizeof(*tbl));                                \
+               hdr->format_revision = fr;                                      \
+               hdr->content_revision = cr;                                     \
+               hdr->structure_size = sizeof(*tbl);                             \
+       } while (0)
+
 extern const int link_speed[];
 
 /* Helper to Convert from PCIE Gen 1/2/3/4/5/6 to 0.1 GT/s speed units */
-- 
2.46.0

Reply via email to