PMFW may return invalid values due to internal calculation errors.
so, the kmd driver must validate and sanitize the returned values to
prevent issues caused by firmware calculation errors.

For example, values 0xfffe (-2) and 0xffff (-1) are treated
as invalid and clamped to 0.

this applies to devices with CAB (Cache As Buffer) functionality.

Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4905

Signed-off-by: Yang Wang <[email protected]>
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h   | 17 +++++++++++++++++
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c    | 10 +++++-----
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c    | 10 +++++-----
 .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c    | 10 +++++-----
 4 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 609f5ab07d8a..365946c43e11 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -2164,4 +2164,21 @@ static inline void smu_feature_init(struct smu_context 
*smu, int feature_num)
        smu_feature_list_clear_all(smu, SMU_FEATURE_LIST_ALLOWED);
 }
 
+/*
+ * smu_safe_u16_nn - Make u16 safe by filtering negative overflow errors
+ * @val: Input u16 value, may contain invalid negative overflows
+ *
+ * Convert u16 to non-negative value. Cast to s16 to detect negative values
+ * caused by calculation errors. Return 0 for negative errors, return
+ * original value if valid.
+ *
+ * Return: Valid u16 value or 0
+ */
+static inline u16 smu_safe_u16_nn(u16 val)
+{
+    s16 tmp = (s16)val;
+
+    return tmp < 0 ? 0 : val;
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 9be7a2af560d..16f69b548ca4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -774,13 +774,13 @@ static int smu_v13_0_0_get_smu_metrics_data(struct 
smu_context *smu,
                        *value = metrics->AverageGfxclkFrequencyPreDs;
                break;
        case METRICS_AVERAGE_FCLK:
-               if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+               if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_13_0_0_BUSY_THRESHOLD)
                        *value = metrics->AverageFclkFrequencyPostDs;
                else
                        *value = metrics->AverageFclkFrequencyPreDs;
                break;
        case METRICS_AVERAGE_UCLK:
-               if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+               if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_13_0_0_BUSY_THRESHOLD)
                        *value = metrics->AverageMemclkFrequencyPostDs;
                else
                        *value = metrics->AverageMemclkFrequencyPreDs;
@@ -801,7 +801,7 @@ static int smu_v13_0_0_get_smu_metrics_data(struct 
smu_context *smu,
                *value = metrics->AverageGfxActivity;
                break;
        case METRICS_AVERAGE_MEMACTIVITY:
-               *value = metrics->AverageUclkActivity;
+               *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
                break;
        case METRICS_AVERAGE_VCNACTIVITY:
                *value = max(metrics->Vcn0ActivityPercentage,
@@ -2086,7 +2086,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct 
smu_context *smu,
                                             
metrics->AvgTemperature[TEMP_VR_MEM1]);
 
        gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
-       gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+       gpu_metrics->average_umc_activity = 
smu_safe_u16_nn(metrics->AverageUclkActivity);
        gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage,
                                               metrics->Vcn1ActivityPercentage);
 
@@ -2103,7 +2103,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct 
smu_context *smu,
        else
                gpu_metrics->average_gfxclk_frequency = 
metrics->AverageGfxclkFrequencyPreDs;
 
-       if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+       if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_13_0_0_BUSY_THRESHOLD)
                gpu_metrics->average_uclk_frequency = 
metrics->AverageMemclkFrequencyPostDs;
        else
                gpu_metrics->average_uclk_frequency = 
metrics->AverageMemclkFrequencyPreDs;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 5cc15545da6e..34a5973b9a06 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -784,13 +784,13 @@ static int smu_v13_0_7_get_smu_metrics_data(struct 
smu_context *smu,
                *value = metrics->AverageGfxclkFrequencyPreDs;
                break;
        case METRICS_AVERAGE_FCLK:
-               if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+               if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_13_0_7_BUSY_THRESHOLD)
                        *value = metrics->AverageFclkFrequencyPostDs;
                else
                        *value = metrics->AverageFclkFrequencyPreDs;
                break;
        case METRICS_AVERAGE_UCLK:
-               if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+               if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_13_0_7_BUSY_THRESHOLD)
                        *value = metrics->AverageMemclkFrequencyPostDs;
                else
                        *value = metrics->AverageMemclkFrequencyPreDs;
@@ -815,7 +815,7 @@ static int smu_v13_0_7_get_smu_metrics_data(struct 
smu_context *smu,
                *value = metrics->AverageGfxActivity;
                break;
        case METRICS_AVERAGE_MEMACTIVITY:
-               *value = metrics->AverageUclkActivity;
+               *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
                break;
        case METRICS_AVERAGE_SOCKETPOWER:
                *value = metrics->AverageSocketPower << 8;
@@ -2092,7 +2092,7 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct 
smu_context *smu,
                                             
metrics->AvgTemperature[TEMP_VR_MEM1]);
 
        gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
-       gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+       gpu_metrics->average_umc_activity = 
smu_safe_u16_nn(metrics->AverageUclkActivity);
        gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage,
                                               metrics->Vcn1ActivityPercentage);
 
@@ -2105,7 +2105,7 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct 
smu_context *smu,
        else
                gpu_metrics->average_gfxclk_frequency = 
metrics->AverageGfxclkFrequencyPreDs;
 
-       if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+       if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_13_0_7_BUSY_THRESHOLD)
                gpu_metrics->average_uclk_frequency = 
metrics->AverageMemclkFrequencyPostDs;
        else
                gpu_metrics->average_uclk_frequency = 
metrics->AverageMemclkFrequencyPreDs;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 28c1b084fe62..aaec3a251e0f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -660,13 +660,13 @@ static int smu_v14_0_2_get_smu_metrics_data(struct 
smu_context *smu,
                        *value = metrics->AverageGfxclkFrequencyPreDs;
                break;
        case METRICS_AVERAGE_FCLK:
-               if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+               if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_14_0_2_BUSY_THRESHOLD)
                        *value = metrics->AverageFclkFrequencyPostDs;
                else
                        *value = metrics->AverageFclkFrequencyPreDs;
                break;
        case METRICS_AVERAGE_UCLK:
-               if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+               if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_14_0_2_BUSY_THRESHOLD)
                        *value = metrics->AverageMemclkFrequencyPostDs;
                else
                        *value = metrics->AverageMemclkFrequencyPreDs;
@@ -687,7 +687,7 @@ static int smu_v14_0_2_get_smu_metrics_data(struct 
smu_context *smu,
                *value = metrics->AverageGfxActivity;
                break;
        case METRICS_AVERAGE_MEMACTIVITY:
-               *value = metrics->AverageUclkActivity;
+               *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
                break;
        case METRICS_AVERAGE_VCNACTIVITY:
                *value = max(metrics->AverageVcn0ActivityPercentage,
@@ -2146,7 +2146,7 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct 
smu_context *smu,
                                             
metrics->AvgTemperature[TEMP_VR_MEM1]);
 
        gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
-       gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+       gpu_metrics->average_umc_activity = 
smu_safe_u16_nn(metrics->AverageUclkActivity);
        gpu_metrics->average_mm_activity = 
max(metrics->AverageVcn0ActivityPercentage,
                                               metrics->Vcn1ActivityPercentage);
 
@@ -2158,7 +2158,7 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct 
smu_context *smu,
        else
                gpu_metrics->average_gfxclk_frequency = 
metrics->AverageGfxclkFrequencyPreDs;
 
-       if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+       if (smu_safe_u16_nn(metrics->AverageUclkActivity) <= 
SMU_14_0_2_BUSY_THRESHOLD)
                gpu_metrics->average_uclk_frequency = 
metrics->AverageMemclkFrequencyPostDs;
        else
                gpu_metrics->average_uclk_frequency = 
metrics->AverageMemclkFrequencyPreDs;
-- 
2.47.3

Reply via email to