this applies to devices with CAB (Cache As Buffer) functionality.
Closes: https://gitlab.freedesktop.org/drm/amd/-/work_items/4905
Signed-off-by: Yang Wang <[email protected]>
---
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 17 +++++++++++++++++
.../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 10 +++++-----
.../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 10 +++++-----
.../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 10 +++++-----
4 files changed, 32 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 609f5ab07d8a..365946c43e11 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -2164,4 +2164,21 @@ static inline void smu_feature_init(struct smu_context
*smu, int feature_num)
smu_feature_list_clear_all(smu, SMU_FEATURE_LIST_ALLOWED);
}
+/*
+ * smu_safe_u16_nn - Make u16 safe by filtering negative overflow errors
+ * @val: Input u16 value, may contain invalid negative overflows
+ *
+ * Convert u16 to non-negative value. Cast to s16 to detect negative values
+ * caused by calculation errors. Return 0 for negative errors, return
+ * original value if valid.
+ *
+ * Return: Valid u16 value or 0
+ */
+static inline u16 smu_safe_u16_nn(u16 val)
+{
+ s16 tmp = (s16)val;
+
+ return tmp < 0 ? 0 : val;
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 9be7a2af560d..16f69b548ca4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -774,13 +774,13 @@ static int smu_v13_0_0_get_smu_metrics_data(struct
smu_context *smu,
*value = metrics->AverageGfxclkFrequencyPreDs;
break;
case METRICS_AVERAGE_FCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_13_0_0_BUSY_THRESHOLD)
*value = metrics->AverageFclkFrequencyPostDs;
else
*value = metrics->AverageFclkFrequencyPreDs;
break;
case METRICS_AVERAGE_UCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_13_0_0_BUSY_THRESHOLD)
*value = metrics->AverageMemclkFrequencyPostDs;
else
*value = metrics->AverageMemclkFrequencyPreDs;
@@ -801,7 +801,7 @@ static int smu_v13_0_0_get_smu_metrics_data(struct
smu_context *smu,
*value = metrics->AverageGfxActivity;
break;
case METRICS_AVERAGE_MEMACTIVITY:
- *value = metrics->AverageUclkActivity;
+ *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
break;
case METRICS_AVERAGE_VCNACTIVITY:
*value = max(metrics->Vcn0ActivityPercentage,
@@ -2086,7 +2086,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct
smu_context *smu,
metrics->AvgTemperature[TEMP_VR_MEM1]);
gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
- gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+ gpu_metrics->average_umc_activity =
smu_safe_u16_nn(metrics->AverageUclkActivity);
gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage,
metrics->Vcn1ActivityPercentage);
@@ -2103,7 +2103,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu,
else
gpu_metrics->average_gfxclk_frequency =
metrics->AverageGfxclkFrequencyPreDs;
- if (metrics->AverageUclkActivity <= SMU_13_0_0_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_13_0_0_BUSY_THRESHOLD)
gpu_metrics->average_uclk_frequency =
metrics->AverageMemclkFrequencyPostDs;
else
gpu_metrics->average_uclk_frequency =
metrics->AverageMemclkFrequencyPreDs;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 5cc15545da6e..34a5973b9a06 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -784,13 +784,13 @@ static int smu_v13_0_7_get_smu_metrics_data(struct
smu_context *smu,
*value = metrics->AverageGfxclkFrequencyPreDs;
break;
case METRICS_AVERAGE_FCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_13_0_7_BUSY_THRESHOLD)
*value = metrics->AverageFclkFrequencyPostDs;
else
*value = metrics->AverageFclkFrequencyPreDs;
break;
case METRICS_AVERAGE_UCLK:
- if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_13_0_7_BUSY_THRESHOLD)
*value = metrics->AverageMemclkFrequencyPostDs;
else
*value = metrics->AverageMemclkFrequencyPreDs;
@@ -815,7 +815,7 @@ static int smu_v13_0_7_get_smu_metrics_data(struct
smu_context *smu,
*value = metrics->AverageGfxActivity;
break;
case METRICS_AVERAGE_MEMACTIVITY:
- *value = metrics->AverageUclkActivity;
+ *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
break;
case METRICS_AVERAGE_SOCKETPOWER:
*value = metrics->AverageSocketPower << 8;
@@ -2092,7 +2092,7 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct
smu_context *smu,
metrics->AvgTemperature[TEMP_VR_MEM1]);
gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
- gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+ gpu_metrics->average_umc_activity =
smu_safe_u16_nn(metrics->AverageUclkActivity);
gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage,
metrics->Vcn1ActivityPercentage);
@@ -2105,7 +2105,7 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu,
else
gpu_metrics->average_gfxclk_frequency =
metrics->AverageGfxclkFrequencyPreDs;
- if (metrics->AverageUclkActivity <= SMU_13_0_7_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_13_0_7_BUSY_THRESHOLD)
gpu_metrics->average_uclk_frequency =
metrics->AverageMemclkFrequencyPostDs;
else
gpu_metrics->average_uclk_frequency =
metrics->AverageMemclkFrequencyPreDs;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 28c1b084fe62..aaec3a251e0f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -660,13 +660,13 @@ static int smu_v14_0_2_get_smu_metrics_data(struct
smu_context *smu,
*value = metrics->AverageGfxclkFrequencyPreDs;
break;
case METRICS_AVERAGE_FCLK:
- if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_14_0_2_BUSY_THRESHOLD)
*value = metrics->AverageFclkFrequencyPostDs;
else
*value = metrics->AverageFclkFrequencyPreDs;
break;
case METRICS_AVERAGE_UCLK:
- if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_14_0_2_BUSY_THRESHOLD)
*value = metrics->AverageMemclkFrequencyPostDs;
else
*value = metrics->AverageMemclkFrequencyPreDs;
@@ -687,7 +687,7 @@ static int smu_v14_0_2_get_smu_metrics_data(struct
smu_context *smu,
*value = metrics->AverageGfxActivity;
break;
case METRICS_AVERAGE_MEMACTIVITY:
- *value = metrics->AverageUclkActivity;
+ *value = smu_safe_u16_nn(metrics->AverageUclkActivity);
break;
case METRICS_AVERAGE_VCNACTIVITY:
*value = max(metrics->AverageVcn0ActivityPercentage,
@@ -2146,7 +2146,7 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct
smu_context *smu,
metrics->AvgTemperature[TEMP_VR_MEM1]);
gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
- gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+ gpu_metrics->average_umc_activity =
smu_safe_u16_nn(metrics->AverageUclkActivity);
gpu_metrics->average_mm_activity =
max(metrics->AverageVcn0ActivityPercentage,
metrics->Vcn1ActivityPercentage);
@@ -2158,7 +2158,7 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu,
else
gpu_metrics->average_gfxclk_frequency =
metrics->AverageGfxclkFrequencyPreDs;
- if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD)
+ if (smu_safe_u16_nn(metrics->AverageUclkActivity) <=
SMU_14_0_2_BUSY_THRESHOLD)
gpu_metrics->average_uclk_frequency =
metrics->AverageMemclkFrequencyPostDs;
else
gpu_metrics->average_uclk_frequency =
metrics->AverageMemclkFrequencyPreDs;