Updating RAS info in PMFW takes time, so poll the bad page count until it changes or a timeout expires.
Signed-off-by: Tao Zhou <[email protected]>
---
.../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 22 ++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 511c5882b37e..de7b268a9862 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -867,13 +867,33 @@ amdgpu_ras_eeprom_update_header(struct
amdgpu_ras_eeprom_control *control)
int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control
*control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
+ int ret, timeout = 1000;
if (!amdgpu_ras_smu_eeprom_supported(adev))
return 0;
control->ras_num_recs_old = control->ras_num_recs;
- return amdgpu_ras_smu_get_badpage_count(adev,
+
+ do {
+ ret = amdgpu_ras_smu_get_badpage_count(adev,
&(control->ras_num_recs), 12);
+ if (!ret &&
+ (control->ras_num_recs_old == control->ras_num_recs)) {
+ /* record number update in PMFW needs some time */
+ msleep(50);
+ timeout -= 50;
+ } else {
+ break;
+ }
+ } while (timeout);
+
+ /* no update of record number is not a real failure,
+ * don't print warning here
+ */
+ if (!ret && (control->ras_num_recs_old == control->ras_num_recs))
+ ret = -EINVAL;
+
+ return ret;
}
/**
--
2.34.1