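Check bad page records and drop the invalid ones: a record is treated as
invalid when its retired page lies at or beyond the end of VRAM
(real_vram_size).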

Signed-off-by: YiPeng Chai <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 27 +++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 57f13ad5605a..b0ef0800b380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3094,6 +3094,20 @@ static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
                return  -EINVAL;
 }
 
+static bool __check_record_in_range(struct amdgpu_device *adev,
+                       struct eeprom_table_record *bps, int count)
+{
+       int i;
+
+       for (i = 0; i < count; i++) { /* the first failing record rejects the whole set */
+               if (bps[i].retired_page >=
+                   (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT)) /* NOTE(review): presumably the VRAM size in GPU pages - confirm */
+                       return false;
+       }
+
+       return true; /* also reached when count <= 0: nothing was checked */
+}
+
 static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
                                        struct eeprom_table_record *bps, int count)
 {
@@ -3101,6 +3115,14 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data *data = con->eh_data;
 
+       if (!__check_record_in_range(adev, bps, count)) {
+               dev_warn(adev->dev,
+                       "Recorded address out of range: 0x%llx, 0x%llx, 0x%x, 0x%x\n",
+                       bps[0].address, bps[0].retired_page,
+                       bps[0].mem_channel, bps[0].mcumc_id);
+               return 0;
+       }
+
        for (j = 0; j < count; j++) {
                if (!data->space_left &&
                    amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
@@ -5642,6 +5664,11 @@ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
        uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT;
        int ret = 0;
 
+       if (pfn >= (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT)) {
+               dev_warn(adev->dev, "Ignoring out-of-range bad page 0x%llx", start);
+               return 0;
+       }
+
        if (amdgpu_ras_check_critical_address(adev, start))
                return 0;
 
-- 
2.43.0

Reply via email to