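Add address sanity check for uniras

Add a check_address_sanity callback to ras_sys_func together with a
ras_core_check_address_sanity() wrapper, so that rascore can ask the
system layer whether an address is valid. The amdgpu implementation
rejects addresses at or above the 52-bit inject address limit or the
VRAM size. The UMC v12.0 bad page lookup now skips addresses that fail
this check.

Also cache the retire unit in struct ras_umc and use it to compute the
page count reported in the "Saved %d pages to EEPROM table" message
from the saved records.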

Signed-off-by: Ce Sun <[email protected]>
---
 .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c  |  3 ---
 .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c  | 18 +++++++++++++
 drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h     |  3 +++
 drivers/gpu/drm/amd/ras/rascore/ras.h         |  2 ++
 drivers/gpu/drm/amd/ras/rascore/ras_core.c    | 10 ++++++++
 drivers/gpu/drm/amd/ras/rascore/ras_umc.c     | 25 +++++++++++++++++--
 drivers/gpu/drm/amd/ras/rascore/ras_umc.h     |  1 +
 .../gpu/drm/amd/ras/rascore/ras_umc_v12_0.c   |  9 +++++--
 8 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c
index cb6498c30834..473b387fa3db 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c
@@ -30,9 +30,6 @@
 #include "amdgpu_ras_mgr.h"
 #include "amdgpu_virt_ras_cmd.h"
 
-/* inject address is 52 bits */
-#define        RAS_UMC_INJECT_ADDR_LIMIT       (0x1ULL << 52)
-
 #define AMDGPU_RAS_TYPE_RASCORE  0x1
 #define AMDGPU_RAS_TYPE_AMDGPU   0x2
 #define AMDGPU_RAS_TYPE_VF       0x3
diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c
index 7d728e523604..eb840f0861fe 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c
@@ -266,6 +266,23 @@ static int amdgpu_ras_sys_put_gpu_mem(struct ras_core_context *ras_core,
 
        return 0;
 }
+static int amdgpu_ras_sys_check_address_sanity(struct ras_core_context *ras_core,
+                                               uint64_t addr)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
+
+       if ((addr >= adev->gmc.mc_vram_size &&
+           adev->gmc.mc_vram_size) ||
+           (addr >= RAS_UMC_INJECT_ADDR_LIMIT))
+               return -EINVAL;
+
+       if (addr >= adev->gmc.real_vram_size) {
+               RAS_DEV_WARN(ras_core->dev, "Recorded address out of range: 0x%llx!\n", addr);
+               return -EINVAL;
+       }
+
+       return 0;
+}
 
 const struct ras_sys_func amdgpu_ras_sys_fn = {
        .ras_notifier = amdgpu_ras_sys_event_notifier,
@@ -277,4 +294,5 @@ const struct ras_sys_func amdgpu_ras_sys_fn = {
        .detect_ras_interrupt = amdgpu_ras_sys_detect_ras_interrupt,
        .get_gpu_mem = amdgpu_ras_sys_get_gpu_mem,
        .put_gpu_mem = amdgpu_ras_sys_put_gpu_mem,
+       .check_address_sanity = amdgpu_ras_sys_check_address_sanity,
 };
diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h b/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h
index 8156531a7b63..239e56732e3e 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h
@@ -30,6 +30,9 @@
 #include <linux/mempool.h>
 #include "amdgpu.h"
 
+/* inject address is 52 bits */
+#define RAS_UMC_INJECT_ADDR_LIMIT       (0x1ULL << 52)
+
 #define RAS_DEV_ERR(device, fmt, ...)                                               \
        do {                                                                      \
                if (device)                                                             \
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h b/drivers/gpu/drm/amd/ras/rascore/ras.h
index 6449d7b8627d..44ddb7943a48 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras.h
@@ -231,6 +231,7 @@ struct ras_sys_func {
                enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem);
        int (*put_gpu_mem)(struct ras_core_context *ras_core,
                enum gpu_mem_type mem_type, struct gpu_mem_block *gpu_mem);
+       int (*check_address_sanity)(struct ras_core_context *ras_core, uint64_t addr);
 };
 
 struct ras_ecc_count {
@@ -399,4 +400,5 @@ int ras_core_get_device_system_info(struct ras_core_context *ras_core,
                struct device_system_info *dev_info);
 int ras_core_convert_soc_pa_to_cur_nps_pages(struct ras_core_context *ras_core,
                uint64_t soc_pa, uint64_t *page_pfn, uint32_t max_pages);
+int ras_core_check_address_sanity(struct ras_core_context *ras_core, uint64_t addr);
 #endif
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
index 29b1b8f0cc26..cfab7a7d2623 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
@@ -676,3 +676,13 @@ int ras_core_convert_soc_pa_to_cur_nps_pages(struct ras_core_context *ras_core,
 
        return count;
 }
+
+int ras_core_check_address_sanity(struct ras_core_context *ras_core,
+               uint64_t addr)
+{
+       if (ras_core && ras_core->sys_fn &&
+               ras_core->sys_fn->check_address_sanity)
+               return ras_core->sys_fn->check_address_sanity(ras_core, addr);
+
+       return 0;
+}
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
index d4072350f48f..0d4405a975b5 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
@@ -480,6 +480,27 @@ int ras_umc_load_bad_pages(struct ras_core_context *ras_core)
        return ret;
 }
 
+static int __calc_bad_page_count(struct ras_core_context *ras_core,
+               struct eeprom_umc_record *record, const u32 num)
+{
+       uint64_t *pfns;
+       uint32_t pfns_sz = ras_core->ras_umc.retire_unit;
+       int i, ret, count = 0;
+
+       pfns = kcalloc(pfns_sz, sizeof(*pfns), GFP_KERNEL);
+       if (!pfns)
+               return pfns_sz;
+
+       for (i = 0; i < num; i++) {
+               ret = ras_core_convert_soc_pa_to_cur_nps_pages(ras_core,
+                       RAS_PFN_TO_ADDR(record[i].retired_row_pfn), pfns, pfns_sz);
+               count  += (ret <= 0) ? pfns_sz : ret;
+       }
+
+       kfree(pfns);
+       return count;
+}
+
 /*
  * write error record array to eeprom, the function should be
  * protected by recovery_lock
@@ -515,8 +536,8 @@ static int ras_umc_save_bad_pages(struct ras_core_context *ras_core)
                        ret = -EIO;
                        goto exit;
                }
-
-               RAS_DEV_INFO(ras_core->dev, "Saved %d pages to EEPROM table.\n", save_count);
+               RAS_DEV_INFO(ras_core->dev, "Saved %d pages to EEPROM table.\n",
+                       __calc_bad_page_count(ras_core, &data->bps[eeprom_record_num], save_count));
        }
 
 exit:
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.h b/drivers/gpu/drm/amd/ras/rascore/ras_umc.h
index 1d3026be509b..05edacc165ba 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.h
@@ -139,6 +139,7 @@ struct ras_umc {
        struct mutex  pending_ecc_lock;
        struct ras_umc_err_data umc_err_data;
        struct list_head pending_ecc_list;
+       u32 retire_unit;
 };
 
 int ras_umc_sw_init(struct ras_core_context *ras);
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
index b809a2f21d73..fe5f92eb94a1 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
@@ -26,6 +26,8 @@
 #include "ras_core_status.h"
 #include "ras_umc_v12_0.h"
 
+#define RAS_UMC_V12_0_ADDR_LIMIT       (0x1ULL << 52)
+
 #define NumDieInterleaved 4
 
 static const uint32_t umc_v12_0_channel_idx_tbl[]
@@ -110,6 +112,7 @@ static void __get_nps_pa_flip_bits(struct ras_core_context *ras_core,
                        "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
                break;
        }
+       ras_core->ras_umc.retire_unit = 0x1 << flip_bits->bit_num;
 }
 
 static uint64_t  convert_nps_pa_to_row_pa(struct ras_core_context *ras_core,
@@ -166,7 +169,7 @@ static int lookup_bad_pages_in_a_row(struct ras_core_context *ras_core,
 
        idx = 0;
        row = 0;
-       retire_unit = 0x1 << flip_bits.bit_num;
+       retire_unit = ras_core->ras_umc.retire_unit;
        /* loop for all possibilities of retire bits */
        for (column = 0; column < retire_unit; column++) {
                soc_pa = row_pa;
@@ -186,7 +189,9 @@ static int lookup_bad_pages_in_a_row(struct ras_core_context *ras_core,
                                record->cur_nps_bank, record->mem_channel);
 
 
-               if (pfns && (idx < num))
+               if (pfns && (idx < num) &&
+                  (soc_pa < RAS_UMC_V12_0_ADDR_LIMIT) &&
+                  !ras_core_check_address_sanity(ras_core, soc_pa))
                        pfns[idx++] = RAS_ADDR_TO_PFN(soc_pa);
        }
 
-- 
2.34.1

Reply via email to