On 30-Jan-26 7:59 AM, Gangliang Xie wrote:
add read func for pmfw eeprom, and adapt address converting
for bad pages loaded from pmfw eeprom

Signed-off-by: Tao Zhou <[email protected]>
Signed-off-by: Gangliang Xie <[email protected]>
---
  drivers/gpu/drm/amd/ras/rascore/ras.h         |  1 +
  drivers/gpu/drm/amd/ras/rascore/ras_core.c    |  5 +-
  .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.c   | 70 +++++++++++++++++++
  .../gpu/drm/amd/ras/rascore/ras_eeprom_fw.h   |  5 ++
  drivers/gpu/drm/amd/ras/rascore/ras_umc.c     | 27 +++++--
  .../gpu/drm/amd/ras/rascore/ras_umc_v12_0.c   |  2 +-
  6 files changed, 101 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h 
b/drivers/gpu/drm/amd/ras/rascore/ras.h
index ae10d853c565..05c7923e8f0f 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras.h
@@ -241,6 +241,7 @@ struct ras_bank_ecc {
        uint64_t status;
        uint64_t ipid;
        uint64_t addr;
+       uint64_t ts;
  };
struct ras_bank_ecc_node {
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c 
b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
index 1f2ce3749d43..fe188a5304d9 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
@@ -239,7 +239,10 @@ static int ras_core_eeprom_recovery(struct 
ras_core_context *ras_core)
        int count;
        int ret;
- count = ras_eeprom_get_record_count(ras_core);
+       if (ras_fw_eeprom_supported(ras_core))
+               count = ras_fw_eeprom_get_record_count(ras_core);

As mentioned in earlier patches, the suggestion is to keep just ras_eeprom_get_record_count.

Within ras_eeprom, you can decide the access mechanism and fork to the different paths. That looks cleaner, and all common variables can be kept inside ras_eeprom itself.


+       else
+               count = ras_eeprom_get_record_count(ras_core);
        if (!count)
                return 0;
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
index 580dd7b09d00..79494ad16ee5 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
@@ -259,3 +259,73 @@ int ras_fw_eeprom_append(struct ras_core_context *ras_core,
        mutex_unlock(&control->ras_tbl_mutex);
        return 0;
  }
+
+int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
+                        struct eeprom_umc_record *record_umc,
+                        struct ras_bank_ecc *ras_ecc,
+                        u32 rec_idx, const u32 num)
+{
+       struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
+       int i, ret, end_idx;
+       u64 mca, ipid, ts;
+
+       if (!ras_core->ras_umc.ip_func ||
+           !ras_core->ras_umc.ip_func->mca_ipid_parse)
+               return -EOPNOTSUPP;
+
+       mutex_lock(&control->ras_tbl_mutex);
+
+       end_idx = rec_idx + num;
+       for (i = rec_idx; i < end_idx; i++) {
+               ret = ras_fw_get_badpage_mca_addr(ras_core, i, &mca);
+               if (ret)
+                       goto out;
+
+               ret = ras_fw_get_badpage_ipid(ras_core, i, &ipid);
+               if (ret)
+                       goto out;
+
+               ret = ras_fw_get_timestamp(ras_core, i, &ts);
+               if (ret)
+                       goto out;
+
+               if (record_umc) {
+                       record_umc[i - rec_idx].address = mca;
+                       /* retired_page (pa) is unused now */
+                       record_umc[i - rec_idx].retired_row_pfn = 0x1ULL;
+                       record_umc[i - rec_idx].ts = ts;
+                       record_umc[i - rec_idx].err_type = 
RAS_EEPROM_ERR_NON_RECOVERABLE;
+
+                       ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core, 
ipid,
+                               (uint32_t *)&(record_umc[i - rec_idx].cu),
+                               (uint32_t *)&(record_umc[i - 
rec_idx].mem_channel),
+                               (uint32_t *)&(record_umc[i - 
rec_idx].mcumc_id), NULL);
+
+                       /* update bad channel bitmap */
+                       if ((record_umc[i - rec_idx].mem_channel < 
BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+                               !(control->bad_channel_bitmap & (1 << 
record_umc[i - rec_idx].mem_channel))) {
+                               control->bad_channel_bitmap |= 1 << 
record_umc[i - rec_idx].mem_channel;
+                               control->update_channel_flag = true;
+                       }
+               }
+
+               if (ras_ecc) {
+                       ras_ecc[i - rec_idx].addr = mca;
+                       ras_ecc[i - rec_idx].ipid = ipid;
+                       ras_ecc[i - rec_idx].ts = ts;
+               }
+
+       }
+
+out:
+       mutex_unlock(&control->ras_tbl_mutex);
+       return ret;
+}
+
+uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core)
+{
+       if (!ras_core)
+               return 0;
+
+       return ras_core->ras_fw_eeprom.ras_num_recs;
+}
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h 
b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
index b94d3c9703e3..353977a2371e 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
@@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct ras_core_context 
*ras_core);
  bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core);
  int ras_fw_eeprom_append(struct ras_core_context *ras_core,
                           struct eeprom_umc_record *record, const u32 num);
+int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
+                        struct eeprom_umc_record *record_umc,
+                        struct ras_bank_ecc *ras_ecc,
+                        u32 rec_idx, const u32 num);
+uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core);
#endif
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c 
b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
index fd427fd59ecf..eb5bb6df18f5 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
@@ -436,17 +436,27 @@ int ras_umc_load_bad_pages(struct ras_core_context 
*ras_core)
        uint32_t ras_num_recs;
        int ret;
- ras_num_recs = ras_eeprom_get_record_count(ras_core);
-       /* no bad page record, skip eeprom access */
-       if (!ras_num_recs ||
-           ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
-               return 0;
+       if (ras_fw_eeprom_supported(ras_core)) {
+               ras_num_recs = ras_fw_eeprom_get_record_count(ras_core);
+               /* no bad page record, skip eeprom access */
+               if (!ras_num_recs ||
+                   ras_core->ras_fw_eeprom.record_threshold_config == 
DISABLE_RETIRE_PAGE)
+                       return 0;
+       } else {
+               ras_num_recs = ras_eeprom_get_record_count(ras_core);
+               if (!ras_num_recs ||
+                   ras_core->ras_eeprom.record_threshold_config == 
DISABLE_RETIRE_PAGE)

This is an example where common variables/logic get repeated.

Thanks,
Lijo

+                       return 0;
+       }
bps = kcalloc(ras_num_recs, sizeof(*bps), GFP_KERNEL);
        if (!bps)
                return -ENOMEM;
- ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
+       if (ras_fw_eeprom_supported(ras_core))
+               ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs);
+       else
+               ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
        if (ret) {
                RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table 
records!");
        } else {
@@ -474,7 +484,10 @@ static int ras_umc_save_bad_pages(struct ras_core_context 
*ras_core)
        if (!data->bps)
                return 0;
- eeprom_record_num = ras_eeprom_get_record_count(ras_core);
+       if (ras_fw_eeprom_supported(ras_core))
+               eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core);
+       else
+               eeprom_record_num = ras_eeprom_get_record_count(ras_core);
        mutex_lock(&ras_umc->umc_lock);
        save_count = data->count - eeprom_record_num;
        /* only new entries are saved */
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c 
b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
index e2792b239bea..53dc59e4de0c 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
@@ -413,7 +413,7 @@ static int umc_v12_0_eeprom_record_to_nps_record(struct 
ras_core_context *ras_co
        uint64_t pa = 0;
        int ret = 0;
- if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) {
+       if (nps == EEPROM_RECORD_UMC_NPS_MODE(record) && 
!ras_fw_eeprom_supported(ras_core)) {
                record->cur_nps_retired_row_pfn = 
EEPROM_RECORD_UMC_ADDR_PFN(record);
        } else {
                ret = convert_eeprom_record_to_nps_addr(ras_core,

Reply via email to