[AMD Official Use Only - AMD Internal Distribution Only]

+               if (adev->umc.ras->mca_ipid_parse)
+                       adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
+                               (uint32_t *)&(record[i - rec_idx].mem_channel),
+                               (uint32_t *)&(record[i - rec_idx].mcumc_id), 
NULL);
+               else
+                       return -EOPNOTSUPP;


It would be better to move the NULL pointer check out of the loop and perform
it once up front, before any records are read, to avoid unnecessary operations.
With that fixed, the patch is

Reviewed-by: Yang Wang <[email protected]>

Best Regards,
Kevin

-----Original Message-----
From: amd-gfx <[email protected]> On Behalf Of Tao Zhou
Sent: Wednesday, November 5, 2025 10:05
To: [email protected]
Cc: Zhou1, Tao <[email protected]>
Subject: [PATCH 2/8] drm/amdgpu: add ras_eeprom_read_idx interface

PMFW manages the RAS EEPROM data by itself. Add a new interface to read EEPROM
data via PMFW; a subset of the records can be read by specifying a start index.

v2: use the IPID parse interface.
    pa is unused, so set it to a fixed value.

Signed-off-by: Tao Zhou <[email protected]>
---
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    | 51 +++++++++++++++++++
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h    |  4 ++
 2 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index d7e2a81bc274..47f292557a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -970,6 +970,50 @@ static int __amdgpu_ras_eeprom_read(struct 
amdgpu_ras_eeprom_control *control,
        return res;
 }

+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+                       struct eeprom_table_record *record, u32 rec_idx,
+                       const u32 num)
+{
+       struct amdgpu_device *adev = to_amdgpu_device(control);
+       uint64_t ts, end_idx;
+       int i, ret;
+       u64 mca, ipid;
+
+       if (!amdgpu_ras_smu_eeprom_supported(adev))
+               return 0;
+
+       end_idx = rec_idx + num;
+       for (i = rec_idx; i < end_idx; i++) {
+               ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, i, &mca);
+               if (ret)
+                       return ret;
+
+               ret = amdgpu_ras_smu_get_badpage_ipid(adev, i, &ipid);
+               if (ret)
+                       return ret;
+
+               ret = amdgpu_ras_smu_get_timestamp(adev, i, &ts);
+               if (ret)
+                       return ret;
+
+               record[i - rec_idx].address = mca;
+               /* retired_page (pa) is unused now */
+               record[i - rec_idx].retired_page = 0x1ULL;
+               record[i - rec_idx].ts = ts;
+               record[i - rec_idx].err_type = 
AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+               record[i - rec_idx].cu = 0;
+
+               if (adev->umc.ras->mca_ipid_parse)
+                       adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
+                               (uint32_t *)&(record[i - rec_idx].mem_channel),
+                               (uint32_t *)&(record[i - rec_idx].mcumc_id), 
NULL);
+               else
+                       return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
 /**
  * amdgpu_ras_eeprom_read -- read EEPROM
  * @control: pointer to control structure
@@ -991,6 +1035,9 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
        u8 *buf, *pp;
        u32 g0, g1;

+       if (amdgpu_ras_smu_eeprom_supported(adev))
+               return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
+
        if (!__is_ras_eeprom_supported(adev))
                return 0;

@@ -1162,6 +1209,10 @@ static ssize_t amdgpu_ras_debugfs_table_read(struct file 
*f, char __user *buf,
        int res = -EFAULT;
        size_t data_len;

+       /* pmfw manages eeprom data by itself */
+       if (amdgpu_ras_smu_eeprom_supported(adev))
+               return 0;
+
        mutex_lock(&control->ras_tbl_mutex);

        /* We want *pos - data_len > 0, which means there's
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index cfbd402ddea2..e881007f715b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -186,6 +186,10 @@ int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
 int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
                                                                        uint32_t *result);

+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+                       struct eeprom_table_record *record, u32 rec_idx,
+                       const u32 num);
+
 extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
 extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;

--
2.34.1

Reply via email to