When PMFW manages the EEPROM, get the bad page address by converting the
MCA address, instead of from the physical address.

v2: add a comment to make the code more readable

Signed-off-by: Tao Zhou <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c        | 15 ++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c |  4 ++--
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 81135cd45d0e..ac00bc93f34c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3010,8 +3010,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
        addr_in.ma.err_addr = bps->address;
        addr_in.ma.socket_id = socket;
        addr_in.ma.ch_inst = bps->mem_channel;
-       /* tell RAS TA the node instance is not used */
-       addr_in.ma.node_inst = TA_RAS_INV_NODE;
+       if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+               /* tell RAS TA the node instance is not used */
+               addr_in.ma.node_inst = TA_RAS_INV_NODE;
+       } else {
+               addr_in.ma.umc_inst = bps->mcumc_id;
+               addr_in.ma.node_inst = bps->cu;
+       }
 
        if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
                ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
@@ -3158,7 +3163,11 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
                save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
                bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
        } else {
-               save_nps = nps;
+               /* If PMFW manages the EEPROM, save_nps is not stored on it.
+                * Always convert the MCA address into a physical address:
+                * force save_nps != nps so the equal-NPS branch is skipped.
+                */
+               save_nps = nps + 1;
        }
 
        if (save_nps == nps) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 64c004a2b767..425aa44446cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -1019,10 +1019,10 @@ int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
                record[i - rec_idx].retired_page = 0x1ULL;
                record[i - rec_idx].ts = ts;
                record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-               record[i - rec_idx].cu = 0;
 
                if (adev->umc.ras->mca_ipid_parse)
-                       adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
+                       adev->umc.ras->mca_ipid_parse(adev, ipid,
+                               (uint32_t *)&(record[i - rec_idx].cu),
                                (uint32_t *)&(record[i - rec_idx].mem_channel),
                                (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL);
                else
-- 
2.34.1

Reply via email to