RE: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank
[AMD Official Use Only - General]

Reviewed-by: Tao Zhou

> -----Original Message-----
> From: amd-gfx On Behalf Of Candice Li
> Sent: Friday, February 2, 2024 7:13 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Li, Candice
> Subject: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank
>
> Instead of software managed counters.
>
> Signed-off-by: Candice Li
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index d6e14a5f406e63..03873d784be6d6 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -2552,8 +2552,12 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
>  enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
>  {
>  	uint64_t status0;
> +	uint32_t ext_error_code;
> +	uint32_t odecc_err_cnt;
>
>  	status0 = entry->regs[MCA_REG_IDX_STATUS];
> +	ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0);
> +	odecc_err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);
>
>  	if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
>  		*count = 0;
> @@ -2563,7 +2567,7 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
>  	if (umc_v12_0_is_deferred_error(adev, status0) ||
>  	    umc_v12_0_is_uncorrectable_error(adev, status0) ||
>  	    umc_v12_0_is_correctable_error(adev, status0))
> -		*count = 1;
> +		*count = (ext_error_code == 0) ? odecc_err_cnt : 1;
>
>  	return 0;
>  }
> --
> 2.25.1
RE: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang

Regards,
Hawking

-----Original Message-----
From: amd-gfx On Behalf Of Candice Li
Sent: Friday, February 2, 2024 19:13
To: amd-gfx@lists.freedesktop.org
Cc: Li, Candice
Subject: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank

Instead of software managed counters.

Signed-off-by: Candice Li
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index d6e14a5f406e63..03873d784be6d6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2552,8 +2552,12 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
 {
 	uint64_t status0;
+	uint32_t ext_error_code;
+	uint32_t odecc_err_cnt;

 	status0 = entry->regs[MCA_REG_IDX_STATUS];
+	ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0);
+	odecc_err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);

 	if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
 		*count = 0;
@@ -2563,7 +2567,7 @@ static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct
 	if (umc_v12_0_is_deferred_error(adev, status0) ||
 	    umc_v12_0_is_uncorrectable_error(adev, status0) ||
 	    umc_v12_0_is_correctable_error(adev, status0))
-		*count = 1;
+		*count = (ext_error_code == 0) ? odecc_err_cnt : 1;

 	return 0;
 }
--
2.25.1