RE: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank

2024-02-03 Thread Zhou1, Tao
[AMD Official Use Only - General]

Reviewed-by: Tao Zhou 

> -----Original Message-----
> From: amd-gfx  On Behalf Of Candice Li
> Sent: Friday, February 2, 2024 7:13 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Li, Candice 
> Subject: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank
>
> Retrieve the UMC ODECC error count from the ACA bank instead of using
> software-managed counters.
>
> Signed-off-by: Candice Li 
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> index d6e14a5f406e63..03873d784be6d6 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
> @@ -2552,8 +2552,12 @@ static int mca_umc_mca_get_err_count(const struct
> mca_ras_info *mca_ras, struct
>enum amdgpu_mca_error_type type, struct
> mca_bank_entry *entry, uint32_t *count)  {
>   uint64_t status0;
> + uint32_t ext_error_code;
> + uint32_t odecc_err_cnt;
>
>   status0 = entry->regs[MCA_REG_IDX_STATUS];
> + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0);
> + odecc_err_cnt =
> +MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);
>
>   if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
>   *count = 0;
> @@ -2563,7 +2567,7 @@ static int mca_umc_mca_get_err_count(const struct
> mca_ras_info *mca_ras, struct
>   if (umc_v12_0_is_deferred_error(adev, status0) ||
>   umc_v12_0_is_uncorrectable_error(adev, status0) ||
>   umc_v12_0_is_correctable_error(adev, status0))
> - *count = 1;
> + *count = (ext_error_code == 0) ? odecc_err_cnt : 1;
>
>   return 0;
>  }
> --
> 2.25.1



RE: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank

2024-02-03 Thread Zhang, Hawking
[AMD Official Use Only - General]

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-----Original Message-----
From: amd-gfx  On Behalf Of Candice Li
Sent: Friday, February 2, 2024 19:13
To: amd-gfx@lists.freedesktop.org
Cc: Li, Candice 
Subject: [PATCH] drm/amd/pm: Retrieve UMC ODECC error count from aca bank

Retrieve the UMC ODECC error count from the ACA bank instead of using
software-managed counters.

Signed-off-by: Candice Li 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index d6e14a5f406e63..03873d784be6d6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2552,8 +2552,12 @@ static int mca_umc_mca_get_err_count(const struct 
mca_ras_info *mca_ras, struct
 enum amdgpu_mca_error_type type, struct 
mca_bank_entry *entry, uint32_t *count)  {
uint64_t status0;
+   uint32_t ext_error_code;
+   uint32_t odecc_err_cnt;

status0 = entry->regs[MCA_REG_IDX_STATUS];
+   ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0);
+   odecc_err_cnt =
+MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]);

if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) {
*count = 0;
@@ -2563,7 +2567,7 @@ static int mca_umc_mca_get_err_count(const struct 
mca_ras_info *mca_ras, struct
if (umc_v12_0_is_deferred_error(adev, status0) ||
umc_v12_0_is_uncorrectable_error(adev, status0) ||
umc_v12_0_is_correctable_error(adev, status0))
-   *count = 1;
+   *count = (ext_error_code == 0) ? odecc_err_cnt : 1;

return 0;
 }
--
2.25.1