RE: [PATCH] drm/amdgpu: Correct se_num and reg_inst for gfx v9_4_3 ras counters

2023-09-06 Thread Zhou1, Tao
[AMD Official Use Only - General]

Reviewed-by: Tao Zhou 

> -----Original Message-----
> From: amd-gfx  On Behalf Of Hawking
> Zhang
> Sent: Wednesday, September 6, 2023 6:12 PM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao ; Yang,
> Stanley ; Li, Candice ; Chai,
> Thomas 
> Cc: Zhang, Hawking 
> Subject: [PATCH] drm/amdgpu: Correct se_num and reg_inst for gfx v9_4_3 ras
> counters
>
> gfx_v9_4_3_ue|ce_reg_list is an array per gfx core instance. Correct the
> settings of se_num and reg_inst for some of the gfx ras counters so that all
> the available register instances can be polled for ras status.
>
> Signed-off-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 40 -
>  1 file changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 0a26a00074a6..a60d1a8405d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -3653,19 +3653,19 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ce_reg_list[] = {
>   AMDGPU_GFX_GC_CANE_MEM, 1},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO,
> regSPI_CE_ERR_STATUS_HI),
>   1, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SPI"},
> - AMDGPU_GFX_SPI_MEM, 8},
> + AMDGPU_GFX_SPI_MEM, 1},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO,
> regSP0_CE_ERR_STATUS_HI),
>   10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SP0"},
> - AMDGPU_GFX_SP_MEM, 1},
> + AMDGPU_GFX_SP_MEM, 4},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO,
> regSP1_CE_ERR_STATUS_HI),
>   10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SP1"},
> - AMDGPU_GFX_SP_MEM, 1},
> + AMDGPU_GFX_SP_MEM, 4},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO,
> regSQ_CE_ERR_STATUS_HI),
>   10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SQ"},
> - AMDGPU_GFX_SQ_MEM, 8},
> + AMDGPU_GFX_SQ_MEM, 4},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO,
> regSQC_CE_EDC_HI),
>   5, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "SQC"},
> - AMDGPU_GFX_SQC_MEM, 8},
> + AMDGPU_GFX_SQC_MEM, 4},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO,
> regTCX_CE_ERR_STATUS_HI),
>   2, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCX"},
>   AMDGPU_GFX_TCX_MEM, 1},
> @@ -3674,22 +3674,22 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ce_reg_list[] = {
>   AMDGPU_GFX_TCC_MEM, 1},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO,
> regTA_CE_EDC_HI),
>   10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TA"},
> - AMDGPU_GFX_TA_MEM, 8},
> + AMDGPU_GFX_TA_MEM, 4},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG,
> regTCI_CE_EDC_HI_REG),
> - 31, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCI"},
> + 27, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID),
> +"TCI"},
>   AMDGPU_GFX_TCI_MEM, 1},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG,
> regTCP_CE_EDC_HI_REG),
>   10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TCP"},
> - AMDGPU_GFX_TCP_MEM, 8},
> + AMDGPU_GFX_TCP_MEM, 4},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO,
> regTD_CE_EDC_HI),
>   10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "TD"},
> - AMDGPU_GFX_TD_MEM, 8},
> + AMDGPU_GFX_TD_MEM, 4},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO,
> regGCEA_CE_ERR_STATUS_HI),
>   16, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"},
>   AMDGPU_GFX_GCEA_MEM, 1},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO,
> regLDS_CE_ERR_STATUS_HI),
>   10, (AMDGPU_RAS_ERR_INFO_VALID |
> AMDGPU_RAS_ERR_STATUS_VALID), "LDS"},
> - AMDGPU_GFX_LDS_MEM, 1},
> + AMDGPU_GFX_LDS_MEM, 4},
>  };
>
>  static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { @@
> -3713,19 +3713,19 @@ static const struct amdgpu_gfx_ras_reg_entry
> gfx_v9_4_3_ue_reg_list[] = {
>   AMDGPU_GFX_GC_CANE_MEM, 1},
>   {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO,
> regSPI_UE_ERR_STATUS_HI),
>   1, (AMDGPU_RAS_ERR_INFO_VA

[PATCH] drm/amdgpu: Correct se_num and reg_inst for gfx v9_4_3 ras counters

2023-09-06 Thread Hawking Zhang
gfx_v9_4_3_ue|ce_reg_list is an array per gfx core instance.
Correct the settings of se_num and reg_inst for some of the
gfx ras counters so that all the available register instances
can be polled for ras status.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 40 -
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 0a26a00074a6..a60d1a8405d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3653,19 +3653,19 @@ static const struct amdgpu_gfx_ras_reg_entry 
gfx_v9_4_3_ce_reg_list[] = {
AMDGPU_GFX_GC_CANE_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, 
regSPI_CE_ERR_STATUS_HI),
1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SPI"},
-   AMDGPU_GFX_SPI_MEM, 8},
+   AMDGPU_GFX_SPI_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, 
regSP0_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SP0"},
-   AMDGPU_GFX_SP_MEM, 1},
+   AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, 
regSP1_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SP1"},
-   AMDGPU_GFX_SP_MEM, 1},
+   AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, 
regSQ_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SQ"},
-   AMDGPU_GFX_SQ_MEM, 8},
+   AMDGPU_GFX_SQ_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI),
5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SQC"},
-   AMDGPU_GFX_SQC_MEM, 8},
+   AMDGPU_GFX_SQC_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, 
regTCX_CE_ERR_STATUS_HI),
2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"TCX"},
AMDGPU_GFX_TCX_MEM, 1},
@@ -3674,22 +3674,22 @@ static const struct amdgpu_gfx_ras_reg_entry 
gfx_v9_4_3_ce_reg_list[] = {
AMDGPU_GFX_TCC_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"TA"},
-   AMDGPU_GFX_TA_MEM, 8},
+   AMDGPU_GFX_TA_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, 
regTCI_CE_EDC_HI_REG),
-   31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"TCI"},
+   27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"TCI"},
AMDGPU_GFX_TCI_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, 
regTCP_CE_EDC_HI_REG),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"TCP"},
-   AMDGPU_GFX_TCP_MEM, 8},
+   AMDGPU_GFX_TCP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"TD"},
-   AMDGPU_GFX_TD_MEM, 8},
+   AMDGPU_GFX_TD_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, 
regGCEA_CE_ERR_STATUS_HI),
16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"GCEA"},
AMDGPU_GFX_GCEA_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, 
regLDS_CE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"LDS"},
-   AMDGPU_GFX_LDS_MEM, 1},
+   AMDGPU_GFX_LDS_MEM, 4},
 };
 
 static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = {
@@ -3713,19 +3713,19 @@ static const struct amdgpu_gfx_ras_reg_entry 
gfx_v9_4_3_ue_reg_list[] = {
AMDGPU_GFX_GC_CANE_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, 
regSPI_UE_ERR_STATUS_HI),
1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SPI"},
-   AMDGPU_GFX_SPI_MEM, 8},
+   AMDGPU_GFX_SPI_MEM, 1},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, 
regSP0_UE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SP0"},
-   AMDGPU_GFX_SP_MEM, 1},
+   AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, 
regSP1_UE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SP1"},
-   AMDGPU_GFX_SP_MEM, 1},
+   AMDGPU_GFX_SP_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, 
regSQ_UE_ERR_STATUS_HI),
10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), 
"SQ"},
-   AMDGPU_GFX_SQ_MEM, 8},
+   AMDGPU_GFX_SQ_MEM, 4},
{{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI),
5