[PATCH 1/2] drm/amdgpu: add umc retire unit element

2023-02-19 Thread Tao Zhou
It records how many bad pages are retired in one uncorrectable error.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index a6951160f13a..f2bf979af588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -74,6 +74,8 @@ struct amdgpu_umc {
 
/* UMC regiser per channel offset */
uint32_t channel_offs;
+   /* how many pages are retired in one UE */
+   uint32_t retire_unit;
/* channel index table of interleaved memory */
const uint32_t *channel_idx_tbl;
struct ras_common_if *ras_if;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fe2c15f598b8..c59c2332d191 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -696,6 +696,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = _v8_7_channel_idx_tbl[0][0];
adev->umc.ras = _v8_7_ras;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 0a31a341aa43..85e0afc3d4f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -570,6 +570,7 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.node_inst_num = adev->gmc.num_umc;
adev->umc.max_ras_err_cnt_per_query = 
UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+   adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
if (adev->umc.node_inst_num == 4)
adev->umc.channel_idx_tbl = 
_v8_10_channel_idx_tbl_ext0[0][0][0];
else
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index d65c6cea3445..b06170c00dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1288,6 +1288,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = _v6_1_channel_idx_tbl[0][0];
adev->umc.ras = _v6_1_ras;
break;
@@ -1296,6 +1297,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = _v6_1_channel_idx_tbl[0][0];
adev->umc.ras = _v6_1_ras;
break;
@@ -1305,6 +1307,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+   adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
if (!adev->gmc.xgmi.connected_to_cpu)
adev->umc.ras = _v6_7_ras;
if (1 & adev->smuio.funcs->get_die_id(adev))
-- 
2.35.1



[PATCH 1/2] drm/amdgpu: add umc retire unit element

2023-02-16 Thread Tao Zhou
It records how many bad pages are retired in one uncorrectable error.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index a6951160f13a..f2bf979af588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -74,6 +74,8 @@ struct amdgpu_umc {
 
/* UMC regiser per channel offset */
uint32_t channel_offs;
+   /* how many pages are retired in one UE */
+   uint32_t retire_unit;
/* channel index table of interleaved memory */
const uint32_t *channel_idx_tbl;
struct ras_common_if *ras_if;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fe2c15f598b8..c59c2332d191 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -696,6 +696,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = _v8_7_channel_idx_tbl[0][0];
adev->umc.ras = _v8_7_ras;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 0a31a341aa43..85e0afc3d4f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -570,6 +570,7 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.node_inst_num = adev->gmc.num_umc;
adev->umc.max_ras_err_cnt_per_query = 
UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+   adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
if (adev->umc.node_inst_num == 4)
adev->umc.channel_idx_tbl = 
_v8_10_channel_idx_tbl_ext0[0][0][0];
else
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index d65c6cea3445..b06170c00dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1288,6 +1288,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = _v6_1_channel_idx_tbl[0][0];
adev->umc.ras = _v6_1_ras;
break;
@@ -1296,6 +1297,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = _v6_1_channel_idx_tbl[0][0];
adev->umc.ras = _v6_1_ras;
break;
@@ -1305,6 +1307,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+   adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
if (!adev->gmc.xgmi.connected_to_cpu)
adev->umc.ras = _v6_7_ras;
if (1 & adev->smuio.funcs->get_die_id(adev))
-- 
2.35.1