[PATCH 2/2] drm/amdgpu: exclude duplicate pages from UMC RAS UE count

2023-02-19 Thread Tao Zhou
If a UMC bad page is reserved but not freed by an application, the
application may trigger uncorrectable errors repeatedly by accessing the page.
Such repeated errors on an already reserved page should not be counted as new UEs.

v2: add specific function to do the check.
v3: remove duplicate pages, calculate the number of newly added bad pages.
v4: reuse save_bad_pages to calculate the number of newly added bad pages.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  5 +++--
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 6e543558386d..5c02c6c9f773 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -176,7 +176,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device 
*adev, uint64_t addre
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
 err_data.err_addr_cnt);
-   amdgpu_ras_save_bad_pages(adev);
+   amdgpu_ras_save_bad_pages(adev, NULL);
}
 
dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL 
CORRUPT RAS EEPROM\n");
@@ -2084,22 +2084,32 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 /*
  * write error record array to eeprom, the function should be
  * protected by recovery_lock
+ * new_cnt: new added UE count, excluding reserved bad pages, can be NULL
  */
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+   unsigned long *new_cnt)
 {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
int save_count;
 
-   if (!con || !con->eh_data)
+   if (!con || !con->eh_data) {
+   if (new_cnt)
+   *new_cnt = 0;
+
return 0;
+   }
 
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
save_count = data->count - control->ras_num_recs;
mutex_unlock(&con->recovery_lock);
+
+   if (new_cnt)
+   *new_cnt = save_count / adev->umc.retire_unit;
+
/* only new entries are saved */
if (save_count > 0) {
if (amdgpu_ras_eeprom_append(control,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index f2ad93f6..ef38f4c93df0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -547,7 +547,8 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
struct eeprom_table_record *bps, int pages);
 
-int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+   unsigned long *new_cnt);
 
 static inline enum ta_ras_block
 amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 1c7fcb4f2380..7c6fc3214339 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -68,7 +68,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
err_data.err_addr_cnt);
-   amdgpu_ras_save_bad_pages(adev);
+   amdgpu_ras_save_bad_pages(adev, NULL);
}
 
 out:
@@ -147,7 +147,8 @@ static int amdgpu_umc_do_page_retirement(struct 
amdgpu_device *adev,
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
err_data->err_addr_cnt);
-   amdgpu_ras_save_bad_pages(adev);
+
+   amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count));
 
amdgpu_dpm_send_hbm_bad_pages_num(adev, 
con->eeprom_control.ras_num_recs);
 
-- 
2.35.1



[PATCH 1/2] drm/amdgpu: add umc retire unit element

2023-02-19 Thread Tao Zhou
It records how many bad pages are retired in one uncorrectable error.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  | 1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index a6951160f13a..f2bf979af588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -74,6 +74,8 @@ struct amdgpu_umc {
 
/* UMC regiser per channel offset */
uint32_t channel_offs;
+   /* how many pages are retired in one UE */
+   uint32_t retire_unit;
/* channel index table of interleaved memory */
const uint32_t *channel_idx_tbl;
struct ras_common_if *ras_if;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fe2c15f598b8..c59c2332d191 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -696,6 +696,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
adev->umc.ras = &umc_v8_7_ras;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 0a31a341aa43..85e0afc3d4f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -570,6 +570,7 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.node_inst_num = adev->gmc.num_umc;
adev->umc.max_ras_err_cnt_per_query = 
UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+   adev->umc.retire_unit = UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM;
if (adev->umc.node_inst_num == 4)
adev->umc.channel_idx_tbl = 
&umc_v8_10_channel_idx_tbl_ext0[0][0][0];
else
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index d65c6cea3445..b06170c00dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1288,6 +1288,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
adev->umc.ras = &umc_v6_1_ras;
break;
@@ -1296,6 +1297,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+   adev->umc.retire_unit = 1;
adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
adev->umc.ras = &umc_v6_1_ras;
break;
@@ -1305,6 +1307,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device 
*adev)
adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+   adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
if (!adev->gmc.xgmi.connected_to_cpu)
adev->umc.ras = &umc_v6_7_ras;
if (1 & adev->smuio.funcs->get_die_id(adev))
-- 
2.35.1



RE: [PATCH] drm/amdgpu: fix incorrect active rb bitmap for gfx11

2023-02-19 Thread Gao, Likun
This patch is Reviewed-by: Likun Gao 

Regards,
Likun

-Original Message-
From: amd-gfx  On Behalf Of Hawking Zhang
Sent: Monday, February 20, 2023 9:09 AM
To: amd-gfx@lists.freedesktop.org; Xu, Feifei ; Gao, Likun 
; Deucher, Alexander 
Cc: Zhang, Hawking 
Subject: [PATCH] drm/amdgpu: fix incorrect active rb bitmap for gfx11

GFX v11 changes the RB_BACKEND_DISABLE related registers from per-SA to global
ones, so the approach to querying the active RB bitmap needs to change accordingly.
Querying the per-SE setting returns a wrong active RB bitmap, especially when
some of the SAs are disabled. With the new approach, the driver generates the active
RB bitmap based on the active SA bitmap and the global active RB bitmap.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 78 +-
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index e7e5a2c31896..7b7f01b304cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1503,44 +1503,70 @@ static void gfx_v11_0_select_se_sh(struct amdgpu_device 
*adev, u32 se_num,
WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);  }
 
-static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
 {
-   u32 data, mask;
+   u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+   gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+   gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+  CC_GC_SA_UNIT_DISABLE,
+  SA_DISABLE);
+   gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, 
regGC_USER_SA_UNIT_DISABLE);
+   gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+GC_USER_SA_UNIT_DISABLE,
+SA_DISABLE);
+   sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+   
adev->gfx.config.max_shader_engines);
 
-   data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
-   data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+   return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 
+}
 
-   data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-   data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) {
+   u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+   u32 rb_mask;
 
-   mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
-adev->gfx.config.max_sh_per_se);
+   gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+   gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+   CC_RB_BACKEND_DISABLE,
+   BACKEND_DISABLE);
+   gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, 
regGC_USER_RB_BACKEND_DISABLE);
+   gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+GC_USER_RB_BACKEND_DISABLE,
+BACKEND_DISABLE);
+   rb_mask = 
amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+   
adev->gfx.config.max_shader_engines);
 
-   return (~data) & mask;
+   return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
 }
 
 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)  {
-   int i, j;
-   u32 data;
-   u32 active_rbs = 0;
-   u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
-   adev->gfx.config.max_sh_per_se;
+   u32 rb_bitmap_width_per_sa;
+   u32 max_sa;
+   u32 active_sa_bitmap;
+   u32 global_active_rb_bitmap;
+   u32 active_rb_bitmap = 0;
+   u32 i;
 
-   mutex_lock(&adev->grbm_idx_mutex);
-   for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-   for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-   gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
-   data = gfx_v11_0_get_rb_active_bitmap(adev);
-   active_rbs |= data << ((i * 
adev->gfx.config.max_sh_per_se + j) *
-  rb_bitmap_width_per_sh);
-   }
+   /* query sa bitmap from SA_UNIT_DISABLE registers */
+   active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+   /* query rb bitmap from RB_BACKEND_DISABLE registers */
+   global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+   /* generate active rb bitmap according to 

[PATCH] drm/amdgpu: fix incorrect active rb bitmap for gfx11

2023-02-19 Thread Hawking Zhang
GFX v11 changes the RB_BACKEND_DISABLE related registers
from per-SA to global ones, so the approach to querying the
active RB bitmap needs to change accordingly. Querying the
per-SE setting returns a wrong active RB bitmap, especially
when some of the SAs are disabled. With the new approach,
the driver generates the active RB bitmap based on the
active SA bitmap and the global active RB bitmap.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 78 +-
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index e7e5a2c31896..7b7f01b304cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1503,44 +1503,70 @@ static void gfx_v11_0_select_se_sh(struct amdgpu_device 
*adev, u32 se_num,
WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
 }
 
-static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
 {
-   u32 data, mask;
+   u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+   gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+   gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+  CC_GC_SA_UNIT_DISABLE,
+  SA_DISABLE);
+   gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, 
regGC_USER_SA_UNIT_DISABLE);
+   gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+GC_USER_SA_UNIT_DISABLE,
+SA_DISABLE);
+   sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+   
adev->gfx.config.max_shader_engines);
 
-   data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
-   data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+   return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
+}
 
-   data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-   data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+   u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+   u32 rb_mask;
 
-   mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
-adev->gfx.config.max_sh_per_se);
+   gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+   gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+   CC_RB_BACKEND_DISABLE,
+   BACKEND_DISABLE);
+   gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, 
regGC_USER_RB_BACKEND_DISABLE);
+   gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+GC_USER_RB_BACKEND_DISABLE,
+BACKEND_DISABLE);
+   rb_mask = 
amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+   
adev->gfx.config.max_shader_engines);
 
-   return (~data) & mask;
+   return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
 }
 
 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
 {
-   int i, j;
-   u32 data;
-   u32 active_rbs = 0;
-   u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
-   adev->gfx.config.max_sh_per_se;
+   u32 rb_bitmap_width_per_sa;
+   u32 max_sa;
+   u32 active_sa_bitmap;
+   u32 global_active_rb_bitmap;
+   u32 active_rb_bitmap = 0;
+   u32 i;
 
-   mutex_lock(&adev->grbm_idx_mutex);
-   for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-   for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-   gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
-   data = gfx_v11_0_get_rb_active_bitmap(adev);
-   active_rbs |= data << ((i * 
adev->gfx.config.max_sh_per_se + j) *
-  rb_bitmap_width_per_sh);
-   }
+   /* query sa bitmap from SA_UNIT_DISABLE registers */
+   active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+   /* query rb bitmap from RB_BACKEND_DISABLE registers */
+   global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+   /* generate active rb bitmap according to active sa bitmap */
+   max_sa = adev->gfx.config.max_shader_engines *
+adev->gfx.config.max_sh_per_se;
+   rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+adev->gfx.config.max_sh_per_se;
+   for (i = 0; i < max_sa; i++) {
+   if (active_sa_bitmap & (1 

RE: [PATCH] drm/amdgpu: fix incorrect active rb bitmap for gfx11

2023-02-19 Thread Zhang, Hawking
Please ignore this one. Some code needs to be optimized. I'll send out another 
one for the review.

Regards,
Hawking

-Original Message-
From: Zhang, Hawking  
Sent: Sunday, February 19, 2023 14:33
To: amd-gfx@lists.freedesktop.org; Xu, Feifei ; Gao, Likun 
; Deucher, Alexander 
Cc: Zhang, Hawking 
Subject: [PATCH] drm/amdgpu: fix incorrect active rb bitmap for gfx11

GFX v11 changes the RB_BACKEND_DISABLE related registers from per-SA to global
ones, so the approach to querying the active RB bitmap needs to change accordingly.
Querying the per-SE setting returns a wrong active RB bitmap, especially when
some of the SAs are disabled. With the new approach, the driver generates the active
RB bitmap based on the active SA bitmap and the global active RB bitmap.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 75 +-
 1 file changed, 49 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index e7e5a2c31896..87a6cdac3d45 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1503,44 +1503,67 @@ static void gfx_v11_0_select_se_sh(struct amdgpu_device 
*adev, u32 se_num,
WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);  }
 
-static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
 {
-   u32 data, mask;
+   u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+   gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+   gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+  CC_GC_SA_UNIT_DISABLE,
+  SA_DISABLE);
+   gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, 
regGC_USER_SA_UNIT_DISABLE);
+   gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+GC_USER_SA_UNIT_DISABLE,
+SA_DISABLE);
+   sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+   
adev->gfx.config.max_shader_engines);
 
-   data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
-   data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+   return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 
+}
 
-   data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-   data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) {
+   u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+   u32 rb_mask;
 
-   mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
-adev->gfx.config.max_sh_per_se);
+   gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+   gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+   CC_RB_BACKEND_DISABLE,
+   BACKEND_DISABLE);
+   gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, 
regGC_USER_RB_BACKEND_DISABLE);
+   gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+GC_USER_RB_BACKEND_DISABLE,
+BACKEND_DISABLE);
+   rb_mask = 
amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+   
adev->gfx.config.max_shader_engines);
 
-   return (~data) & mask;
+   return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
 }
 
 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)  {
-   int i, j;
-   u32 data;
-   u32 active_rbs = 0;
-   u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
-   adev->gfx.config.max_sh_per_se;
+   u32 active_rb_bitmap = 0;
+   u32 max_sa;
+   u32 active_sa_bitmap;
+   u32 global_active_rb_bitmap;
+   u32 i;
 
-   mutex_lock(&adev->grbm_idx_mutex);
-   for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-   for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-   gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
-   data = gfx_v11_0_get_rb_active_bitmap(adev);
-   active_rbs |= data << ((i * 
adev->gfx.config.max_sh_per_se + j) *
-  rb_bitmap_width_per_sh);
-   }
+   /* query sa bitmap from SA_UNIT_DISABLE registers */
+   active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+   /* query rb bitmap from RB_BACKEND_DISABLE registers */
+   global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+   /* generate active rb bitmap