RE: [PATCH 08/11] drm/amdgpu: Rework mca ras sw_init

2023-03-05 Thread Yang, Stanley



> -----Original Message-----
> From: Zhang, Hawking 
> Sent: Monday, March 6, 2023 10:32 AM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao ;
> Yang, Stanley ; Li, Candice ;
> Chai, Thomas 
> Cc: Zhang, Hawking 
> Subject: [PATCH 08/11] drm/amdgpu: Rework mca ras sw_init
> 
> To align with other IP blocks
> 
> Signed-off-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 21 
> drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 72
> +
> drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  9 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 15 +++---
>  drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 44 ++-
>  drivers/gpu/drm/amd/amdgpu/mca_v3_0.h   |  4 +-
>  6 files changed, 111 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 087a75374610..524e2c9b3012 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -477,6 +477,27 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device
> *adev)
>   }
>   }
> 
> + /* mca.x ras block */
> + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MCA))
> {
> + r = amdgpu_mca_mp0_ras_sw_init(adev);
> + if (r) {
> + dev_err(adev->dev, "Failed to initialize mca.mp0 ras
> block!\n");
> + return r;
> + }
> +
> + r = amdgpu_mca_mp1_ras_sw_init(adev);
> + if (r) {
> + dev_err(adev->dev, "Failed to initialize mca.mp1 ras
> block!\n");
> + return r;
> + }
> +
> + r = amdgpu_mca_mpio_ras_sw_init(adev);
> + if (r) {
> + dev_err(adev->dev, "Failed to initialize mca.mpio ras
> block!\n");
> + return r;
> + }
> + }
> +
>   /* xgmi ras block */
>   if (amdgpu_ras_is_supported(adev,
> AMDGPU_RAS_BLOCK__XGMI_WAFL)) {
>   adev->gmc.xgmi.ras = &xgmi_ras;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> index 51c2a82e2fa4..0b545bdcd636 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> @@ -70,3 +70,75 @@ void amdgpu_mca_query_ras_error_count(struct
> amdgpu_device *adev,
> 
>   amdgpu_mca_reset_error_count(adev, mc_status_addr);  }
> +
> +int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev) {
> + int err;
> + struct amdgpu_mca_ras_block *ras;
> +
> + if (!adev->mca.mp0.ras)
> + return 0;
> +
> + ras = adev->mca.mp0.ras;
> +
> + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
> + if (err) {
> + dev_err(adev->dev, "Failed to register mca.mp0 ras
> block!\n");
> + return err;
> + }
> +
> + strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
> + ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
> + ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
> + adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
> +
> + return 0;
> +}
> +
> +int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev) {
> +int err;
> +struct amdgpu_mca_ras_block *ras;
> +
> +if (!adev->mca.mp1.ras)
> +return 0;
> +
> +ras = adev->mca.mp1.ras;
> +
> +err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
> +if (err) {
> +dev_err(adev->dev, "Failed to register mca.mp1 ras 
> block!\n");
> +return err;
> +}
> +
> +strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
> +ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
> +ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
> +adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
> +
> +return 0;
> +}
> +
> +int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) {
> +int err;
> +struct amdgpu_mca_ras_block *ras;
> +
> +if (!adev->mca.mpio.ras)
> +return 0;
> +
> +ras = adev->mca.mpio.ras;
> +
> +err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
> +if (err) {
> +dev_err(adev->dev, "Failed to register mca.mpio ras 
> block!\n");
> +return err;
> +

[PATCH 08/11] drm/amdgpu: Rework mca ras sw_init

2023-03-05 Thread Hawking Zhang
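Register the mca.mp0, mca.mp1 and mca.mpio ras blocks through new
amdgpu_mca_*_ras_sw_init() helpers called from amdgpu_gmc_ras_sw_init(),
and drop the amdgpu_mca_funcs init callback, to align with other IP blocks.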

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 21 
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 72 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  9 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 15 +++---
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 44 ++-
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.h   |  4 +-
 6 files changed, 111 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 087a75374610..524e2c9b3012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -477,6 +477,27 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
}
}
 
+   /* mca.x ras block */
+   if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MCA)) {
+   r = amdgpu_mca_mp0_ras_sw_init(adev);
+   if (r) {
+   dev_err(adev->dev, "Failed to initialize mca.mp0 ras block!\n");
+   return r;
+   }
+
+   r = amdgpu_mca_mp1_ras_sw_init(adev);
+   if (r) {
+   dev_err(adev->dev, "Failed to initialize mca.mp1 ras block!\n");
+   return r;
+   }
+
+   r = amdgpu_mca_mpio_ras_sw_init(adev);
+   if (r) {
+   dev_err(adev->dev, "Failed to initialize mca.mpio ras block!\n");
+   return r;
+   }
+   }
+
/* xgmi ras block */
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) {
adev->gmc.xgmi.ras = &xgmi_ras;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 51c2a82e2fa4..0b545bdcd636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -70,3 +70,75 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
 
amdgpu_mca_reset_error_count(adev, mc_status_addr);
 }
+
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
+{
+   int err;
+   struct amdgpu_mca_ras_block *ras;
+
+   if (!adev->mca.mp0.ras)
+   return 0;
+
+   ras = adev->mca.mp0.ras;
+
+   err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+   if (err) {
+   dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
+   return err;
+   }
+
+   strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
+   ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+   ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+   adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
+
+   return 0;
+}
+
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
+{
+int err;
+struct amdgpu_mca_ras_block *ras;
+
+if (!adev->mca.mp1.ras)
+return 0;
+
+ras = adev->mca.mp1.ras;
+
+err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+if (err) {
+dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
+return err;
+}
+
+strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
+ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
+
+return 0;
+}
+
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
+{
+int err;
+struct amdgpu_mca_ras_block *ras;
+
+if (!adev->mca.mpio.ras)
+return 0;
+
+ras = adev->mca.mpio.ras;
+
+err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+if (err) {
+dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
+return err;
+}
+
+strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
+ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
+
+return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 7ce16d16e34b..997a073e2409 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -30,12 +30,7 @@ struct amdgpu_mca_ras {
struct amdgpu_mca_ras_block *ras;
 };
 
-struct amdgpu_mca_funcs {
-   void (*init)(struct amdgpu_device *adev);
-};
-
 struct amdgpu_mca {
-   const struct amdgpu_mca_funcs *funcs;
struct amdgpu_mca_ras mp0;
struct amdgpu_mca_ras mp1;
struct amdgpu_mca_ras mpio;
@@ -55,5 +50,7 @@ void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
 void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,