> -----Original Message-----
> From: Zhang, Hawking <hawking.zh...@amd.com>
> Sent: Monday, March 6, 2023 10:32 AM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao <tao.zh...@amd.com>;
> Yang, Stanley <stanley.y...@amd.com>; Li, Candice <candice...@amd.com>;
> Chai, Thomas <yipeng.c...@amd.com>
> Cc: Zhang, Hawking <hawking.zh...@amd.com>
> Subject: [PATCH 08/11] drm/amdgpu: Rework mca ras sw_init
> 
> To align with other IP blocks
> 
> Signed-off-by: Hawking Zhang <hawking.zh...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 21 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 72
> +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  9 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 15 +++---
>  drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 44 ++-------------
>  drivers/gpu/drm/amd/amdgpu/mca_v3_0.h   |  4 +-
>  6 files changed, 111 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 087a75374610..524e2c9b3012 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -477,6 +477,27 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device
> *adev)
>               }
>       }
> 
> +     /* mca.x ras block */
> +     if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MCA))
> {
> +             r = amdgpu_mca_mp0_ras_sw_init(adev);
> +             if (r) {
> +                     dev_err(adev->dev, "Failed to initialize mca.mp0 ras
> block!\n");
> +                     return r;
> +             }
> +
> +             r = amdgpu_mca_mp1_ras_sw_init(adev);
> +             if (r) {
> +                     dev_err(adev->dev, "Failed to initialize mca.mp1 ras
> block!\n");
> +                     return r;
> +             }
> +
> +             r = amdgpu_mca_mpio_ras_sw_init(adev);
> +             if (r) {
> +                     dev_err(adev->dev, "Failed to initialize mca.mpio ras
> block!\n");
> +                     return r;
> +             }
> +     }
> +
>       /* xgmi ras block */
>       if (amdgpu_ras_is_supported(adev,
> AMDGPU_RAS_BLOCK__XGMI_WAFL)) {
>               adev->gmc.xgmi.ras = &xgmi_ras;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> index 51c2a82e2fa4..0b545bdcd636 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
> @@ -70,3 +70,75 @@ void amdgpu_mca_query_ras_error_count(struct
> amdgpu_device *adev,
> 
>       amdgpu_mca_reset_error_count(adev, mc_status_addr);  }
> +
> +int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev) {
> +     int err;
> +     struct amdgpu_mca_ras_block *ras;
> +
> +     if (!adev->mca.mp0.ras)
> +             return 0;
> +
> +     ras = adev->mca.mp0.ras;
> +
> +     err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
> +     if (err) {
> +             dev_err(adev->dev, "Failed to register mca.mp0 ras
> block!\n");
> +             return err;
> +     }
> +
> +     strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
> +     ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
> +     ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
> +     adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
> +
> +     return 0;
> +}
> +
> +int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev) {
> +        int err;
> +        struct amdgpu_mca_ras_block *ras;
> +
> +        if (!adev->mca.mp1.ras)
> +                return 0;
> +
> +        ras = adev->mca.mp1.ras;
> +
> +        err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
> +        if (err) {
> +                dev_err(adev->dev, "Failed to register mca.mp1 ras 
> block!\n");
> +                return err;
> +        }
> +
> +        strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
> +        ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
> +        ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
> +        adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
> +
> +        return 0;
> +}
> +
> +int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) {
> +        int err;
> +        struct amdgpu_mca_ras_block *ras;
> +
> +        if (!adev->mca.mpio.ras)
> +                return 0;
> +
> +        ras = adev->mca.mpio.ras;
> +
> +        err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
> +        if (err) {
> +                dev_err(adev->dev, "Failed to register mca.mpio ras 
> block!\n");
> +                return err;
> +        }
> +
> +        strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
> +        ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
> +        ras->ras_block.ras_comm.type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
> +        adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
> +
> +        return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
> index 7ce16d16e34b..997a073e2409 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
> @@ -30,12 +30,7 @@ struct amdgpu_mca_ras {
>       struct amdgpu_mca_ras_block *ras;
>  };
> 
> -struct amdgpu_mca_funcs {
> -     void (*init)(struct amdgpu_device *adev);
> -};
> -
>  struct amdgpu_mca {
> -     const struct amdgpu_mca_funcs *funcs;
>       struct amdgpu_mca_ras mp0;
>       struct amdgpu_mca_ras mp1;
>       struct amdgpu_mca_ras mpio;
> @@ -55,5 +50,7 @@ void amdgpu_mca_reset_error_count(struct
> amdgpu_device *adev,  void amdgpu_mca_query_ras_error_count(struct
> amdgpu_device *adev,
>                                     uint64_t mc_status_addr,
>                                     void *ras_error_status);
> -
> +int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
> +int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
> +int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 9a333f9744bf..67c2a5186b8a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1363,13 +1363,18 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct
> amdgpu_device *adev)
>       adev->hdp.ras = &hdp_v4_0_ras;
>  }
> 
> -static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
> +static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
>  {
> +     struct amdgpu_mca *mca = &adev->mca;
> +
>       /* is UMC the right IP to check for MCA?  Maybe DF? */
>       switch (adev->ip_versions[UMC_HWIP][0]) {
>       case IP_VERSION(6, 7, 0):
> -             if (!adev->gmc.xgmi.connected_to_cpu)
> -                     adev->mca.funcs = &mca_v3_0_funcs;
> +             if (!adev->gmc.xgmi.connected_to_cpu) {

[Stanley]: Can we use if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MCA))
instead of if (!adev->gmc.xgmi.connected_to_cpu) here, so that the RAS support
check stays uniform across IP blocks?

Regards,
Stanley
> +                     mca->mp0.ras = &mca_v3_0_mp0_ras;
> +                     mca->mp1.ras = &mca_v3_0_mp1_ras;
> +                     mca->mpio.ras = &mca_v3_0_mpio_ras;
> +             }
>               break;
>       default:
>               break;
> @@ -1398,7 +1403,7 @@ static int gmc_v9_0_early_init(void *handle)
>       gmc_v9_0_set_mmhub_ras_funcs(adev);
>       gmc_v9_0_set_gfxhub_funcs(adev);
>       gmc_v9_0_set_hdp_ras_funcs(adev);
> -     gmc_v9_0_set_mca_funcs(adev);
> +     gmc_v9_0_set_mca_ras_funcs(adev);
> 
>       adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
>       adev->gmc.shared_aperture_end =
> @@ -1611,8 +1616,6 @@ static int gmc_v9_0_sw_init(void *handle)
>       adev->gfxhub.funcs->init(adev);
> 
>       adev->mmhub.funcs->init(adev);
> -     if (adev->mca.funcs)
> -             adev->mca.funcs->init(adev);
> 
>       spin_lock_init(&adev->gmc.invalidate_lock);
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> index d4bd7d1d2649..6dae4a2e2767 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
> @@ -51,19 +51,13 @@ static int mca_v3_0_ras_block_match(struct
> amdgpu_ras_block_object *block_obj,
>       return -EINVAL;
>  }
> 
> -const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
> +static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
>       .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
>       .query_ras_error_address = NULL,
>  };
> 
>  struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
>       .ras_block = {
> -             .ras_comm = {
> -                     .block = AMDGPU_RAS_BLOCK__MCA,
> -                     .sub_block_index =
> AMDGPU_RAS_MCA_BLOCK__MP0,
> -                     .type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
> -                     .name = "mp0",
> -             },
>               .hw_ops = &mca_v3_0_mp0_hw_ops,
>               .ras_block_match = mca_v3_0_ras_block_match,
>       },
> @@ -77,19 +71,13 @@ static void
> mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
>                                        ras_error_status);
>  }
> 
> -const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
> +static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
>       .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
>       .query_ras_error_address = NULL,
>  };
> 
>  struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
>       .ras_block = {
> -             .ras_comm = {
> -                     .block = AMDGPU_RAS_BLOCK__MCA,
> -                     .sub_block_index =
> AMDGPU_RAS_MCA_BLOCK__MP1,
> -                     .type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
> -                     .name = "mp1",
> -             },
>               .hw_ops = &mca_v3_0_mp1_hw_ops,
>               .ras_block_match = mca_v3_0_ras_block_match,
>       },
> @@ -103,40 +91,14 @@ static void
> mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
>                                        ras_error_status);
>  }
> 
> -const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
> +static const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
>       .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
>       .query_ras_error_address = NULL,
>  };
> 
>  struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
>       .ras_block = {
> -             .ras_comm = {
> -                     .block = AMDGPU_RAS_BLOCK__MCA,
> -                     .sub_block_index =
> AMDGPU_RAS_MCA_BLOCK__MPIO,
> -                     .type =
> AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
> -                     .name = "mpio",
> -             },
>               .hw_ops = &mca_v3_0_mpio_hw_ops,
>               .ras_block_match = mca_v3_0_ras_block_match,
>       },
>  };
> -
> -
> -static void mca_v3_0_init(struct amdgpu_device *adev)
> -{
> -     struct amdgpu_mca *mca = &adev->mca;
> -
> -     mca->mp0.ras = &mca_v3_0_mp0_ras;
> -     mca->mp1.ras = &mca_v3_0_mp1_ras;
> -     mca->mpio.ras = &mca_v3_0_mpio_ras;
> -     amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block);
> -     amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block);
> -     amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block);
> -     mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm;
> -     mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm;
> -     mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm;
> -}
> -
> -const struct amdgpu_mca_funcs mca_v3_0_funcs = {
> -     .init = mca_v3_0_init,
> -};
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
> b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
> index b899b86194c2..d3eaef0d7f2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
> @@ -21,6 +21,8 @@
>  #ifndef __MCA_V3_0_H__
>  #define __MCA_V3_0_H__
> 
> -extern const struct amdgpu_mca_funcs mca_v3_0_funcs;
> +extern struct amdgpu_mca_ras_block mca_v3_0_mp0_ras;
> +extern struct amdgpu_mca_ras_block mca_v3_0_mp1_ras;
> +extern struct amdgpu_mca_ras_block mca_v3_0_mpio_ras;
> 
>  #endif
> --
> 2.17.1

Reply via email to