The KFD part looks good to me, other than the SDMA comment that Guchun
pointed out. With that fixed this patch is

Acked-by: Felix Kuehling <[email protected]>

Thanks,
  Felix


Am 2020-08-24 um 6:33 a.m. schrieb Stanley.Yang:
> ctx->features belongs to the new RAS implementation, which
> is only available on Vega20 and onwards. It is not
> available on Vega10, so Vega10 should follow the legacy
> ECC implementation.
>
> Changed from V1:
>     wrap function to initialize kfd node properties
>
> Changed from V2:
>     remove wrap function, remove SDMA ECC check
>
> Change-Id: I1e3ff899bf066611fe5775e67104ce2e0bf8b7d0
> Signed-off-by: Stanley.Yang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c   | 16 ++++++++-------
>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 24 +++++++++++------------
>  3 files changed, 21 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 1f9d97f61aa5..573e2712df35 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -986,6 +986,7 @@ struct amdgpu_device {
>  
>       atomic_t                        throttling_logging_enabled;
>       struct ratelimit_state          throttling_logging_rs;
> +     uint32_t                        ras_features;
>  };
>  
>  static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device 
> *bdev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index cd1403f83dcf..d462244863f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1974,7 +1974,8 @@ static void amdgpu_ras_check_supported(struct 
> amdgpu_device *adev,
>       *supported = 0;
>  
>       if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
> -         (adev->asic_type != CHIP_VEGA20   &&
> +         (adev->asic_type != CHIP_VEGA10 &&
> +          adev->asic_type != CHIP_VEGA20 &&
>            adev->asic_type != CHIP_ARCTURUS &&
>            adev->asic_type != CHIP_SIENNA_CICHLID))
>               return;
> @@ -1998,6 +1999,7 @@ static void amdgpu_ras_check_supported(struct 
> amdgpu_device *adev,
>  
>       *supported = amdgpu_ras_enable == 0 ?
>                       0 : *hw_supported & amdgpu_ras_mask;
> +     adev->ras_features = *supported;
>  }
>  
>  int amdgpu_ras_init(struct amdgpu_device *adev)
> @@ -2020,9 +2022,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>  
>       amdgpu_ras_check_supported(adev, &con->hw_supported,
>                       &con->supported);
> -     if (!con->hw_supported) {
> +     if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
>               r = 0;
> -             goto err_out;
> +             goto release_con;
>       }
>  
>       con->features = 0;
> @@ -2033,25 +2035,25 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>       if (adev->nbio.funcs->init_ras_controller_interrupt) {
>               r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
>               if (r)
> -                     goto err_out;
> +                     goto release_con;
>       }
>  
>       if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
>               r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
>               if (r)
> -                     goto err_out;
> +                     goto release_con;
>       }
>  
>       if (amdgpu_ras_fs_init(adev)) {
>               r = -EINVAL;
> -             goto err_out;
> +             goto release_con;
>       }
>  
>       dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
>                       "hardware ability[%x] ras_mask[%x]\n",
>                       con->hw_supported, con->supported);
>       return 0;
> -err_out:
> +release_con:
>       amdgpu_ras_set_context(adev, NULL);
>       kfree(con);
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index f185f6cbc05c..0ba960a17ead 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>       void *crat_image = NULL;
>       size_t image_size = 0;
>       int proximity_domain;
> -     struct amdgpu_ras *ctx;
> +     struct amdgpu_device *adev;
>  
>       INIT_LIST_HEAD(&temp_topology_device_list);
>  
> @@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>               dev->node_props.max_waves_per_simd = 10;
>       }
>  
> -     ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
> -     if (ctx) {
> -             /* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
> -             dev->node_props.capability |=
> -                     (((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
> -                      ((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
> -                     HSA_CAP_SRAM_EDCSUPPORTED : 0;
> -             dev->node_props.capability |= ((ctx->features & 
> BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
> -                     HSA_CAP_MEM_EDCSUPPORTED : 0;
> -
> -             dev->node_props.capability |= (ctx->features != 0) ?
> +     adev = (struct amdgpu_device *)(dev->gpu->kgd);
> +     /* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
> +     dev->node_props.capability |=
> +             ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
> +             HSA_CAP_SRAM_EDCSUPPORTED : 0;
> +     dev->node_props.capability |= ((adev->ras_features & 
> BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
> +             HSA_CAP_MEM_EDCSUPPORTED : 0;
> +
> +     if (adev->asic_type != CHIP_VEGA10)
> +             dev->node_props.capability |= (adev->ras_features != 0) ?
>                       HSA_CAP_RASEVENTNOTIFY : 0;
> -     }
>  
>       kfd_debug_print_topology();
>  
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to