[AMD Public Use]

Hi Hawking,

Thanks for your review, please see my replies inline.

Regards,
Stanley
> -----Original Message-----
> From: Zhang, Hawking <hawking.zh...@amd.com>
> Sent: Wednesday, June 3, 2020 11:26 PM
> To: Yang, Stanley <stanley.y...@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Chen, Guchun <guchun.c...@amd.com>; Liu, Monk
> <monk....@amd.com>; Clements, John <john.cleme...@amd.com>; Zhou1,
> Tao <tao.zh...@amd.com>; Li, Dennis <dennis...@amd.com>; Yang,
> Stanley <stanley.y...@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: support reserve bad page for virt
> 
> [AMD Public Use]
> 
> Please check my comments inline.
> 
> Regards,
> Hawking
> 
> -----Original Message-----
> From: Stanley.Yang <stanley.y...@amd.com>
> Sent: Wednesday, June 3, 2020 22:10
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking <hawking.zh...@amd.com>; Chen, Guchun
> <guchun.c...@amd.com>; Liu, Monk <monk....@amd.com>; Clements,
> John <john.cleme...@amd.com>; Zhou1, Tao <tao.zh...@amd.com>; Li,
> Dennis <dennis...@amd.com>; Yang, Stanley <stanley.y...@amd.com>
> Subject: [PATCH] drm/amdgpu: support reserve bad page for virt
> 
> Signed-off-by: Stanley.Yang <stanley.y...@amd.com>
> Change-Id: Ia0ad9453ac3ac929f95c73cbee5b7a8fc42a9816
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 164
> +++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  30 +++-
>  3 files changed, 196 insertions(+), 5 deletions(-)  mode change 100644 =>
> 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>  mode change 100644 => 100755
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b633171281f8..e8986e007206 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2001,8 +2001,10 @@ static int amdgpu_device_ip_init(struct
> amdgpu_device *adev)
>               }
>       }
> 
> -     if (amdgpu_sriov_vf(adev))
> +     if (amdgpu_sriov_vf(adev)) {
> +             amdgpu_virt_init_err_handler_data(adev);
> 
> [Hawking]:
> 1. It might be better to rename the function to
> amdgpu_virt_init_ras_err_handler_data
> 2. we shall apply either asic type check or more general data structure check
> before call into amdgpu_virt_init_ras_err_handler_data. Please check my
> comments in amdgpu_virt_init_ras_err_handler_data function
> 
> 
>               amdgpu_virt_init_data_exchange(adev);
> +     }
> 
>       r = amdgpu_ib_pool_init(adev);
>       if (r) {
> @@ -2306,6 +2308,9 @@ static int amdgpu_device_ip_fini(struct
> amdgpu_device *adev)  {
>       int i, r;
> 
> +     if (amdgpu_sriov_vf(adev))
> +             amdgpu_release_virt_err_handler_data(adev);
> 
> [Hawking]:
> 1. It might be better to rename the function to
> amdgpu_virt_release_ras_err_handler_data
> 2. we shall apply either asic type check or more general data structure check
> In case it introduce regression on MI100 and V340L
> 
> +
>       amdgpu_ras_pre_fini(adev);
> 
>       if (adev->gmc.xgmi.num_physical_nodes > 1) diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> old mode 100644
> new mode 100755
> index f3b38c9e04ca..c1554562a2ce
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -26,6 +26,7 @@
>  #include <drm/drm_drv.h>
> 
>  #include "amdgpu.h"
> +#include "amdgpu_ras.h"
> 
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)  { @@ -
> 255,12 +256,164 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj,
>       return ret;
>  }
> 
> +int amdgpu_virt_init_err_handler_data(struct amdgpu_device *adev) {
> 
> [Hawking]:
> 1. same as above, rename it to amdgpu_virt_init_ras_err_handler_data
> 
> +     struct amdgpu_virt *virt = &adev->virt;
> +     struct virt_ras_err_handler_data **data = &virt->virt_eh_data;
> +     /* GPU will be marked bad on host if bp count more then 10,
> +      * so alloc 512 is enough.
> +      */
> +     unsigned int align_space = 512;
> +     void *bps = NULL;
> +     struct amdgpu_bo **bps_bo = NULL;
> +
> 
> [Hawking]:
> 1. This will result to regression on MI100 SRIOV if we don't apply check. Or
> leverage general data structure for the purpose, thoughts?
[Yang, Stanley]:
Thanks for reminding, I will add asic type check to call it.
> 
> +     *data = kmalloc(sizeof(struct virt_ras_err_handler_data),
> GFP_KERNEL);
> 
> [Hawking]:
> 1. rename the data structure to amdgpu_virt_ras_err_handler_data
> 
> +     if (!*data)
> +             return -ENOMEM;
> +
> +     bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL);
> +     bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo),
> GFP_KERNEL);
> +
> +     if (!bps || !bps_bo) {
> +             kfree(bps);
> +             kfree(bps_bo);
> +             return -ENOMEM;
> +     }
> +
> +     (*data)->bps = bps;
> +     (*data)->bps_bo = bps_bo;
> +     (*data)->count = 0;
> +     (*data)->last_reserved = 0;
> +     return 0;
> +}
> +
> +static void amdgpu_virt_release_bp(struct amdgpu_device *adev) {
> +     struct amdgpu_virt *virt = &adev->virt;
> +     struct virt_ras_err_handler_data *data = virt->virt_eh_data;
> 
> [Hawking]:
> 1. same as above, please rename the virt_ras_err_handler_data to
> amdgpu_virt_ras_err_handler_data
> 
> +     struct amdgpu_bo *bo;
> +     int i;
> +
> +     if (!data)
> +             return;
> +
> +     for (i = data->last_reserved - 1; i >= 0; i--) {
> +             bo = data->bps_bo[i];
> +             amdgpu_bo_free_kernel(&bo, NULL, NULL);
> +             data->bps_bo[i] = bo;
> +             data->last_reserved = i;
> +     }
> +}
> +
> +void amdgpu_release_virt_err_handler_data(struct amdgpu_device *adev)
> {
> 
> [Hawking]:
> 1. same as above, rename the function to
> amdgpu_virt_release_ras_err_handler_data
> 
> +     struct amdgpu_virt *virt = &adev->virt;
> +     struct virt_ras_err_handler_data *data = virt->virt_eh_data;
> +
> +     if (!data)
> +             return;
> +
> +     amdgpu_virt_release_bp(adev);
> +
> +     kfree(data->bps);
> +     kfree(data->bps_bo);
> +     kfree(data);
> +     virt->virt_eh_data = NULL;
> +}
> +
> +static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
> +             struct eeprom_table_record *bps, int pages) {
> +     struct amdgpu_virt *virt = &adev->virt;
> +     struct virt_ras_err_handler_data *data = virt->virt_eh_data;
> +
> +     if (!data)
> +             return;
> +
> +     memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
> +     data->count += pages;
> +}
> +
> +static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) {
> +     struct amdgpu_virt *virt = &adev->virt;
> +     struct virt_ras_err_handler_data *data = virt->virt_eh_data;
> +     struct amdgpu_bo *bo = NULL;
> +     uint64_t bp;
> +     int i;
> +
> +     if (!data)
> +             return;
> +
> +     for (i = data->last_reserved; i < data->count; i++) {
> +             bp = data->bps[i].retired_page;
> +
> +             /* There are two cases of reserve error should be ignored:
> +              * 1) a ras bad page has been allocated (used by someone);
> +              * 2) a ras bad page has been reserved (duplicate error
> injection
> +              *    for one page);
> +              */
> +             if (amdgpu_bo_create_kernel_at(adev, bp <<
> AMDGPU_GPU_PAGE_SHIFT,
> +                                            AMDGPU_GPU_PAGE_SIZE,
> +                                            AMDGPU_GEM_DOMAIN_VRAM,
> +                                            &bo, NULL))
> +                     DRM_WARN("RAS WARN: reserve vram for retired
> page %llx fail\n", bp);
> 
> [Hawking]:
> 1. can we make this as debug message?
[Yang, Stanley] :
Sure, will modify it.
> 
> +
> +             data->bps_bo[i] = bo;
> +             data->last_reserved = i + 1;
> +             bo = NULL;
> +     }
> +}
> +
> +static bool amdgpu_virt_check_bad_page(struct amdgpu_device *adev,
> +             uint64_t retired_page)
> +{
> +     struct amdgpu_virt *virt = &adev->virt;
> +     struct virt_ras_err_handler_data *data = virt->virt_eh_data;
> +     int i;
> +
> +     if (!data)
> +             return true;
> +
> +     for (i = 0; i < data->count; i++)
> +             if (retired_page == data->bps[i].retired_page)
> +                     return true;
> +
> +     return false;
> +}
> +
> +static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
> +             uint64_t bp_block_offset, uint32_t bp_block_size) {
> +     struct eeprom_table_record bp;
> +     uint64_t retired_page;
> +     uint32_t bp_idx, bp_cnt;
> +
> +     if (bp_block_size) {
> +             bp_cnt = bp_block_size / sizeof(uint64_t);
> +             for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
> +                     retired_page = *(uint64_t *)(adev-
> >fw_vram_usage.va +
> +                                     bp_block_offset + bp_idx *
> sizeof(uint64_t));
> +                     bp.retired_page = retired_page;
> +
> +                     if (amdgpu_virt_check_bad_page(adev,
> retired_page))
> +                             continue;
> +
> +                     amdgpu_virt_ras_add_bps(adev, &bp, 1);
> +
> +                     amdgpu_virt_ras_reserve_bps(adev);
> +             }
> +     }
> +}
> +
>  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)  {
>       uint32_t pf2vf_size = 0;
>       uint32_t checksum = 0;
>       uint32_t checkval;
>       char *str;
> +     uint64_t bp_block_offset = 0;
> +     uint32_t bp_block_size = 0;
> 
>       adev->virt.fw_reserve.p_pf2vf = NULL;
>       adev->virt.fw_reserve.p_vf2pf = NULL;
> @@ -275,6 +428,17 @@ void amdgpu_virt_init_data_exchange(struct
> amdgpu_device *adev)
> 
>               /* pf2vf message must be in 4K */
>               if (pf2vf_size > 0 && pf2vf_size < 4096) {
> +                     if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
> +                             struct amdgim_pf2vf_info_v2 *pf2vf_v2 =
> NULL;
> +
> +                             pf2vf_v2 = (struct amdgim_pf2vf_info_v2
> *)adev->virt.fw_reserve.p_pf2vf;
> +                             bp_block_offset = ((uint64_t)pf2vf_v2-
> >bp_block_offset_L & 0xFFFFFFFF) |
> +                                             ((((uint64_t)pf2vf_v2-
> >bp_block_offset_H) << 32) & 0xFFFFFFFF00000000);
> +                             bp_block_size = pf2vf_v2->bp_block_size;
> +
> +                             amdgpu_virt_add_bad_page(adev,
> bp_block_offset, bp_block_size);
> +                     }
> +
>                       checkval = amdgpu_virt_fw_reserve_get_checksum(
>                               adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
>                               adev->virt.fw_reserve.checksum_key,
> checksum); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> old mode 100644
> new mode 100755
> index b90e822cebd7..96d84f036e96
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -143,19 +143,27 @@ struct  amdgim_pf2vf_info_v2 {
>       uint32_t vce_enc_max_pixels_count;
>       /* 16x16 pixels/sec, codec independent */
>       uint32_t vce_enc_max_bandwidth;
> +     /* Bad pages block position in BYTE */
> +     uint32_t bp_block_offset_L;
> +     uint32_t bp_block_offset_H;
> +     /* Bad pages block size in BYTE */
> +     uint32_t bp_block_size;
>       /* MEC FW position in kb from the start of VF visible frame buffer */
> -     uint64_t mecfw_kboffset;
> +     uint32_t mecfw_kboffset_L;
> +     uint32_t mecfw_kboffset_H;
> 
> [Hawking]:
> 1. can you just explain the reason that split the position in lower 32bit and
> higher 32 bit respectively?
[Yang, Stanley]:
The value is misalignment from the 64bit member in amdgim_pf2vf_info_v2 struct  
if use 64bit variable.
> 
>       /* MEC FW size in KB */
>       uint32_t mecfw_ksize;
>       /* UVD FW position in kb from the start of VF visible frame buffer */
> -     uint64_t uvdfw_kboffset;
> +     uint32_t uvdfw_kboffset_L;
> +     uint32_t uvdfw_kboffset_H;
>       /* UVD FW size in KB */
>       uint32_t uvdfw_ksize;
>       /* VCE FW position in kb from the start of VF visible frame buffer */
> -     uint64_t vcefw_kboffset;
> +     uint32_t vcefw_kboffset_L;
> +     uint32_t vcefw_kboffset_H;
>       /* VCE FW size in KB */
>       uint32_t vcefw_ksize;
> -     uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256,
> 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)),
> 3)];
> +     uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256,
> 0, 0, (18 +
> +sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 0)];
>  } __aligned(4);
> 
> 
> @@ -254,6 +262,17 @@ typedef struct amdgim_vf2pf_info_v2
> amdgim_vf2pf_info ;
>               } \
>       } while (0)
> 
> +struct virt_ras_err_handler_data {
> +     /* point to bad page records array */
> +     struct eeprom_table_record *bps;
> +     /* point to reserved bo array */
> +     struct amdgpu_bo **bps_bo;
> +     /* the count of entries */
> +     int count;
> +     /* last reserved entry's index + 1 */
> +     int last_reserved;
> +};
> +
>  /* GPU virtualization */
>  struct amdgpu_virt {
>       uint32_t                        caps;
> @@ -272,6 +291,7 @@ struct amdgpu_virt {
>       uint32_t reg_access_mode;
>       int req_init_data_ver;
>       bool tdr_debug;
> +     struct virt_ras_err_handler_data *virt_eh_data;
>  };
> 
>  #define amdgpu_sriov_enabled(adev) \
> @@ -323,6 +343,8 @@ void amdgpu_virt_free_mm_table(struct
> amdgpu_device *adev);  int amdgpu_virt_fw_reserve_get_checksum(void
> *obj, unsigned long obj_size,
>                                       unsigned int key,
>                                       unsigned int chksum);
> +void amdgpu_release_virt_err_handler_data(struct amdgpu_device
> *adev);
> +int amdgpu_virt_init_err_handler_data(struct amdgpu_device *adev);
>  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);  void
> amdgpu_detect_virtualization(struct amdgpu_device *adev);
> 
> --
> 2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to