Re: [PATCH v1] drm/amd/amdgpu: support MES command SET_HW_RESOURCE1 in sriov

2024-04-01 Thread JingWen Chen
Acked-by: Jingwen Chen 

On 2024/3/27 11:52, chongli2 wrote:
>   support MES command SET_HW_RESOURCE1 in sriov
>
> Signed-off-by: chongli2 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |  6 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c  |  5 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h  |  4 ++
>  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h   |  9 ++--
>  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 43 +++
>  drivers/gpu/drm/amd/include/mes_v11_api_def.h | 21 +
>  6 files changed, 85 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 7d4f93fea937..3774148f3e5d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -140,6 +140,12 @@ struct amdgpu_mes {
>  
>   /* ip specific functions */
>   const struct amdgpu_mes_funcs   *funcs;
> +
> + /* mes resource_1 bo*/
> + struct amdgpu_bo*resource_1;
> + uint64_tresource_1_gpu_addr;
> + void*resource_1_addr;
> +
>  };
>  
>  struct amdgpu_mes_process {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index aed60aaf1a55..52f01efde2fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -576,6 +576,11 @@ static int amdgpu_virt_write_vf2pf_data(struct 
> amdgpu_device *adev)
>   vf2pf_info->decode_usage = 0;
>  
>   vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
> + vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr;
> +
> + if (adev->mes.resource_1) {
> + vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size;
> + }
>   vf2pf_info->checksum =
>   amd_sriov_msg_checksum(
>   vf2pf_info, vf2pf_info->header.size, 0, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index a858bc98cad4..a9f2f0c4f799 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -132,6 +132,8 @@ enum AMDGIM_FEATURE_FLAG {
>   AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
>   /* VCN RB decouple */
>   AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
> + /* MES info */
> + AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
>  };
>  
>  enum AMDGIM_REG_ACCESS_FLAG {
> @@ -335,6 +337,8 @@ static inline bool is_virtual_machine(void)
>   ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
>  #define amdgpu_sriov_is_vcn_rb_decouple(adev) \
>   ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
> +#define amdgpu_sriov_is_mes_info_enable(adev) \
> + ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>  void amdgpu_virt_init_setting(struct amdgpu_device *adev);
>  int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> index 51a14f6d93bd..0de78d6a83fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
> @@ -94,7 +94,8 @@ union amd_sriov_msg_feature_flags {
>   uint32_t reg_indirect_acc  : 1;
>   uint32_t av1_support   : 1;
>   uint32_t vcn_rb_decouple   : 1;
> - uint32_t reserved  : 24;
> + uint32_t mes_info_enable   : 1;
> + uint32_t reserved  : 23;
>   } flags;
>   uint32_t all;
>  };
> @@ -221,7 +222,7 @@ struct amd_sriov_msg_vf2pf_info_header {
>   uint32_t reserved[2];
>  };
>  
> -#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70)
> +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73)
>  struct amd_sriov_msg_vf2pf_info {
>   /* header contains size and version */
>   struct amd_sriov_msg_vf2pf_info_header header;
> @@ -265,7 +266,9 @@ struct amd_sriov_msg_vf2pf_info {
>   uint32_t version;
>   } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
>   uint64_t dummy_page_addr;
> -
> + /* FB allocated for guest MES to record UQ info */
> + uint64_t mes_info_addr;
> + uint32_t mes_info_size;
>   /* reserved */
>   uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 072c478665ad..78ec170cfeef 100644

Re: [PATCH] drm/amd/pm set pp_dpm_*clk as read only for SRIOV one VF mode

2024-03-19 Thread JingWen Chen
Acked-by: Jingwen Chen 

On 2024/3/15 14:31, Lin.Cao wrote:
> pp_dpm_*clk should be set as read only for SRIOV one VF mode, remove
> S_IWUGO flag and _store function of these debugfs in one VF mode.
>
> Signed-off-by: Lin.Cao 
> ---
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index efc631bddf4a..2883a1d873ab 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -2367,7 +2367,15 @@ static int default_attr_update(struct amdgpu_device 
> *adev, struct amdgpu_device_
>   }
>  
>   /* setting should not be allowed from VF if not in one VF mode */
> - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) {
> + if (amdgpu_sriov_vf(adev) && (!amdgpu_sriov_is_pp_one_vf(adev) ||
> + DEVICE_ATTR_IS(pp_dpm_sclk) ||
> + DEVICE_ATTR_IS(pp_dpm_mclk) ||
> + DEVICE_ATTR_IS(pp_dpm_socclk) ||
> + DEVICE_ATTR_IS(pp_dpm_fclk) ||
> + DEVICE_ATTR_IS(pp_dpm_vclk) ||
> + DEVICE_ATTR_IS(pp_dpm_vclk1) ||
> + DEVICE_ATTR_IS(pp_dpm_dclk) ||
> + DEVICE_ATTR_IS(pp_dpm_dclk1))) {
>   dev_attr->attr.mode &= ~S_IWUGO;
>   dev_attr->store = NULL;
>   }

-- 
Best Regards,
JingWen Chen



Re: [PATCH] drm/amdgpu: release gpu full access after "amdgpu_device_ip_late_init"

2023-04-17 Thread JingWen Chen
Reviewed-by: jingwen.ch...@amd.com

On 4/14/23 4:41 PM, Chong Li wrote:
> [WHY]
>  Function "amdgpu_irq_update()" called by "amdgpu_device_ip_late_init()" is 
> an atomic context.
>  We shouldn't access registers through KIQ since "msleep()" may be called in 
> "amdgpu_kiq_rreg()".
>
> [HOW]
>  Move function "amdgpu_virt_release_full_gpu()" after function 
> "amdgpu_device_ip_late_init()",
>  to ensure that registers be accessed through RLCG instead of KIQ.
>
> Call Trace:
>   
>   show_stack+0x52/0x69
>   dump_stack_lvl+0x49/0x6d
>   dump_stack+0x10/0x18
>   __schedule_bug.cold+0x4f/0x6b
>   __schedule+0x473/0x5d0
>   ? __wake_up_klogd.part.0+0x40/0x70
>   ? vprintk_emit+0xbe/0x1f0
>   schedule+0x68/0x110
>   schedule_timeout+0x87/0x160
>   ? timer_migration_handler+0xa0/0xa0
>   msleep+0x2d/0x50
>   amdgpu_kiq_rreg+0x18d/0x1f0 [amdgpu]
>   amdgpu_device_rreg.part.0+0x59/0xd0 [amdgpu]
>   amdgpu_device_rreg+0x3a/0x50 [amdgpu]
>   amdgpu_sriov_rreg+0x3c/0xb0 [amdgpu]
>   gfx_v10_0_set_gfx_eop_interrupt_state.constprop.0+0x16c/0x190 [amdgpu]
>   gfx_v10_0_set_eop_interrupt_state+0xa5/0xb0 [amdgpu]
>   amdgpu_irq_update+0x53/0x80 [amdgpu]
>   amdgpu_irq_get+0x7c/0xb0 [amdgpu]
>   amdgpu_fence_driver_hw_init+0x58/0x90 [amdgpu]
>   amdgpu_device_init.cold+0x16b7/0x2022 [amdgpu]
>
> Signed-off-by: Chong Li 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 32 --
>  1 file changed, 17 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 051b9e231cf4..ee21a99ab4d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2538,8 +2538,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
> *adev)
>   amdgpu_fru_get_product_info(adev);
>  
>  init_failed:
> - if (amdgpu_sriov_vf(adev))
> - amdgpu_virt_release_full_gpu(adev, true);
>  
>   return r;
>  }
> @@ -3856,18 +3854,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  
>   r = amdgpu_device_ip_init(adev);
>   if (r) {
> - /* failed in exclusive mode due to timeout */
> - if (amdgpu_sriov_vf(adev) &&
> - !amdgpu_sriov_runtime(adev) &&
> - amdgpu_virt_mmio_blocked(adev) &&
> - !amdgpu_virt_wait_reset(adev)) {
> - dev_err(adev->dev, "VF exclusive mode timeout\n");
> - /* Don't send request since VF is inactive. */
> - adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> - adev->virt.ops = NULL;
> - r = -EAGAIN;
> - goto release_ras_con;
> - }
>   dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
>   amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 
> 0);
>   goto release_ras_con;
> @@ -3936,8 +3922,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  msecs_to_jiffies(AMDGPU_RESUME_MS));
>   }
>  
> - if (amdgpu_sriov_vf(adev))
> + if (amdgpu_sriov_vf(adev)) {
> + amdgpu_virt_release_full_gpu(adev, true);
> + flush_delayed_work(&adev->delayed_init_work);
> + }
>  
>   r = sysfs_create_files(>dev->kobj, amdgpu_dev_attributes);
>   if (r)
> @@ -3977,6 +3965,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   return 0;
>  
>  release_ras_con:
> + if (amdgpu_sriov_vf(adev))
> + amdgpu_virt_release_full_gpu(adev, true);
> +
> + /* failed in exclusive mode due to timeout */
> + if (amdgpu_sriov_vf(adev) &&
> + !amdgpu_sriov_runtime(adev) &&
> + amdgpu_virt_mmio_blocked(adev) &&
> + !amdgpu_virt_wait_reset(adev)) {
> + dev_err(adev->dev, "VF exclusive mode timeout\n");
> + /* Don't send request since VF is inactive. */
> + adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> + adev->virt.ops = NULL;
> + r = -EAGAIN;
> + }
>   amdgpu_release_ras_context(adev);
>  
>  failed:


Re: [PATCH] drm/ttm: update bulk move object of ghost BO

2022-09-01 Thread JingWen Chen
Acked-by: Jingwen Chen 

still need confirmation from Christian

On 9/1/22 5:29 PM, ZhenGuo Yin wrote:
> [Why]
> Ghost BO is released with non-empty bulk move object. There is a
> warning trace:
> WARNING: CPU: 19 PID: 1582 at ttm/ttm_bo.c:366 ttm_bo_release+0x2e1/0x2f0 
> [amdttm]
> Call Trace:
>   amddma_resv_reserve_fences+0x10d/0x1f0 [amdkcl]
>   amdttm_bo_put+0x28/0x30 [amdttm]
>   amdttm_bo_move_accel_cleanup+0x126/0x200 [amdttm]
>   amdgpu_bo_move+0x1a8/0x770 [amdgpu]
>   ttm_bo_handle_move_mem+0xb0/0x140 [amdttm]
>   amdttm_bo_validate+0xbf/0x100 [amdttm]
>
> [How]
> The resource of ghost BO should be moved to LRU directly, instead of
> using bulk move. The bulk move object of ghost BO should set to NULL
> before function ttm_bo_move_to_lru_tail_unlocked.
>
> Fixes: 5b951e487fd6bf5f ("drm/ttm: fix bulk move handling v2")
> Signed-off-by: ZhenGuo Yin 
> ---
>  drivers/gpu/drm/ttm/ttm_bo_util.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
> b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 1cbfb00c1d65..a90bbbd91910 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -238,6 +238,7 @@ static int ttm_buffer_object_transfer(struct 
> ttm_buffer_object *bo,
>  
>   if (fbo->base.resource) {
> 	ttm_resource_set_bo(fbo->base.resource, &fbo->base);
> + 	ttm_bo_set_bulk_move(&fbo->base, NULL);
>   bo->resource = NULL;
>   }
>  


Re: [PATCH] drm/amdgpu: Call trace info was found in dmesg when loading amdgpu

2022-07-13 Thread JingWen Chen
feel free to add

Reviewed-by: Jingwen Chen 

On 7/14/22 10:31 AM, lin cao wrote:
> In the case of SRIOV, the register smnMp1_PMI_3_FIFO will get an invalid
> value which will cause the "shift out of bound". In Ubuntu22.04, this
> issue will be checked an related call trace will be reported in dmesg.
>
> Signed-off-by: lin cao 
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> index b71860e5324a..fa520d79ef67 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
> @@ -886,6 +886,7 @@ static void sienna_cichlid_stb_init(struct smu_context 
> *smu);
>  
>  static int sienna_cichlid_init_smc_tables(struct smu_context *smu)
>  {
> + struct amdgpu_device *adev = smu->adev;
>   int ret = 0;
>  
>   ret = sienna_cichlid_tables_init(smu);
> @@ -896,7 +897,8 @@ static int sienna_cichlid_init_smc_tables(struct 
> smu_context *smu)
>   if (ret)
>   return ret;
>  
> - sienna_cichlid_stb_init(smu);
> + if (!amdgpu_sriov_vf(adev))
> + sienna_cichlid_stb_init(smu);
>  
>   return smu_v11_0_init_smc_tables(smu);
>  }


Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready

2022-03-02 Thread JingWen Chen
Hi Andrey,

Most part of the patches are OK, but the code will introduce a ib test fail on 
the disabled vcn of sienna_cichlid.

In SRIOV use case we will disable one vcn on sienna_cichlid, I have attached a 
patch to fix this issue, please check the attachment.

Best Regards,

Jingwen Chen


On 2/26/22 5:22 AM, Andrey Grodzovsky wrote:
> Hey, patches attached - i applied the patches and resolved merge conflicts 
> but weren't able to test as my on board's network card doesn't work with 5.16 
> kernel (it does with 5.17, maybe it's Kconfig issue and i need to check more).
> The patches are on top of 'cababde192b2 Yifan Zhang 2 days ago 
> drm/amd/pm: fix mode2 reset fail for smu 13.0.5 ' commit.
>
> Please test and let me know. Maybe by Monday I will be able to resolve the 
> connectivity issue on 5.16.
>
> Andrey
>
> On 2022-02-24 22:13, JingWen Chen wrote:
>> Hi Andrey,
>>
>> Sorry for the misleading, I mean the whole patch series. We are depending on 
>> this patch series to fix the concurrency issue within SRIOV TDR sequence.
>>
>>
>>
>> On 2/25/22 1:26 AM, Andrey Grodzovsky wrote:
>>> No problem if so but before I do,
>>>
>>>
>>> JingWen - why you think this patch is needed as a standalone now ? It has 
>>> no use without the
>>> entire feature together with it. Is it some changes you want to do on top 
>>> of that code ?
>>>
>>>
>>> Andrey
>>>
>>>
>>> On 2022-02-24 12:12, Deucher, Alexander wrote:
>>>> [Public]
>>>>
>>>>
>>>> If it applies cleanly, feel free to drop it in.  I'll drop those patches 
>>>> for drm-next since they are already in drm-misc.
>>>>
>>>> Alex
>>>>
>>>> 
>>>> *From:* amd-gfx  on behalf of 
>>>> Andrey Grodzovsky 
>>>> *Sent:* Thursday, February 24, 2022 11:24 AM
>>>> *To:* Chen, JingWen ; Christian König 
>>>> ; dri-de...@lists.freedesktop.org 
>>>> ; amd-gfx@lists.freedesktop.org 
>>>> 
>>>> *Cc:* Liu, Monk ; Chen, Horace ; 
>>>> Lazar, Lijo ; Koenig, Christian 
>>>> ; dan...@ffwll.ch 
>>>> *Subject:* Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after 
>>>> XGMI is ready
>>>> No because all the patch-set including this patch was landed into
>>>> drm-misc-next and will reach amd-staging-drm-next on the next upstream
>>>> rebase i guess.
>>>>
>>>> Andrey
>>>>
>>>> On 2022-02-24 01:47, JingWen Chen wrote:
>>>>> Hi Andrey,
>>>>>
>>>>> Will you port this patch into amd-staging-drm-next?
>>>>>
>>>>> on 2/10/22 2:06 AM, Andrey Grodzovsky wrote:
>>>>>> All comments are fixed and code pushed. Thanks for everyone
>>>>>> who helped reviewing.
>>>>>>
>>>>>> Andrey
>>>>>>
>>>>>> On 2022-02-09 02:53, Christian König wrote:
>>>>>>> Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky:
>>>>>>>> Before we initialize schedulers we must know which reset
>>>>>>>> domain are we in - for single device there iis a single
>>>>>>>> domain per device and so single wq per device. For XGMI
>>>>>>>> the reset domain spans the entire XGMI hive and so the
>>>>>>>> reset wq is per hive.
>>>>>>>>
>>>>>>>> Signed-off-by: Andrey Grodzovsky 
>>>>>>> One more comment below, with that fixed Reviewed-by: Christian König 
>>>>>>> .
>>>>>>>
>>>>>>>> ---
>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++
>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--
>>>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
>>>>>>>>      3 files changed, 51 insertions(+), 30 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> index 9704b0e1fd82..00123b0013d3 100644
>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_de

Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready

2022-02-24 Thread JingWen Chen
Hi Andrey,

Sorry for the misleading, I mean the whole patch series. We are depending on 
this patch series to fix the concurrency issue within SRIOV TDR sequence.



On 2/25/22 1:26 AM, Andrey Grodzovsky wrote:
> No problem if so but before I do,
>
>
> JingWen - why you think this patch is needed as a standalone now ? It has no 
> use without the
> entire feature together with it. Is it some changes you want to do on top of 
> that code ?
>
>
> Andrey
>
>
> On 2022-02-24 12:12, Deucher, Alexander wrote:
>>
>> [Public]
>>
>>
>> If it applies cleanly, feel free to drop it in.  I'll drop those patches for 
>> drm-next since they are already in drm-misc.
>>
>> Alex
>>
>> 
>> *From:* amd-gfx  on behalf of Andrey 
>> Grodzovsky 
>> *Sent:* Thursday, February 24, 2022 11:24 AM
>> *To:* Chen, JingWen ; Christian König 
>> ; dri-de...@lists.freedesktop.org 
>> ; amd-gfx@lists.freedesktop.org 
>> 
>> *Cc:* Liu, Monk ; Chen, Horace ; 
>> Lazar, Lijo ; Koenig, Christian 
>> ; dan...@ffwll.ch 
>> *Subject:* Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI 
>> is ready
>> No because all the patch-set including this patch was landed into
>> drm-misc-next and will reach amd-staging-drm-next on the next upstream
>> rebase i guess.
>>
>> Andrey
>>
>> On 2022-02-24 01:47, JingWen Chen wrote:
>> > Hi Andrey,
>> >
>> > Will you port this patch into amd-staging-drm-next?
>> >
>> > on 2/10/22 2:06 AM, Andrey Grodzovsky wrote:
>> >> All comments are fixed and code pushed. Thanks for everyone
>> >> who helped reviewing.
>> >>
>> >> Andrey
>> >>
>> >> On 2022-02-09 02:53, Christian König wrote:
>> >>> Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky:
>> >>>> Before we initialize schedulers we must know which reset
>> >>>> domain are we in - for single device there iis a single
>> >>>> domain per device and so single wq per device. For XGMI
>> >>>> the reset domain spans the entire XGMI hive and so the
>> >>>> reset wq is per hive.
>> >>>>
>> >>>> Signed-off-by: Andrey Grodzovsky 
>> >>> One more comment below, with that fixed Reviewed-by: Christian König 
>> >>> .
>> >>>
>> >>>> ---
>> >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++
>> >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--
>> >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
>> >>>>    3 files changed, 51 insertions(+), 30 deletions(-)
>> >>>>
>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> >>>> index 9704b0e1fd82..00123b0013d3 100644
>> >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> >>>> @@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct 
>> >>>> amdgpu_device *adev)
>> >>>>    return r;
>> >>>>    }
>> >>>>    +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>> >>>> +{
>> >>>> +    long timeout;
>> >>>> +    int r, i;
>> >>>> +
>> >>>> +    for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>> >>>> +    struct amdgpu_ring *ring = adev->rings[i];
>> >>>> +
>> >>>> +    /* No need to setup the GPU scheduler for rings that don't 
>> >>>> need it */
>> >>>> +    if (!ring || ring->no_scheduler)
>> >>>> +    continue;
>> >>>> +
>> >>>> +    switch (ring->funcs->type) {
>> >>>> +    case AMDGPU_RING_TYPE_GFX:
>> >>>> +    timeout = adev->gfx_timeout;
>> >>>> +    break;
>> >>>> +    case AMDGPU_RING_TYPE_COMPUTE:
>> >>>> +    timeout = adev->compute_timeout;
>> >>>> +    break;
>> >>>> +    case AMDGPU_RING_TYPE_SDMA:
>> >>>> +    timeout = adev->sdma_timeout;
>> >>>> +    break;
>&

Re: [RFC v4 02/11] drm/amdgpu: Move scheduler init to after XGMI is ready

2022-02-23 Thread JingWen Chen
Hi Andrey,

Will you port this patch into amd-staging-drm-next?

on 2/10/22 2:06 AM, Andrey Grodzovsky wrote:
> All comments are fixed and code pushed. Thanks for everyone
> who helped reviewing.
>
> Andrey
>
> On 2022-02-09 02:53, Christian König wrote:
>> Am 09.02.22 um 01:23 schrieb Andrey Grodzovsky:
>>> Before we initialize schedulers we must know which reset
>>> domain are we in - for single device there iis a single
>>> domain per device and so single wq per device. For XGMI
>>> the reset domain spans the entire XGMI hive and so the
>>> reset wq is per hive.
>>>
>>> Signed-off-by: Andrey Grodzovsky 
>>
>> One more comment below, with that fixed Reviewed-by: Christian König 
>> .
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 ++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 34 ++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  2 +
>>>   3 files changed, 51 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 9704b0e1fd82..00123b0013d3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct 
>>> amdgpu_device *adev)
>>>   return r;
>>>   }
>>>   +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
>>> +{
>>> +    long timeout;
>>> +    int r, i;
>>> +
>>> +    for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> +    struct amdgpu_ring *ring = adev->rings[i];
>>> +
>>> +    /* No need to setup the GPU scheduler for rings that don't need it 
>>> */
>>> +    if (!ring || ring->no_scheduler)
>>> +    continue;
>>> +
>>> +    switch (ring->funcs->type) {
>>> +    case AMDGPU_RING_TYPE_GFX:
>>> +    timeout = adev->gfx_timeout;
>>> +    break;
>>> +    case AMDGPU_RING_TYPE_COMPUTE:
>>> +    timeout = adev->compute_timeout;
>>> +    break;
>>> +    case AMDGPU_RING_TYPE_SDMA:
>>> +    timeout = adev->sdma_timeout;
>>> +    break;
>>> +    default:
>>> +    timeout = adev->video_timeout;
>>> +    break;
>>> +    }
>>> +
>>> +    r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>> +   ring->num_hw_submission, amdgpu_job_hang_limit,
>>> +   timeout, adev->reset_domain.wq, ring->sched_score, 
>>> ring->name);
>>> +    if (r) {
>>> +    DRM_ERROR("Failed to create scheduler on ring %s.\n",
>>> +  ring->name);
>>> +    return r;
>>> +    }
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +
>>>   /**
>>>    * amdgpu_device_ip_init - run init for hardware IPs
>>>    *
>>> @@ -2419,6 +2460,10 @@ static int amdgpu_device_ip_init(struct 
>>> amdgpu_device *adev)
>>>   }
>>>   }
>>>   +    r = amdgpu_device_init_schedulers(adev);
>>> +    if (r)
>>> +    goto init_failed;
>>> +
>>>   /* Don't init kfd if whole hive need to be reset during init */
>>>   if (!adev->gmc.xgmi.pending_reset)
>>>   amdgpu_amdkfd_device_init(adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>> index 45977a72b5dd..fa302540c69a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>> @@ -457,8 +457,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring 
>>> *ring,
>>>     atomic_t *sched_score)
>>>   {
>>>   struct amdgpu_device *adev = ring->adev;
>>> -    long timeout;
>>> -    int r;
>>>     if (!adev)
>>>   return -EINVAL;
>>> @@ -478,36 +476,12 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring 
>>> *ring,
>>>   spin_lock_init(&ring->fence_drv.lock);
>>>   ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void 
>>> *),
>>>    GFP_KERNEL);
>>> -    if (!ring->fence_drv.fences)
>>> -    return -ENOMEM;
>>>   -    /* No need to setup the GPU scheduler for rings that don't need it */
>>> -    if (ring->no_scheduler)
>>> -    return 0;
>>> +    ring->num_hw_submission = num_hw_submission;
>>> +    ring->sched_score = sched_score;
>>
>> Let's move this into the caller and then use ring->num_hw_submission in the 
>> fence code as well.
>>
>> The maximum number of jobs on the ring is not really fence specific.
>>
>> Regards,
>> Christian.
>>
>>>   -    switch (ring->funcs->type) {
>>> -    case AMDGPU_RING_TYPE_GFX:
>>> -    timeout = adev->gfx_timeout;
>>> -    break;
>>> -    case AMDGPU_RING_TYPE_COMPUTE:
>>> -    timeout = adev->compute_timeout;
>>> -    break;
>>> -    case AMDGPU_RING_TYPE_SDMA:
>>> -    timeout = adev->sdma_timeout;
>>> -    break;
>>> -    default:
>>> -    timeout = adev->video_timeout;
>>> -    break;
>>> -    }
>>> -
>>> -    r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>> -   

Re: [RFC v3 00/12] Define and use reset domain for GPU recovery in amdgpu

2022-02-08 Thread JingWen Chen
Hi Andrey,

I have been testing your patch and it seems fine till now.

Best Regards,

Jingwen Chen

On 2022/2/3 上午2:57, Andrey Grodzovsky wrote:
> Just another ping, with Shyun's help I was able to do some smoke testing on 
> XGMI SRIOV system (booting and triggering hive reset)
> and for now looks good.
>
> Andrey
>
> On 2022-01-28 14:36, Andrey Grodzovsky wrote:
>> Just a gentle ping if people have more comments on this patch set ? 
>> Especially last 5 patches
>> as first 7 are exact same as V2 and we already went over them mostly.
>>
>> Andrey
>>
>> On 2022-01-25 17:37, Andrey Grodzovsky wrote:
>>> This patchset is based on earlier work by Boris[1] that allowed to have an
>>> ordered workqueue at the driver level that will be used by the different
>>> schedulers to queue their timeout work. On top of that I also serialized
>>> any GPU reset we trigger from within amdgpu code to also go through the same
>>> ordered wq and in this way simplify somewhat our GPU reset code so we don't 
>>> need
>>> to protect from concurrency by multiple GPU reset triggeres such as TDR on 
>>> one
>>> hand and sysfs trigger or RAS trigger on the other hand.
>>>
>>> As advised by Christian and Daniel I defined a reset_domain struct such that
>>> all the entities that go through reset together will be serialized one 
>>> against
>>> another.
>>>
>>> TDR triggered by multiple entities within the same domain due to the same 
>>> reason will not
>>> be triggered as the first such reset will cancel all the pending resets. 
>>> This is
>>> relevant only to TDR timers and not to triggered resets coming from RAS or 
>>> SYSFS,
>>> those will still happen after the in flight resets finishes.
>>>
>>> v2:
>>> Add handling on SRIOV configuration, the reset notify coming from host
>>> and driver already trigger a work queue to handle the reset so drop this
>>> intermediate wq and send directly to timeout wq. (Shaoyun)
>>>
>>> v3:
>>> Lijo suggested puting 'adev->in_gpu_reset' in amdgpu_reset_domain struct.
>>> I followed his advise and also moved adev->reset_sem into same place. This
>>> in turn caused to do some follow-up refactor of the original patches
>>> where i decoupled amdgpu_reset_domain life cycle frolm XGMI hive because 
>>> hive is destroyed and
>>> reconstructed for the case of reset the devices in the XGMI hive during 
>>> probe for SRIOV See [2]
>>> while we need the reset sem and gpu_reset flag to always be present. This 
>>> was attained
>>> by adding refcount to amdgpu_reset_domain so each device can safely point 
>>> to it as long as
>>> it needs.
>>>
>>>
>>> [1] 
>>> https://patchwork.kernel.org/project/dri-devel/patch/20210629073510.2764391-3-boris.brezil...@collabora.com/
>>> [2] https://www.spinics.net/lists/amd-gfx/msg58836.html
>>>
>>> P.S Going through drm-misc-next and not amd-staging-drm-next as Boris work 
>>> hasn't landed yet there.
>>>
>>> P.P.S Patches 8-12 are the refactor on top of the original V2 patchset.
>>>
>>> P.P.P.S I wasn't able yet to test the reworked code on XGMI SRIOV system 
>>> because drm-misc-next fails to load there.
>>> Would appriciate if maybe jingwech can try it on his system like he tested 
>>> V2.
>>>
>>> Andrey Grodzovsky (12):
>>>    drm/amdgpu: Introduce reset domain
>>>    drm/amdgpu: Move scheduler init to after XGMI is ready
>>>    drm/amdgpu: Fix crash on modprobe
>>>    drm/amdgpu: Serialize non TDR gpu recovery with TDRs
>>>    drm/amd/virt: For SRIOV send GPU reset directly to TDR queue.
>>>    drm/amdgpu: Drop hive->in_reset
>>>    drm/amdgpu: Drop concurrent GPU reset protection for device
>>>    drm/amdgpu: Rework reset domain to be refcounted.
>>>    drm/amdgpu: Move reset sem into reset_domain
>>>    drm/amdgpu: Move in_gpu_reset into reset_domain
>>>    drm/amdgpu: Rework amdgpu_device_lock_adev
>>>    Revert 'drm/amdgpu: annotate a false positive recursive locking'
>>>
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  15 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   |  10 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 275 ++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  43 +--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |   2 +-
>>>   .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  18 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c |  39 +++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h |  12 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |   2 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  |  24 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h  |   3 +-
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c    |   6 +-
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  14 +-
>>>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c |  19 +-
>>>   drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c |  19 +-
>>>   drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c |  11 +-
>>>   16 files changed, 313 insertions(+), 199 deletions(-)
>>>


Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs

2022-02-06 Thread JingWen Chen
Hi Andrey,

I don't have any XGMI machines here, maybe you can reach out shaoyun for help.

On 2022/1/29 上午12:57, Grodzovsky, Andrey wrote:
> Just a gentle ping.
>
> Andrey
> --
> *From:* Grodzovsky, Andrey
> *Sent:* 26 January 2022 10:52
> *To:* Christian König ; Koenig, Christian 
> ; Lazar, Lijo ; 
> dri-de...@lists.freedesktop.org ; 
> amd-gfx@lists.freedesktop.org ; Chen, JingWen 
> 
> *Cc:* Chen, Horace ; Liu, Monk 
> *Subject:* Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with 
> TDRs
>  
>
> JingWen - could you maybe give those patches a try on SRIOV XGMI system ? If 
> you see issues maybe you could let me connect and debug. My SRIOV XGMI system 
> which Shayun kindly arranged for me is not loading the driver with my 
> drm-misc-next branch even without my patches.
>
> Andrey
>
> On 2022-01-17 14:21, Andrey Grodzovsky wrote:
>>
>>
>> On 2022-01-17 2:17 p.m., Christian König wrote:
>>> Am 17.01.22 um 20:14 schrieb Andrey Grodzovsky:

 Ping on the question

>>>
>>> Oh, my! That was already more than a week ago and is completely swapped out 
>>> of my head again.
>>>
 Andrey

 On 2022-01-05 1:11 p.m., Andrey Grodzovsky wrote:
>>> Also, what about having the reset_active or in_reset flag in the 
>>> reset_domain itself?
>>
>> Of hand that sounds like a good idea.
>
>
> What then about the adev->reset_sem semaphore ? Should we also move this 
> to reset_domain ?  Both of the moves have functional
> implications only for XGMI case because there will be contention over 
> accessing those single instance variables from multiple devices
> while now each device has it's own copy.
>>>
>>> Since this is a rw semaphore that should be unproblematic I think. It could 
>>> just be that the cache line of the lock then plays ping/pong between the 
>>> CPU cores.
>>>
>
> What benefit the centralization into reset_domain gives - is it for 
> example to prevent one device in a hive trying to access through MMIO 
> another one's
> VRAM (shared FB memory) while the other one goes through reset ?
>>>
>>> I think that this is the killer argument for a centralized lock, yes.
>>
>>
>> np, i will add a patch with centralizing both flag into reset domain and 
>> resend.
>>
>> Andrey
>>
>>
>>>
>>> Christian.
>>>
>
> Andrey 
>>>


[PATCH v4] drm/amd/amdgpu: fixing read wrong pf2vf data in SRIOV

2022-01-13 Thread Jingwen Chen
[Why]
This fixes 35709bd76d229a51b0c571a768b741650ec47828.
we should read pf2vf data based at mman.fw_vram_usage_va after gmc
sw_init. patch 35709bd76 breaks this logic.

[How]
calling amdgpu_virt_exchange_data in amdgpu_virt_init_data_exchange to
set the right base in the right sequence.

v2:
call amdgpu_virt_init_data_exchange after gmc sw_init to make data
exchange workqueue run

v3:
clean up the code logic

v4:
add some comment and make the code more readable

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 20 +++-
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index da3348fa7b0e..5610eae8ef0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2353,7 +2353,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
 
if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_exchange_data(adev);
+   amdgpu_virt_init_data_exchange(adev);
 
r = amdgpu_ib_pool_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 89ab0032..07bc0f504713 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -625,20 +625,20 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
 
-   if (adev->bios != NULL) {
-   adev->virt.vf2pf_update_interval_ms = 2000;
+   if (adev->mman.fw_vram_usage_va != NULL) {
+   /* go through this logic in ip_init and reset to init 
workqueue*/
+   amdgpu_virt_exchange_data(adev);
 
+   INIT_DELAYED_WORK(>virt.vf2pf_work, 
amdgpu_virt_update_vf2pf_work_item);
+   schedule_delayed_work(&(adev->virt.vf2pf_work), 
msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+   } else if (adev->bios != NULL) {
+   /* got through this logic in early init stage to get necessary 
flags, e.g. rlcg_acc related*/
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
 
amdgpu_virt_read_pf2vf_data(adev);
}
-
-   if (adev->virt.vf2pf_update_interval_ms != 0) {
-   INIT_DELAYED_WORK(>virt.vf2pf_work, 
amdgpu_virt_update_vf2pf_work_item);
-   schedule_delayed_work(&(adev->virt.vf2pf_work), 
msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
-   }
 }
 
 
@@ -674,12 +674,6 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, 
bp_block_offset, bp_block_size);
}
-   } else if (adev->bios != NULL) {
-   adev->virt.fw_reserve.p_pf2vf =
-   (struct amd_sriov_msg_pf2vf_info_header *)
-   (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-
-   amdgpu_virt_read_pf2vf_data(adev);
}
 }
 
-- 
2.30.2



[PATCH v3] drm/amd/amdgpu: fixing read wrong pf2vf data in SRIOV

2022-01-13 Thread Jingwen Chen
[Why]
This fixes 35709bd76d229a51b0c571a768b741650ec47828.
we should read pf2vf data based at mman.fw_vram_usage_va after gmc
sw_init. patch 35709bd76 breaks this logic.

[How]
calling amdgpu_virt_exchange_data in amdgpu_virt_init_data_exchange to
set the right base in the right sequence.

v2:
call amdgpu_virt_init_data_exchange after gmc sw_init to make data
exchange workqueue run

v3:
clean up the code logic

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 12 
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index da3348fa7b0e..5610eae8ef0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2353,7 +2353,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
 
if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_exchange_data(adev);
+   amdgpu_virt_init_data_exchange(adev);
 
r = amdgpu_ib_pool_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 89ab0032..d83dfdc64d49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -625,14 +625,16 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
 
-   if (adev->bios != NULL) {
+   if (adev->mman.fw_vram_usage_va != NULL) {
adev->virt.vf2pf_update_interval_ms = 2000;
-
+   amdgpu_virt_exchange_data(adev);
+   } else if (adev->bios != NULL) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
(adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
 
amdgpu_virt_read_pf2vf_data(adev);
+   return;
}
 
if (adev->virt.vf2pf_update_interval_ms != 0) {
@@ -674,12 +676,6 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
if (adev->virt.ras_init_done)
amdgpu_virt_add_bad_page(adev, 
bp_block_offset, bp_block_size);
}
-   } else if (adev->bios != NULL) {
-   adev->virt.fw_reserve.p_pf2vf =
-   (struct amd_sriov_msg_pf2vf_info_header *)
-   (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-
-   amdgpu_virt_read_pf2vf_data(adev);
}
 }
 
-- 
2.30.2



[PATCH] drm/amd/amdgpu: fixing read wrong pf2vf data in SRIOV

2022-01-13 Thread Jingwen Chen
[Why]
This fixes 35709bd76d229a51b0c571a768b741650ec47828.
we should read pf2vf data based at mman.fw_vram_usage_va after gmc
sw_init. patch 35709bd76 breaks this logic.

[How]
calling amdgpu_virt_exchange_data in amdgpu_virt_init_data_exchange to
set the right base in the right sequence.

v2:
call amdgpu_virt_init_data_exchange after gmc sw_init to make data
exchange workqueue run

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 10 +++---
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index da3348fa7b0e..5610eae8ef0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2353,7 +2353,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device 
*adev)
}
 
if (amdgpu_sriov_vf(adev))
-   amdgpu_virt_exchange_data(adev);
+   amdgpu_virt_init_data_exchange(adev);
 
r = amdgpu_ib_pool_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 89ab0032..0b887a49b604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -625,16 +625,12 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
 
-   if (adev->bios != NULL) {
+   if (adev->mman.fw_vram_usage_va != NULL) {
adev->virt.vf2pf_update_interval_ms = 2000;
-
-   adev->virt.fw_reserve.p_pf2vf =
-   (struct amd_sriov_msg_pf2vf_info_header *)
-   (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-
-   amdgpu_virt_read_pf2vf_data(adev);
}
 
+   amdgpu_virt_exchange_data(adev);
+
if (adev->virt.vf2pf_update_interval_ms != 0) {
INIT_DELAYED_WORK(>virt.vf2pf_work, 
amdgpu_virt_update_vf2pf_work_item);
schedule_delayed_work(&(adev->virt.vf2pf_work), 
msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
-- 
2.30.2



[PATCH] drm/amd/amdgpu: fixing read wrong pf2vf data in SRIOV

2022-01-13 Thread Jingwen Chen
[Why]
This fixes 35709bd76d229a51b0c571a768b741650ec47828.
we should read pf2vf data based at mman.fw_vram_usage_va after gmc
sw_init. patch 35709bd76 breaks this logic.

[How]
calling amdgpu_virt_exchange_data in amdgpu_virt_init_data_exchange to
set the right base in the right sequence.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 89ab0032..0b887a49b604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -625,16 +625,12 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device 
*adev)
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
 
-   if (adev->bios != NULL) {
+   if (adev->mman.fw_vram_usage_va != NULL) {
adev->virt.vf2pf_update_interval_ms = 2000;
-
-   adev->virt.fw_reserve.p_pf2vf =
-   (struct amd_sriov_msg_pf2vf_info_header *)
-   (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-
-   amdgpu_virt_read_pf2vf_data(adev);
}
 
+   amdgpu_virt_exchange_data(adev);
+
if (adev->virt.vf2pf_update_interval_ms != 0) {
INIT_DELAYED_WORK(>virt.vf2pf_work, 
amdgpu_virt_update_vf2pf_work_item);
schedule_delayed_work(&(adev->virt.vf2pf_work), 
msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
-- 
2.30.2



Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-11 Thread JingWen Chen
Hi Andrey,

Please go ahead and push your change. I will prepare the RFC later.

On 2022/1/8 上午12:02, Andrey Grodzovsky wrote:
>
> On 2022-01-07 12:46 a.m., JingWen Chen wrote:
>> On 2022/1/7 上午11:57, JingWen Chen wrote:
>>> On 2022/1/7 上午3:13, Andrey Grodzovsky wrote:
>>>> On 2022-01-06 12:18 a.m., JingWen Chen wrote:
>>>>> On 2022/1/6 下午12:59, JingWen Chen wrote:
>>>>>> On 2022/1/6 上午2:24, Andrey Grodzovsky wrote:
>>>>>>> On 2022-01-05 2:59 a.m., Christian König wrote:
>>>>>>>> Am 05.01.22 um 08:34 schrieb JingWen Chen:
>>>>>>>>> On 2022/1/5 上午12:56, Andrey Grodzovsky wrote:
>>>>>>>>>> On 2022-01-04 6:36 a.m., Christian König wrote:
>>>>>>>>>>> Am 04.01.22 um 11:49 schrieb Liu, Monk:
>>>>>>>>>>>> [AMD Official Use Only]
>>>>>>>>>>>>
>>>>>>>>>>>>>> See the FLR request from the hypervisor is just another source 
>>>>>>>>>>>>>> of signaling the need for a reset, similar to each job timeout 
>>>>>>>>>>>>>> on each queue. Otherwise you have a race condition between the 
>>>>>>>>>>>>>> hypervisor and the scheduler.
>>>>>>>>>>>> No it's not, FLR from hypervisor is just to notify guest the hw VF 
>>>>>>>>>>>> FLR is about to start or was already executed, but host will do 
>>>>>>>>>>>> FLR anyway without waiting for guest too long
>>>>>>>>>>>>
>>>>>>>>>>> Then we have a major design issue in the SRIOV protocol and really 
>>>>>>>>>>> need to question this.
>>>>>>>>>>>
>>>>>>>>>>> How do you want to prevent a race between the hypervisor resetting 
>>>>>>>>>>> the hardware and the client trying the same because of a timeout?
>>>>>>>>>>>
>>>>>>>>>>> As far as I can see the procedure should be:
>>>>>>>>>>> 1. We detect that a reset is necessary, either because of a fault a 
>>>>>>>>>>> timeout or signal from hypervisor.
>>>>>>>>>>> 2. For each of those potential reset sources a work item is send to 
>>>>>>>>>>> the single workqueue.
>>>>>>>>>>> 3. One of those work items execute first and prepares the reset.
>>>>>>>>>>> 4. We either do the reset our self or notify the hypervisor that we 
>>>>>>>>>>> are ready for the reset.
>>>>>>>>>>> 5. Cleanup after the reset, eventually resubmit jobs etc..
>>>>>>>>>>> 6. Cancel work items which might have been scheduled from other 
>>>>>>>>>>> reset sources.
>>>>>>>>>>>
>>>>>>>>>>> It does make sense that the hypervisor resets the hardware without 
>>>>>>>>>>> waiting for the clients for too long, but if we don't follow this 
>>>>>>>>>>> general steps we will always have a race between the different 
>>>>>>>>>>> components.
>>>>>>>>>> Monk, just to add to this - if indeed as you say that 'FLR from 
>>>>>>>>>> hypervisor is just to notify guest the hw VF FLR is about to start 
>>>>>>>>>> or was already executed, but host will do FLR anyway without waiting 
>>>>>>>>>> for guest too long'
>>>>>>>>>> and there is no strict waiting from the hypervisor for 
>>>>>>>>>> IDH_READY_TO_RESET to be received from guest before starting the 
>>>>>>>>>> reset then setting in_gpu_reset and locking reset_sem from guest 
>>>>>>>>>> side is not really foolproof
>>>>>>>>>> protection from MMIO accesses by the guest - it only truly helps if 
>>>>>>>>>> hypervisor waits for that message before initiation of HW reset.
>>>>>>>>>>
>>>>>>>>> Hi Andrey, this cannot be done. If somehow guest kernel hangs and 
>>>>>>>>> never has the

Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-06 Thread JingWen Chen


On 2022/1/7 上午11:57, JingWen Chen wrote:
> On 2022/1/7 上午3:13, Andrey Grodzovsky wrote:
>> On 2022-01-06 12:18 a.m., JingWen Chen wrote:
>>> On 2022/1/6 下午12:59, JingWen Chen wrote:
>>>> On 2022/1/6 上午2:24, Andrey Grodzovsky wrote:
>>>>> On 2022-01-05 2:59 a.m., Christian König wrote:
>>>>>> Am 05.01.22 um 08:34 schrieb JingWen Chen:
>>>>>>> On 2022/1/5 上午12:56, Andrey Grodzovsky wrote:
>>>>>>>> On 2022-01-04 6:36 a.m., Christian König wrote:
>>>>>>>>> Am 04.01.22 um 11:49 schrieb Liu, Monk:
>>>>>>>>>> [AMD Official Use Only]
>>>>>>>>>>
>>>>>>>>>>>> See the FLR request from the hypervisor is just another source of 
>>>>>>>>>>>> signaling the need for a reset, similar to each job timeout on 
>>>>>>>>>>>> each queue. Otherwise you have a race condition between the 
>>>>>>>>>>>> hypervisor and the scheduler.
>>>>>>>>>> No it's not, FLR from hypervisor is just to notify guest the hw VF 
>>>>>>>>>> FLR is about to start or was already executed, but host will do FLR 
>>>>>>>>>> anyway without waiting for guest too long
>>>>>>>>>>
>>>>>>>>> Then we have a major design issue in the SRIOV protocol and really 
>>>>>>>>> need to question this.
>>>>>>>>>
>>>>>>>>> How do you want to prevent a race between the hypervisor resetting 
>>>>>>>>> the hardware and the client trying the same because of a timeout?
>>>>>>>>>
>>>>>>>>> As far as I can see the procedure should be:
>>>>>>>>> 1. We detect that a reset is necessary, either because of a fault a 
>>>>>>>>> timeout or signal from hypervisor.
>>>>>>>>> 2. For each of those potential reset sources a work item is send to 
>>>>>>>>> the single workqueue.
>>>>>>>>> 3. One of those work items execute first and prepares the reset.
>>>>>>>>> 4. We either do the reset our self or notify the hypervisor that we 
>>>>>>>>> are ready for the reset.
>>>>>>>>> 5. Cleanup after the reset, eventually resubmit jobs etc..
>>>>>>>>> 6. Cancel work items which might have been scheduled from other reset 
>>>>>>>>> sources.
>>>>>>>>>
>>>>>>>>> It does make sense that the hypervisor resets the hardware without 
>>>>>>>>> waiting for the clients for too long, but if we don't follow this 
>>>>>>>>> general steps we will always have a race between the different 
>>>>>>>>> components.
>>>>>>>> Monk, just to add to this - if indeed as you say that 'FLR from 
>>>>>>>> hypervisor is just to notify guest the hw VF FLR is about to start or 
>>>>>>>> was already executed, but host will do FLR anyway without waiting for 
>>>>>>>> guest too long'
>>>>>>>> and there is no strict waiting from the hypervisor for 
>>>>>>>> IDH_READY_TO_RESET to be received from guest before starting the reset 
>>>>>>>> then setting in_gpu_reset and locking reset_sem from guest side is not 
>>>>>>>> really foolproof
>>>>>>>> protection from MMIO accesses by the guest - it only truly helps if 
>>>>>>>> hypervisor waits for that message before initiation of HW reset.
>>>>>>>>
>>>>>>> Hi Andrey, this cannot be done. If somehow guest kernel hangs and never 
>>>>>>> has the chance to send the response back, then other VFs will have to 
>>>>>>> wait it reset. All the vfs will hang in this case. Or sometimes the 
>>>>>>> mailbox has some delay and other VFs will also wait. The user of other 
>>>>>>> VFs will be affected in this case.
>>>>>> Yeah, agree completely with JingWen. The hypervisor is the one in charge 
>>>>>> here, not the guest.
>>>>>>
>>>>>> What the hypervisor should do (and it already seems to be designed that 
>>>>>> way) is to s

Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-06 Thread JingWen Chen


On 2022/1/7 上午3:13, Andrey Grodzovsky wrote:
>
> On 2022-01-06 12:18 a.m., JingWen Chen wrote:
>> On 2022/1/6 下午12:59, JingWen Chen wrote:
>>> On 2022/1/6 上午2:24, Andrey Grodzovsky wrote:
>>>> On 2022-01-05 2:59 a.m., Christian König wrote:
>>>>> Am 05.01.22 um 08:34 schrieb JingWen Chen:
>>>>>> On 2022/1/5 上午12:56, Andrey Grodzovsky wrote:
>>>>>>> On 2022-01-04 6:36 a.m., Christian König wrote:
>>>>>>>> Am 04.01.22 um 11:49 schrieb Liu, Monk:
>>>>>>>>> [AMD Official Use Only]
>>>>>>>>>
>>>>>>>>>>> See the FLR request from the hypervisor is just another source of 
>>>>>>>>>>> signaling the need for a reset, similar to each job timeout on each 
>>>>>>>>>>> queue. Otherwise you have a race condition between the hypervisor 
>>>>>>>>>>> and the scheduler.
>>>>>>>>> No it's not, FLR from hypervisor is just to notify guest the hw VF 
>>>>>>>>> FLR is about to start or was already executed, but host will do FLR 
>>>>>>>>> anyway without waiting for guest too long
>>>>>>>>>
>>>>>>>> Then we have a major design issue in the SRIOV protocol and really 
>>>>>>>> need to question this.
>>>>>>>>
>>>>>>>> How do you want to prevent a race between the hypervisor resetting the 
>>>>>>>> hardware and the client trying the same because of a timeout?
>>>>>>>>
>>>>>>>> As far as I can see the procedure should be:
>>>>>>>> 1. We detect that a reset is necessary, either because of a fault a 
>>>>>>>> timeout or signal from hypervisor.
>>>>>>>> 2. For each of those potential reset sources a work item is send to 
>>>>>>>> the single workqueue.
>>>>>>>> 3. One of those work items execute first and prepares the reset.
>>>>>>>> 4. We either do the reset our self or notify the hypervisor that we 
>>>>>>>> are ready for the reset.
>>>>>>>> 5. Cleanup after the reset, eventually resubmit jobs etc..
>>>>>>>> 6. Cancel work items which might have been scheduled from other reset 
>>>>>>>> sources.
>>>>>>>>
>>>>>>>> It does make sense that the hypervisor resets the hardware without 
>>>>>>>> waiting for the clients for too long, but if we don't follow this 
>>>>>>>> general steps we will always have a race between the different 
>>>>>>>> components.
>>>>>>> Monk, just to add to this - if indeed as you say that 'FLR from 
>>>>>>> hypervisor is just to notify guest the hw VF FLR is about to start or 
>>>>>>> was already executed, but host will do FLR anyway without waiting for 
>>>>>>> guest too long'
>>>>>>> and there is no strict waiting from the hypervisor for 
>>>>>>> IDH_READY_TO_RESET to be received from guest before starting the reset 
>>>>>>> then setting in_gpu_reset and locking reset_sem from guest side is not 
>>>>>>> really foolproof
>>>>>>> protection from MMIO accesses by the guest - it only truly helps if 
>>>>>>> hypervisor waits for that message before initiation of HW reset.
>>>>>>>
>>>>>> Hi Andrey, this cannot be done. If somehow guest kernel hangs and never 
>>>>>> has the chance to send the response back, then other VFs will have to 
>>>>>> wait it reset. All the vfs will hang in this case. Or sometimes the 
>>>>>> mailbox has some delay and other VFs will also wait. The user of other 
>>>>>> VFs will be affected in this case.
>>>>> Yeah, agree completely with JingWen. The hypervisor is the one in charge 
>>>>> here, not the guest.
>>>>>
>>>>> What the hypervisor should do (and it already seems to be designed that 
>>>>> way) is to send the guest a message that a reset is about to happen and 
>>>>> give it some time to response appropriately.
>>>>>
>>>>> The guest on the other hand then tells the hypervisor that all processing 
>>>>> has stopped and it

Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-05 Thread JingWen Chen


On 2022/1/6 下午12:59, JingWen Chen wrote:
> On 2022/1/6 上午2:24, Andrey Grodzovsky wrote:
>> On 2022-01-05 2:59 a.m., Christian König wrote:
>>> Am 05.01.22 um 08:34 schrieb JingWen Chen:
>>>> On 2022/1/5 上午12:56, Andrey Grodzovsky wrote:
>>>>> On 2022-01-04 6:36 a.m., Christian König wrote:
>>>>>> Am 04.01.22 um 11:49 schrieb Liu, Monk:
>>>>>>> [AMD Official Use Only]
>>>>>>>
>>>>>>>>> See the FLR request from the hypervisor is just another source of 
>>>>>>>>> signaling the need for a reset, similar to each job timeout on each 
>>>>>>>>> queue. Otherwise you have a race condition between the hypervisor and 
>>>>>>>>> the scheduler.
>>>>>>> No it's not, FLR from hypervisor is just to notify guest the hw VF FLR 
>>>>>>> is about to start or was already executed, but host will do FLR anyway 
>>>>>>> without waiting for guest too long
>>>>>>>
>>>>>> Then we have a major design issue in the SRIOV protocol and really need 
>>>>>> to question this.
>>>>>>
>>>>>> How do you want to prevent a race between the hypervisor resetting the 
>>>>>> hardware and the client trying the same because of a timeout?
>>>>>>
>>>>>> As far as I can see the procedure should be:
>>>>>> 1. We detect that a reset is necessary, either because of a fault a 
>>>>>> timeout or signal from hypervisor.
>>>>>> 2. For each of those potential reset sources a work item is send to the 
>>>>>> single workqueue.
>>>>>> 3. One of those work items execute first and prepares the reset.
>>>>>> 4. We either do the reset our self or notify the hypervisor that we are 
>>>>>> ready for the reset.
>>>>>> 5. Cleanup after the reset, eventually resubmit jobs etc..
>>>>>> 6. Cancel work items which might have been scheduled from other reset 
>>>>>> sources.
>>>>>>
>>>>>> It does make sense that the hypervisor resets the hardware without 
>>>>>> waiting for the clients for too long, but if we don't follow this 
>>>>>> general steps we will always have a race between the different 
>>>>>> components.
>>>>> Monk, just to add to this - if indeed as you say that 'FLR from 
>>>>> hypervisor is just to notify guest the hw VF FLR is about to start or was 
>>>>> already executed, but host will do FLR anyway without waiting for guest 
>>>>> too long'
>>>>> and there is no strict waiting from the hypervisor for IDH_READY_TO_RESET 
>>>>> to be received from guest before starting the reset then setting 
>>>>> in_gpu_reset and locking reset_sem from guest side is not really 
>>>>> foolproof
>>>>> protection from MMIO accesses by the guest - it only truly helps if 
>>>>> hypervisor waits for that message before initiation of HW reset.
>>>>>
>>>> Hi Andrey, this cannot be done. If somehow guest kernel hangs and never 
>>>> has the chance to send the response back, then other VFs will have to wait 
>>>> it reset. All the vfs will hang in this case. Or sometimes the mailbox has 
>>>> some delay and other VFs will also wait. The user of other VFs will be 
>>>> affected in this case.
>>> Yeah, agree completely with JingWen. The hypervisor is the one in charge 
>>> here, not the guest.
>>>
>>> What the hypervisor should do (and it already seems to be designed that 
>>> way) is to send the guest a message that a reset is about to happen and 
>>> give it some time to response appropriately.
>>>
>>> The guest on the other hand then tells the hypervisor that all processing 
>>> has stopped and it is ready to restart. If that doesn't happen in time the 
>>> hypervisor should eliminate the guest probably trigger even more severe 
>>> consequences, e.g. restart the whole VM etc...
>>>
>>> Christian.
>>
>> So what's the end conclusion here regarding dropping this particular patch ? 
>> Seems to me we still need to drop it to prevent driver's MMIO access
>> to the GPU during reset from various places in the code.
>>
>> Andrey
>>
> Hi Andrey & Christian,
>
> I have ported your patch(drop the reset_sem and in_gpu_reset in flr work)

Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-05 Thread JingWen Chen


On 2022/1/6 上午2:24, Andrey Grodzovsky wrote:
>
> On 2022-01-05 2:59 a.m., Christian König wrote:
>> Am 05.01.22 um 08:34 schrieb JingWen Chen:
>>> On 2022/1/5 上午12:56, Andrey Grodzovsky wrote:
>>>> On 2022-01-04 6:36 a.m., Christian König wrote:
>>>>> Am 04.01.22 um 11:49 schrieb Liu, Monk:
>>>>>> [AMD Official Use Only]
>>>>>>
>>>>>>>> See the FLR request from the hypervisor is just another source of 
>>>>>>>> signaling the need for a reset, similar to each job timeout on each 
>>>>>>>> queue. Otherwise you have a race condition between the hypervisor and 
>>>>>>>> the scheduler.
>>>>>> No it's not, FLR from hypervisor is just to notify guest the hw VF FLR 
>>>>>> is about to start or was already executed, but host will do FLR anyway 
>>>>>> without waiting for guest too long
>>>>>>
>>>>> Then we have a major design issue in the SRIOV protocol and really need 
>>>>> to question this.
>>>>>
>>>>> How do you want to prevent a race between the hypervisor resetting the 
>>>>> hardware and the client trying the same because of a timeout?
>>>>>
>>>>> As far as I can see the procedure should be:
>>>>> 1. We detect that a reset is necessary, either because of a fault a 
>>>>> timeout or signal from hypervisor.
>>>>> 2. For each of those potential reset sources a work item is send to the 
>>>>> single workqueue.
>>>>> 3. One of those work items execute first and prepares the reset.
>>>>> 4. We either do the reset our self or notify the hypervisor that we are 
>>>>> ready for the reset.
>>>>> 5. Cleanup after the reset, eventually resubmit jobs etc..
>>>>> 6. Cancel work items which might have been scheduled from other reset 
>>>>> sources.
>>>>>
>>>>> It does make sense that the hypervisor resets the hardware without 
>>>>> waiting for the clients for too long, but if we don't follow this general 
>>>>> steps we will always have a race between the different components.
>>>>
>>>> Monk, just to add to this - if indeed as you say that 'FLR from hypervisor 
>>>> is just to notify guest the hw VF FLR is about to start or was already 
>>>> executed, but host will do FLR anyway without waiting for guest too long'
>>>> and there is no strict waiting from the hypervisor for IDH_READY_TO_RESET 
>>>> to be received from guest before starting the reset then setting 
>>>> in_gpu_reset and locking reset_sem from guest side is not really foolproof
>>>> protection from MMIO accesses by the guest - it only truly helps if 
>>>> hypervisor waits for that message before initiation of HW reset.
>>>>
>>> Hi Andrey, this cannot be done. If somehow guest kernel hangs and never has 
>>> the chance to send the response back, then other VFs will have to wait it 
>>> reset. All the vfs will hang in this case. Or sometimes the mailbox has 
>>> some delay and other VFs will also wait. The user of other VFs will be 
>>> affected in this case.
>>
>> Yeah, agree completely with JingWen. The hypervisor is the one in charge 
>> here, not the guest.
>>
>> What the hypervisor should do (and it already seems to be designed that way) 
>> is to send the guest a message that a reset is about to happen and give it 
>> some time to response appropriately.
>>
>> The guest on the other hand then tells the hypervisor that all processing 
>> has stopped and it is ready to restart. If that doesn't happen in time the 
>> hypervisor should eliminate the guest probably trigger even more severe 
>> consequences, e.g. restart the whole VM etc...
>>
>> Christian.
>
>
> So what's the end conclusion here regarding dropping this particular patch ? 
> Seems to me we still need to drop it to prevent driver's MMIO access
> to the GPU during reset from various places in the code.
>
> Andrey
>
Hi Andrey & Christian,

I have ported your patch(drop the reset_sem and in_gpu_reset in flr work) and 
run some tests. If a engine hang during an OCL benchmark(using kfd), we can see 
the logs below:

[  397.190727] amdgpu :00:07.0: amdgpu: wait for kiq fence error: 0.
[  397.301496] amdgpu :00:07.0: amdgpu: wait for kiq fence error: 0.
[  397.406601] amdgpu :00:07.0: amdgpu: wait for kiq fence error: 0.
[  397.532343] amdgpu :00:07.0: 

Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-04 Thread JingWen Chen
>>> Hi Jingwen,
>>>
>>> well what I mean is that we need to adjust the implementation in amdgpu to 
>>> actually match the requirements.
>>>
>>> Could be that the reset sequence is questionable in general, but I doubt so 
>>> at least for now.
>>>
>>> See the FLR request from the hypervisor is just another source of signaling 
>>> the need for a reset, similar to each job timeout on each queue. Otherwise 
>>> you have a race condition between the hypervisor and the scheduler.
>>>
>>> Properly setting in_gpu_reset is indeed mandatory, but should happen at a 
>>> central place and not in the SRIOV specific code.
>>>
>>> In other words I strongly think that the current SRIOV reset implementation 
>>> is severely broken and what Andrey is doing is actually fixing it.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 04.01.22 um 10:07 schrieb JingWen Chen:
>>>> Hi Christian,
>>>> I'm not sure what do you mean by "we need to change SRIOV not the driver".
>>>>
>>>> Do you mean we should change the reset sequence in SRIOV? This will be a 
>>>> huge change for our SRIOV solution.
>>>>
>>>>   From my point of view, we can directly use amdgpu_device_lock_adev
>>>> and amdgpu_device_unlock_adev in flr_work instead of try_lock since no one 
>>>> will conflict with this thread with reset_domain introduced.
>>>> But we do need the reset_sem and adev->in_gpu_reset to keep device 
>>>> untouched via user space.
>>>>
>>>> Best Regards,
>>>> Jingwen Chen
>>>>
>>>> On 2022/1/3 下午6:17, Christian König wrote:
>>>>> Please don't. This patch is vital to the cleanup of the reset procedure.
>>>>>
>>>>> If SRIOV doesn't work with that we need to change SRIOV and not the 
>>>>> driver.
>>>>>
>>>>> Christian.
>>>>>
>>>>> Am 30.12.21 um 19:45 schrieb Andrey Grodzovsky:
>>>>>> Sure, I guess i can drop this patch then.
>>>>>>
>>>>>> Andrey
>>>>>>
>>>>>> On 2021-12-24 4:57 a.m., JingWen Chen wrote:
>>>>>>> I do agree with shaoyun, if the host find the gpu engine hangs first, 
>>>>>>> and do the flr, guest side thread may not know this and still try to 
>>>>>>> access HW(e.g. kfd is using a lot of amdgpu_in_reset and reset_sem to 
>>>>>>> identify the reset status). And this may lead to very bad result.
>>>>>>>
>>>>>>> On 2021/12/24 下午4:58, Deng, Emily wrote:
>>>>>>>> These patches look good to me. JingWen will pull these patches and do 
>>>>>>>> some basic TDR test on sriov environment, and give feedback.
>>>>>>>>
>>>>>>>> Best wishes
>>>>>>>> Emily Deng
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>> -Original Message-
>>>>>>>>> From: Liu, Monk 
>>>>>>>>> Sent: Thursday, December 23, 2021 6:14 PM
>>>>>>>>> To: Koenig, Christian ; Grodzovsky,
>>>>>>>>> Andrey ;
>>>>>>>>> dri-de...@lists.freedesktop.org; amd- g...@lists.freedesktop.org;
>>>>>>>>> Chen, Horace ; Chen, JingWen
>>>>>>>>> ; Deng, Emily 
>>>>>>>>> Cc: dan...@ffwll.ch
>>>>>>>>> Subject: RE: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset
>>>>>>>>> protection for SRIOV
>>>>>>>>>
>>>>>>>>> [AMD Official Use Only]
>>>>>>>>>
>>>>>>>>> @Chen, Horace @Chen, JingWen @Deng, Emily
>>>>>>>>>
>>>>>>>>> Please take a review on Andrey's patch
>>>>>>>>>
>>>>>>>>> Thanks
>>>>>>>>> -
>>>>>>>>> -- Monk Liu | Cloud GPU & Virtualization Solution | AMD
>>>>>>>>> -
>>>>>>>>> -- we are hiring software manager for CVS core team
>>>>>&g

Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-04 Thread JingWen Chen


On 2022/1/4 下午7:36, Christian König wrote:
> Am 04.01.22 um 11:49 schrieb Liu, Monk:
>> [AMD Official Use Only]
>>
>>>> See the FLR request from the hypervisor is just another source of 
>>>> signaling the need for a reset, similar to each job timeout on each queue. 
>>>> Otherwise you have a race condition between the hypervisor and the 
>>>> scheduler.
>> No it's not, FLR from hypervisor is just to notify guest the hw VF FLR is 
>> about to start or was already executed, but host will do FLR anyway without 
>> waiting for guest too long
>>
>
> Then we have a major design issue in the SRIOV protocol and really need to 
> question this.
>
> How do you want to prevent a race between the hypervisor resetting the 
> hardware and the client trying the same because of a timeout?
>
> As far as I can see the procedure should be:
> 1. We detect that a reset is necessary, either because of a fault a timeout 
> or signal from hypervisor.
> 2. For each of those potential reset sources a work item is send to the 
> single workqueue.

I think Andrey has already used the same ordered work queue to handle the reset 
from both ring timeout and hypervisor. (Patch 5)

So there should be no race between different reset sources. As ring timeout is 
much longer than the world switch time slice(6ms), we should see a reset from 
hypervisor queued into reset domain wq first and after the flr work done, then 
the ring timeout reset queued into reset domain.

> 3. One of those work items execute first and prepares the reset.
> 4. We either do the reset our self or notify the hypervisor that we are ready 
> for the reset.
> 5. Cleanup after the reset, eventually resubmit jobs etc..
> 6. Cancel work items which might have been scheduled from other reset sources.
>
> It does make sense that the hypervisor resets the hardware without waiting 
> for the clients for too long, but if we don't follow this general steps we 
> will always have a race between the different components.

So the reset_sem and in_gpu_reset is to prevent race between 
reset_domain(mostly hypervisor source) and other user spaces(e.g. kfd).

>
> Regards,
> Christian.
>
> Am 04.01.22 um 11:49 schrieb Liu, Monk:
>> [AMD Official Use Only]
>>
>>>> See the FLR request from the hypervisor is just another source of 
>>>> signaling the need for a reset, similar to each job timeout on each queue. 
>>>> Otherwise you have a race condition between the hypervisor and the 
>>>> scheduler.
>> No it's not, FLR from hypervisor is just to notify guest the hw VF FLR is 
>> about to start or was already executed, but host will do FLR anyway without 
>> waiting for guest too long
>>
>>>> In other words I strongly think that the current SRIOV reset 
>>>> implementation is severely broken and what Andrey is doing is actually 
>>>> fixing it.
>> It makes the code crash ... how could it be a fix?
>>
>> I'm afraid the patch is NAK from me,  but it is welcome if the cleanup do 
>> not ruin the logic, Andry or jingwen can try it if needed.
>>
>> Thanks
>> ---
>> Monk Liu | Cloud GPU & Virtualization Solution | AMD
>> ---
>> we are hiring software manager for CVS core team
>> ---
>>
>> -Original Message-
>> From: Koenig, Christian 
>> Sent: Tuesday, January 4, 2022 6:19 PM
>> To: Chen, JingWen ; Christian König 
>> ; Grodzovsky, Andrey 
>> ; Deng, Emily ; Liu, Monk 
>> ; dri-de...@lists.freedesktop.org; 
>> amd-gfx@lists.freedesktop.org; Chen, Horace ; Chen, 
>> JingWen 
>> Cc: dan...@ffwll.ch
>> Subject: Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection 
>> for SRIOV
>>
>> Hi Jingwen,
>>
>> well what I mean is that we need to adjust the implementation in amdgpu to 
>> actually match the requirements.
>>
>> Could be that the reset sequence is questionable in general, but I doubt so 
>> at least for now.
>>
>> See the FLR request from the hypervisor is just another source of signaling 
>> the need for a reset, similar to each job timeout on each queue. Otherwise 
>> you have a race condition between the hypervisor and the scheduler.
>>
>> Properly setting in_gpu_reset is indeed mandatory, but should happen at a 
>> central place and not in the SRIOV specific code.
>>
>> In other words I strongly think that the current SRIOV reset imp

Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2022-01-04 Thread JingWen Chen
Hi Christian,
I'm not sure what you mean by "we need to change SRIOV not the driver".

Do you mean we should change the reset sequence in SRIOV? This will be a huge 
change for our SRIOV solution.

From my point of view, we can directly use
amdgpu_device_lock_adev and amdgpu_device_unlock_adev in flr_work instead of 
try_lock since no one will conflict with this thread with reset_domain 
introduced.
But we do need the reset_sem and adev->in_gpu_reset to keep device untouched 
via user space.

Best Regards,
Jingwen Chen

On 2022/1/3 下午6:17, Christian König wrote:
> Please don't. This patch is vital to the cleanup of the reset procedure.
>
> If SRIOV doesn't work with that we need to change SRIOV and not the driver.
>
> Christian.
>
> Am 30.12.21 um 19:45 schrieb Andrey Grodzovsky:
>> Sure, I guess i can drop this patch then.
>>
>> Andrey
>>
>> On 2021-12-24 4:57 a.m., JingWen Chen wrote:
>>> I do agree with shaoyun, if the host find the gpu engine hangs first, and 
>>> do the flr, guest side thread may not know this and still try to access 
>>> HW(e.g. kfd is using a lot of amdgpu_in_reset and reset_sem to identify the 
>>> reset status). And this may lead to very bad result.
>>>
>>> On 2021/12/24 下午4:58, Deng, Emily wrote:
>>>> These patches look good to me. JingWen will pull these patches and do some 
>>>> basic TDR test on sriov environment, and give feedback.
>>>>
>>>> Best wishes
>>>> Emily Deng
>>>>
>>>>
>>>>
>>>>> -Original Message-
>>>>> From: Liu, Monk 
>>>>> Sent: Thursday, December 23, 2021 6:14 PM
>>>>> To: Koenig, Christian ; Grodzovsky, Andrey
>>>>> ; dri-de...@lists.freedesktop.org; amd-
>>>>> g...@lists.freedesktop.org; Chen, Horace ; Chen,
>>>>> JingWen ; Deng, Emily 
>>>>> Cc: dan...@ffwll.ch
>>>>> Subject: RE: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset 
>>>>> protection
>>>>> for SRIOV
>>>>>
>>>>> [AMD Official Use Only]
>>>>>
>>>>> @Chen, Horace @Chen, JingWen @Deng, Emily
>>>>>
>>>>> Please take a review on Andrey's patch
>>>>>
>>>>> Thanks
>>>>> ---
>>>>> Monk Liu | Cloud GPU & Virtualization Solution | AMD
>>>>> ---
>>>>> we are hiring software manager for CVS core team
>>>>> ---
>>>>>
>>>>> -Original Message-
>>>>> From: Koenig, Christian 
>>>>> Sent: Thursday, December 23, 2021 4:42 PM
>>>>> To: Grodzovsky, Andrey ; dri-
>>>>> de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
>>>>> Cc: dan...@ffwll.ch; Liu, Monk ; Chen, Horace
>>>>> 
>>>>> Subject: Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset 
>>>>> protection
>>>>> for SRIOV
>>>>>
>>>>> Am 22.12.21 um 23:14 schrieb Andrey Grodzovsky:
>>>>>> Since now flr work is serialized against  GPU resets there is no need
>>>>>> for this.
>>>>>>
>>>>>> Signed-off-by: Andrey Grodzovsky 
>>>>> Acked-by: Christian König 
>>>>>
>>>>>> ---
>>>>>>    drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ---
>>>>>>    drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ---
>>>>>>    2 files changed, 22 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>>>>> index 487cd654b69e..7d59a66e3988 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>>>>> @@ -248,15 +248,7 @@ static void xgpu_ai_mailbox_flr_work(struct
>>>>> work_struct *work)
>>>>>>    struct amdgpu_device *adev = container_of(virt, struct
>>>>> amdgpu_device, virt);
>>>>>>    int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
>>>>>>
>>>>>> -    /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
>>>>>> - * otherwise the mailbo

Re: [PATCH] drm/amdgpu: add dummy event6 for vega10

2021-12-30 Thread JingWen Chen
Reviewed-by: Jingwen Chen 

On 2021/12/29 下午6:38, James Yao wrote:
> [why]
> Malicious mailbox event1 fails driver loading on vega10.
> A dummy event6 prevents the driver from taking the response from malicious event1 as 
> its own.
>
> [how]
> On vega10, send a mailbox event6 before sending event1.
>
> Signed-off-by: James Yao 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  4 
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c| 11 +++
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h|  2 ++
>  3 files changed, 17 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index f8e574cc0e22..d9509c3482e2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -727,6 +727,10 @@ void amdgpu_detect_virtualization(struct amdgpu_device 
> *adev)
>   vi_set_virt_ops(adev);
>   break;
>   case CHIP_VEGA10:
> + soc15_set_virt_ops(adev);
> + /* send a dummy GPU_INIT_DATA request to host on vega10 
> */
> + amdgpu_virt_request_init_data(adev);
> + break;
>   case CHIP_VEGA20:
>   case CHIP_ARCTURUS:
>   case CHIP_ALDEBARAN:
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 0077e738db31..56da5ab82987 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -180,6 +180,11 @@ static int xgpu_ai_send_access_requests(struct 
> amdgpu_device *adev,
>   RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
>   mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
>   }
> + } else if (req == IDH_REQ_GPU_INIT_DATA){
> + /* Dummy REQ_GPU_INIT_DATA handling */
> + r = xgpu_ai_poll_msg(adev, IDH_REQ_GPU_INIT_DATA_READY);
> + /* version set to 0 since dummy */
> + adev->virt.req_init_data_ver = 0;   
>   }
>  
>   return 0;
> @@ -381,10 +386,16 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
>   amdgpu_irq_put(adev, >virt.rcv_irq, 0);
>  }
>  
> +static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
> +{
> + return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
> +}
> +
>  const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
>   .req_full_gpu   = xgpu_ai_request_full_gpu_access,
>   .rel_full_gpu   = xgpu_ai_release_full_gpu_access,
>   .reset_gpu = xgpu_ai_request_reset,
>   .wait_reset = NULL,
>   .trans_msg = xgpu_ai_mailbox_trans_msg,
> + .req_init_data  = xgpu_ai_request_init_data,
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h 
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> index f9aa4d0bb638..fa7e13e0459e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> @@ -35,6 +35,7 @@ enum idh_request {
>   IDH_REQ_GPU_FINI_ACCESS,
>   IDH_REL_GPU_FINI_ACCESS,
>   IDH_REQ_GPU_RESET_ACCESS,
> + IDH_REQ_GPU_INIT_DATA,
>  
>   IDH_LOG_VF_ERROR   = 200,
>   IDH_READY_TO_RESET  = 201,
> @@ -48,6 +49,7 @@ enum idh_event {
>   IDH_SUCCESS,
>   IDH_FAIL,
>   IDH_QUERY_ALIVE,
> + IDH_REQ_GPU_INIT_DATA_READY,
>  
>   IDH_TEXT_MESSAGE = 255,
>  };


Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection for SRIOV

2021-12-24 Thread JingWen Chen
I do agree with shaoyun: if the host finds the gpu engine hang first and does 
the flr, the guest side thread may not know this and may still try to access HW(e.g. 
kfd is using a lot of amdgpu_in_reset and reset_sem to identify the reset 
status). And this may lead to very bad result.

On 2021/12/24 下午4:58, Deng, Emily wrote:
> These patches look good to me. JingWen will pull these patches and do some 
> basic TDR test on sriov environment, and give feedback.
>
> Best wishes
> Emily Deng
>
>
>
>> -Original Message-
>> From: Liu, Monk 
>> Sent: Thursday, December 23, 2021 6:14 PM
>> To: Koenig, Christian ; Grodzovsky, Andrey
>> ; dri-de...@lists.freedesktop.org; amd-
>> g...@lists.freedesktop.org; Chen, Horace ; Chen,
>> JingWen ; Deng, Emily 
>> Cc: dan...@ffwll.ch
>> Subject: RE: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection
>> for SRIOV
>>
>> [AMD Official Use Only]
>>
>> @Chen, Horace @Chen, JingWen @Deng, Emily
>>
>> Please take a review on Andrey's patch
>>
>> Thanks
>> ---
>> Monk Liu | Cloud GPU & Virtualization Solution | AMD
>> ---
>> we are hiring software manager for CVS core team
>> ---
>>
>> -Original Message-
>> From: Koenig, Christian 
>> Sent: Thursday, December 23, 2021 4:42 PM
>> To: Grodzovsky, Andrey ; dri-
>> de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
>> Cc: dan...@ffwll.ch; Liu, Monk ; Chen, Horace
>> 
>> Subject: Re: [RFC v2 8/8] drm/amd/virt: Drop concurrent GPU reset protection
>> for SRIOV
>>
>> Am 22.12.21 um 23:14 schrieb Andrey Grodzovsky:
>>> Since now flr work is serialized against  GPU resets there is no need
>>> for this.
>>>
>>> Signed-off-by: Andrey Grodzovsky 
>> Acked-by: Christian König 
>>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 11 ---
>>>   drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 11 ---
>>>   2 files changed, 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>> index 487cd654b69e..7d59a66e3988 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>>> @@ -248,15 +248,7 @@ static void xgpu_ai_mailbox_flr_work(struct
>> work_struct *work)
>>> struct amdgpu_device *adev = container_of(virt, struct
>> amdgpu_device, virt);
>>> int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
>>>
>>> -   /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
>>> -* otherwise the mailbox msg will be ruined/reseted by
>>> -* the VF FLR.
>>> -*/
>>> -   if (!down_write_trylock(>reset_sem))
>>> -   return;
>>> -
>>> amdgpu_virt_fini_data_exchange(adev);
>>> -   atomic_set(>in_gpu_reset, 1);
>>>
>>> xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
>>>
>>> @@ -269,9 +261,6 @@ static void xgpu_ai_mailbox_flr_work(struct
>> work_struct *work)
>>> } while (timeout > 1);
>>>
>>>   flr_done:
>>> -   atomic_set(>in_gpu_reset, 0);
>>> -   up_write(>reset_sem);
>>> -
>>> /* Trigger recovery for world switch failure if no TDR */
>>> if (amdgpu_device_should_recover_gpu(adev)
>>> && (!amdgpu_device_has_job_running(adev) || diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>>> b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>>> index e3869067a31d..f82c066c8e8d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>>> @@ -277,15 +277,7 @@ static void xgpu_nv_mailbox_flr_work(struct
>> work_struct *work)
>>> struct amdgpu_device *adev = container_of(virt, struct
>> amdgpu_device, virt);
>>> int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
>>>
>>> -   /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
>>> -* otherwise the mailbox msg will be ruined/reseted by
>>> -* the VF FLR.
>>> -*/
>>> -   if (!down_write_trylock(>reset_sem))
>>> -   return;
>>> -
>>> amdgpu_virt_fini_data_exchange(adev);
>>> -   atomic_set(>in_gpu_reset, 1);
>>>
>>> xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
>>>
>>> @@ -298,9 +290,6 @@ static void xgpu_nv_mailbox_flr_work(struct
>> work_struct *work)
>>> } while (timeout > 1);
>>>
>>>   flr_done:
>>> -   atomic_set(>in_gpu_reset, 0);
>>> -   up_write(>reset_sem);
>>> -
>>> /* Trigger recovery for world switch failure if no TDR */
>>> if (amdgpu_device_should_recover_gpu(adev)
>>> && (!amdgpu_device_has_job_running(adev) ||


[PATCH v2 2/2] drm/amd/amdgpu: fix gmc bo pin count leak in SRIOV

2021-12-13 Thread Jingwen Chen
[Why]
gmc bo will be pinned during loading amdgpu and reset in SRIOV while
only unpinned in unload amdgpu

[How]
add amdgpu_in_reset and sriov judgement to skip pin bo

v2: fix wrong judgement

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 4 
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d696c4754bea..ae46eb35b3d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -992,10 +992,14 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device 
*adev)
return -EINVAL;
}
 
+   if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+   goto skip_pin_bo;
+
r = amdgpu_gart_table_vram_pin(adev);
if (r)
return r;
 
+skip_pin_bo:
r = adev->gfxhub.funcs->gart_enable(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index db2ec84f7237..d91eb7eb0ebe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1717,10 +1717,14 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device 
*adev)
return -EINVAL;
}
 
+   if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+   goto skip_pin_bo;
+
r = amdgpu_gart_table_vram_pin(adev);
if (r)
return r;
 
+skip_pin_bo:
r = adev->gfxhub.funcs->gart_enable(adev);
if (r)
return r;
-- 
2.30.2



[PATCH v2 1/2] drm/amd/amdgpu: fix psp tmr bo pin count leak in SRIOV

2021-12-13 Thread Jingwen Chen
[Why]
psp tmr bo will be pinned during loading amdgpu and reset in SRIOV while
only unpinned in unload amdgpu

[How]
add amdgpu_in_reset and sriov judgement to skip pin bo

v2: fix wrong judgement

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 103bcadbc8b8..4de46fcb486c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2017,12 +2017,16 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
 
+   if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) 
+   goto skip_pin_bo;
+
ret = psp_tmr_init(psp);
if (ret) {
DRM_ERROR("PSP tmr init failed!\n");
return ret;
}
 
+skip_pin_bo:
/*
 * For ASICs with DF Cstate management centralized
 * to PMFW, TMR setup should be performed after PMFW
-- 
2.30.2



[PATCH 2/2] drm/amd/amdgpu: fix gmc bo pin count leak in SRIOV

2021-12-13 Thread Jingwen Chen
[Why]
gmc bo will be pinned during loading amdgpu and reset in SRIOV while
only unpinned in unload amdgpu

[How]
add amdgpu_in_reset and sriov judgement to skip pin bo

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 4 
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d696c4754bea..927cf1e68520 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -992,10 +992,14 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device 
*adev)
return -EINVAL;
}
 
+   if (amdgpu_sriov_vf(adev) && !amdgpu_in_reset(adev))
+   goto skip_pin_bo;
+
r = amdgpu_gart_table_vram_pin(adev);
if (r)
return r;
 
+skip_pin_bo:
r = adev->gfxhub.funcs->gart_enable(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index db2ec84f7237..d91eb7eb0ebe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1717,10 +1717,14 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device 
*adev)
return -EINVAL;
}
 
+   if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+   goto skip_pin_bo;
+
r = amdgpu_gart_table_vram_pin(adev);
if (r)
return r;
 
+skip_pin_bo:
r = adev->gfxhub.funcs->gart_enable(adev);
if (r)
return r;
-- 
2.30.2



[PATCH 1/2] drm/amd/amdgpu: fix psp tmr bo pin count leak in SRIOV

2021-12-13 Thread Jingwen Chen
[Why]
psp tmr bo will be pinned during loading amdgpu and reset in SRIOV while
only unpinned in unload amdgpu

[How]
add amdgpu_in_reset and sriov judgement to skip pin bo

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 103bcadbc8b8..cf5baa57ab95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2017,12 +2017,16 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
 
+   if (amdgpu_sriov_vf(adev) && !amdgpu_in_reset(adev)) 
+   goto skip_pin_bo;
+
ret = psp_tmr_init(psp);
if (ret) {
DRM_ERROR("PSP tmr init failed!\n");
return ret;
}
 
+skip_pin_bo:
/*
 * For ASICs with DF Cstate management centralized
 * to PMFW, TMR setup should be performed after PMFW
-- 
2.30.2



Re: [PATCH 2/2] drm/amd/amdgpu: fix gmc bo pin count leak in SRIOV

2021-12-13 Thread JingWen Chen
patch abandoned

On 2021/12/14 上午11:52, Jingwen Chen wrote:
> [Why]
> gmc bo will be pinned during loading amdgpu and reset in SRIOV while
> only unpinned in unload amdgpu
>
> [How]
> add amdgpu_in_reset and sriov judgement for pin bo in gart_enable
>
> Signed-off-by: Jingwen Chen 
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 +---
>  2 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index d696c4754bea..b0f4d1ded977 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -992,9 +992,11 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device 
> *adev)
>   return -EINVAL;
>   }
>  
> - r = amdgpu_gart_table_vram_pin(adev);
> - if (r)
> - return r;
> + if (amdgpu_sriov_vf(adev) && !amdgpu_in_reset(adev)) {
> + r = amdgpu_gart_table_vram_pin(adev);
> + if (r)
> + return r;
> + }
>  
>   r = adev->gfxhub.funcs->gart_enable(adev);
>   if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index db2ec84f7237..c1adb212276d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1717,9 +1717,11 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device 
> *adev)
>   return -EINVAL;
>   }
>  
> - r = amdgpu_gart_table_vram_pin(adev);
> - if (r)
> - return r;
> + if (amdgpu_sriov_vf(adev) && !amdgpu_in_reset(adev)) {
> + r = amdgpu_gart_table_vram_pin(adev);
> + if (r)
> + return r;
> + }
>  
>   r = adev->gfxhub.funcs->gart_enable(adev);
>   if (r)


[PATCH 2/2] drm/amd/amdgpu: fix gmc bo pin count leak in SRIOV

2021-12-13 Thread Jingwen Chen
[Why]
gmc bo will be pinned during loading amdgpu and reset in SRIOV while
only unpinned in unload amdgpu

[How]
add amdgpu_in_reset and sriov judgement for pin bo in gart_enable

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 +---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d696c4754bea..b0f4d1ded977 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -992,9 +992,11 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device 
*adev)
return -EINVAL;
}
 
-   r = amdgpu_gart_table_vram_pin(adev);
-   if (r)
-   return r;
+   if (amdgpu_sriov_vf(adev) && !amdgpu_in_reset(adev)) {
+   r = amdgpu_gart_table_vram_pin(adev);
+   if (r)
+   return r;
+   }
 
r = adev->gfxhub.funcs->gart_enable(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index db2ec84f7237..c1adb212276d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1717,9 +1717,11 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device 
*adev)
return -EINVAL;
}
 
-   r = amdgpu_gart_table_vram_pin(adev);
-   if (r)
-   return r;
+   if (amdgpu_sriov_vf(adev) && !amdgpu_in_reset(adev)) {
+   r = amdgpu_gart_table_vram_pin(adev);
+   if (r)
+   return r;
+   }
 
r = adev->gfxhub.funcs->gart_enable(adev);
if (r)
-- 
2.30.2



[PATCH 1/2] drm/amd/amdgpu: fix psp tmr bo pin count leak in SRIOV

2021-12-13 Thread Jingwen Chen
[Why]
psp tmr bo will be pinned during loading amdgpu and reset in SRIOV while
only unpinned in unload amdgpu

[How]
add amdgpu_in_reset and sriov judgement for psp_tmr_init

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 103bcadbc8b8..19c026c47c96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2017,13 +2017,14 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
 
-   ret = psp_tmr_init(psp);
-   if (ret) {
-   DRM_ERROR("PSP tmr init failed!\n");
-   return ret;
+   if (amdgpu_sriov_vf(adev) && !amdgpu_in_reset(adev)) {
+   ret = psp_tmr_init(psp);
+   if (ret) {
+   DRM_ERROR("PSP tmr init failed!\n");
+   goto failed;
+   }
}
 
-   /*
 * For ASICs with DF Cstate management centralized
 * to PMFW, TMR setup should be performed after PMFW
 * loaded and before other non-psp firmware loaded.
-- 
2.30.2



Re: [PATCH] drm/amd/amdgpu: use advanced TDR mode by default

2021-11-29 Thread JingWen Chen
Hi Bokun,

please remove the change-id in your commit message when submitting this patch.

Acked-by:  Jingwen Chen 

On 2021/11/27 上午8:57, Bokun Zhang wrote:
> From: Bokun Zhang 
>
> In the patch about advanced TDR mode, we force to always set
> amdgpu_gpu_recovery=2 under SRIOV. This is not ideal, since we
> may want to revert back to use the legacy TDR routine.
>
> Therefore, we only set amdgpu_gpu_recovery=2 when it is
> configured as AUTO, which is the default value (gpu_recovery=-1)
>
> Signed-off-by: Bokun Zhang 
> Change-Id: Ifae78854b53f124d2ea53f401919ab5e403ef822
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 93af2cd2a065..62ec484a35aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -64,8 +64,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
>   adev->cg_flags = 0;
>   adev->pg_flags = 0;
>  
> - /*use advance recovery mode for SRIOV*/
> - if (amdgpu_gpu_recovery)
> + /* use advance recovery mode for SRIOV by default */
> + if (amdgpu_gpu_recovery == -1)
>   amdgpu_gpu_recovery = 2;
>  }
>  


Re: [PATCH] drm/amd/amdgpu: fix potential bad job hw_fence underflow

2021-10-27 Thread JingWen Chen


On 2021/10/28 上午3:43, Andrey Grodzovsky wrote:
>
> On 2021-10-25 10:57 p.m., JingWen Chen wrote:
>> On 2021/10/25 下午11:18, Andrey Grodzovsky wrote:
>>> On 2021-10-24 10:56 p.m., JingWen Chen wrote:
>>>> On 2021/10/23 上午4:41, Andrey Grodzovsky wrote:
>>>>> What do you mean by underflow in this case ? You mean use after free 
>>>>> because of extra dma_fence_put() ?
>>>> yes
>>>
>>> Then maybe update the description  because 'underflow' is very confusing
>>>
>> will do
>>>>> On 2021-10-22 4:14 a.m., JingWen Chen wrote:
>>>>>> ping
>>>>>>
>>>>>> On 2021/10/22 AM11:33, Jingwen Chen wrote:
>>>>>>> [Why]
>>>>>>> In advance tdr mode, the real bad job will be resubmitted twice, while
>>>>>>> in drm_sched_resubmit_jobs_ext, there's a dma_fence_put, so the bad job
>>>>>>> is put one more time than other jobs.
>>>>>>>
>>>>>>> [How]
>>>>>>> Adding dma_fence_get before resbumit job in
>>>>>>> amdgpu_device_recheck_guilty_jobs and put the fence for normal jobs
>>>>>>>
>>>>>>> Signed-off-by: Jingwen Chen 
>>>>>>> ---
>>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
>>>>>>>     1 file changed, 4 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> index 41ce86244144..975f069f6fe8 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> @@ -4841,6 +4841,9 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>>>>>       /* clear job's guilty and depend the folowing step to 
>>>>>>> decide the real one */
>>>>>>>     drm_sched_reset_karma(s_job);
>>>>>>> +    /* for the real bad job, it will be resubmitted twice, adding 
>>>>>>> a dma_fence_get
>>>>>>> + * to make sure fence is balanced */
>>>>> But that put in drm_sched_resubmit_jobs_ext is for the previous parent 
>>>>> fence.
>>>>> fence = sched->ops->run_job(s_job); returns a new HW fence and the put 
>>>>> drops the refcount on the old one.
>>>>>
>>>>> Andrey
>>>>>
>>>>>
>>>> Hi Andrey,
>>>>
>>>> If I remember correctly, after we embedded the hw_fence into amdgpu_job, 
>>>> there will be not fence replacement in amdgpu_job_run.
>>>
>>> Right, I forgot that... What about removing line 
>>> https://elixir.bootlin.com/linux/v5.15-rc6/source/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c#L265
>>>  ?
>>> What if you make dma_get_fence unconditional instead ?
>>>
>>> Andrey
>>>
>>>
>> Hi Andrey,
>>
>> I have tried this and this will cause normal jobs cannot be free(lacks a 
>> dma_fence_put).
>
>
> I can't see it  - can you point me where in that case you get unbalanced 
> refcount ? As far as I see for a a normal job
> being ran in amdgpu_device_recheck_guilty_jobs the refcount on hw_fence is  -
>
> drm_sched_resubmit_jobs_ext->dma_fence_put -> refcount decrease by 1
> drm_sched_resubmit_jobs_ext->amdgpu_job_run->dma_fence_get increase by 1
>
> In total refcount didn't change until now
>
> Next,  dma_fence_wait_timeout completed successfully because the job is 
> normal and then you delete that job from pending list and call the
> free_job cb which drops remaining refcounts on the hw_fence.
>
> I am probably missing some  dma_fence_get since you checked it on a device 
> but I wonder where is my mistake ?
>
> Andrey
>
>
Hi Andrey,

The thing is the put/get is balanced right now for normal jobs in TDR. Changing 
this dma_fence_get to unconditional simply adds 1 dma_fence_get but there's no 
corresponding dma_fence_put for normal jobs.

And if this can be helpful, I try to find all dma_fence_get/put for a normal 
job in advance TDR based on the latest drm-next.

amdgpu_fence_emit -> dma_fence_init    ref_count = 1​
amdgpu_fence_emit -> add into rcu    ref_count = 2​
amdgpu_job_run->get after ib_schedule    ref_count = 3​
drm_sched_main-> add fence callback get    ref_count = 4​
drm_sched_main-> add fenc

Re: [PATCH] drm/amd/amdgpu: fix potential bad job hw_fence underflow

2021-10-25 Thread JingWen Chen


On 2021/10/25 下午11:18, Andrey Grodzovsky wrote:
>
> On 2021-10-24 10:56 p.m., JingWen Chen wrote:
>> On 2021/10/23 上午4:41, Andrey Grodzovsky wrote:
>>> What do you mean by underflow in this case ? You mean use after free 
>>> because of extra dma_fence_put() ?
>> yes
>
>
> Then maybe update the description  because 'underflow' is very confusing
>
will do
>
>>> On 2021-10-22 4:14 a.m., JingWen Chen wrote:
>>>> ping
>>>>
>>>> On 2021/10/22 AM11:33, Jingwen Chen wrote:
>>>>> [Why]
>>>>> In advance tdr mode, the real bad job will be resubmitted twice, while
>>>>> in drm_sched_resubmit_jobs_ext, there's a dma_fence_put, so the bad job
>>>>> is put one more time than other jobs.
>>>>>
>>>>> [How]
>>>>> Adding dma_fence_get before resbumit job in
>>>>> amdgpu_device_recheck_guilty_jobs and put the fence for normal jobs
>>>>>
>>>>> Signed-off-by: Jingwen Chen 
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
>>>>>    1 file changed, 4 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> index 41ce86244144..975f069f6fe8 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> @@ -4841,6 +4841,9 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>>>      /* clear job's guilty and depend the folowing step to decide 
>>>>> the real one */
>>>>>    drm_sched_reset_karma(s_job);
>>>>> +    /* for the real bad job, it will be resubmitted twice, adding a 
>>>>> dma_fence_get
>>>>> + * to make sure fence is balanced */
>>>
>>> But that put in drm_sched_resubmit_jobs_ext is for the previous parent 
>>> fence.
>>> fence = sched->ops->run_job(s_job); returns a new HW fence and the put 
>>> drops the refcount on the old one.
>>>
>>> Andrey
>>>
>>>
>> Hi Andrey,
>>
>> If I remember correctly, after we embedded the hw_fence into amdgpu_job, 
>> there will be not fence replacement in amdgpu_job_run.
>
>
> Right, I forgot that... What about removing line 
> https://elixir.bootlin.com/linux/v5.15-rc6/source/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c#L265
>  ?
> What if you make dma_get_fence unconditional instead ?
>
> Andrey
>
>
Hi Andrey,

I have tried this and this will cause normal jobs cannot be free(lacks a 
dma_fence_put). I have figured out all the get/put

for sched_jobs and only the bad job lacks a dma_fence_get, other jobs are just 
fine.

>>
>>>>> +    dma_fence_get(s_job->s_fence->parent);
>>>>>    drm_sched_resubmit_jobs_ext(>sched, 1);
>>>>>      ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, 
>>>>> ring->sched.timeout);
>>>>> @@ -4876,6 +4879,7 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>>>      /* got the hw fence, signal finished fence */
>>>>>    atomic_dec(ring->sched.score);
>>>>> +    dma_fence_put(s_job->s_fence->parent);
>>>>>    dma_fence_get(&s_job->s_fence->finished);
>>>>>    dma_fence_signal(&s_job->s_fence->finished);
>>>>>    dma_fence_put(&s_job->s_fence->finished);


Re: [PATCH] drm/amd/amdgpu: fix potential bad job hw_fence underflow

2021-10-24 Thread JingWen Chen


On 2021/10/23 上午4:41, Andrey Grodzovsky wrote:
>
> What do you mean by underflow in this case ? You mean use after free because 
> of extra dma_fence_put() ?
yes
>
> On 2021-10-22 4:14 a.m., JingWen Chen wrote:
>> ping
>>
>> On 2021/10/22 AM11:33, Jingwen Chen wrote:
>>> [Why]
>>> In advance tdr mode, the real bad job will be resubmitted twice, while
>>> in drm_sched_resubmit_jobs_ext, there's a dma_fence_put, so the bad job
>>> is put one more time than other jobs.
>>>
>>> [How]
>>> Adding dma_fence_get before resbumit job in
>>> amdgpu_device_recheck_guilty_jobs and put the fence for normal jobs
>>>
>>> Signed-off-by: Jingwen Chen 
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
>>>   1 file changed, 4 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 41ce86244144..975f069f6fe8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -4841,6 +4841,9 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>     /* clear job's guilty and depend the folowing step to decide 
>>> the real one */
>>>   drm_sched_reset_karma(s_job);
>>> +    /* for the real bad job, it will be resubmitted twice, adding a 
>>> dma_fence_get
>>> + * to make sure fence is balanced */
>
>
> But that put in drm_sched_resubmit_jobs_ext is for the previous parent fence.
> fence = sched->ops->run_job(s_job); returns a new HW fence and the put drops 
> the refcount on the old one.
>
> Andrey
>
>
Hi Andrey,

If I remember correctly, after we embedded the hw_fence into amdgpu_job, there 
will be no fence replacement in amdgpu_job_run.

>>> +    dma_fence_get(s_job->s_fence->parent);
>>>   drm_sched_resubmit_jobs_ext(&ring->sched, 1);
>>>     ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, 
>>> ring->sched.timeout);
>>> @@ -4876,6 +4879,7 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>     /* got the hw fence, signal finished fence */
>>>   atomic_dec(ring->sched.score);
>>> +    dma_fence_put(s_job->s_fence->parent);
>>>   dma_fence_get(&s_job->s_fence->finished);
>>>   dma_fence_signal(&s_job->s_fence->finished);
>>>   dma_fence_put(&s_job->s_fence->finished);


Re: [PATCH] drm/amd/amdgpu: fix potential bad job hw_fence underflow

2021-10-22 Thread JingWen Chen
ping

On 2021/10/22 AM11:33, Jingwen Chen wrote:
> [Why]
> In advance tdr mode, the real bad job will be resubmitted twice, while
> in drm_sched_resubmit_jobs_ext, there's a dma_fence_put, so the bad job
> is put one more time than other jobs.
>
> [How]
> Adding dma_fence_get before resubmit job in
> amdgpu_device_recheck_guilty_jobs and put the fence for normal jobs
>
> Signed-off-by: Jingwen Chen 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 41ce86244144..975f069f6fe8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4841,6 +4841,9 @@ static void amdgpu_device_recheck_guilty_jobs(
>  
>   /* clear job's guilty and depend the folowing step to decide 
> the real one */
>   drm_sched_reset_karma(s_job);
> + /* for the real bad job, it will be resubmitted twice, adding a 
> dma_fence_get
> +  * to make sure fence is balanced */
> + dma_fence_get(s_job->s_fence->parent);
>   drm_sched_resubmit_jobs_ext(&ring->sched, 1);
>  
>   ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, 
> ring->sched.timeout);
> @@ -4876,6 +4879,7 @@ static void amdgpu_device_recheck_guilty_jobs(
>  
>   /* got the hw fence, signal finished fence */
>   atomic_dec(ring->sched.score);
> + dma_fence_put(s_job->s_fence->parent);
>   dma_fence_get(&s_job->s_fence->finished);
>   dma_fence_signal(&s_job->s_fence->finished);
>   dma_fence_put(&s_job->s_fence->finished);


[PATCH] drm/amd/amdgpu: fix potential bad job hw_fence underflow

2021-10-21 Thread Jingwen Chen
[Why]
In advance tdr mode, the real bad job will be resubmitted twice, while
in drm_sched_resubmit_jobs_ext, there's a dma_fence_put, so the bad job
is put one more time than other jobs.

[How]
Adding dma_fence_get before resubmit job in
amdgpu_device_recheck_guilty_jobs and put the fence for normal jobs

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 41ce86244144..975f069f6fe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4841,6 +4841,9 @@ static void amdgpu_device_recheck_guilty_jobs(
 
/* clear job's guilty and depend the folowing step to decide 
the real one */
drm_sched_reset_karma(s_job);
+   /* for the real bad job, it will be resubmitted twice, adding a 
dma_fence_get
+* to make sure fence is balanced */
+   dma_fence_get(s_job->s_fence->parent);
drm_sched_resubmit_jobs_ext(&ring->sched, 1);
 
ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, 
ring->sched.timeout);
@@ -4876,6 +4879,7 @@ static void amdgpu_device_recheck_guilty_jobs(
 
/* got the hw fence, signal finished fence */
atomic_dec(ring->sched.score);
+   dma_fence_put(s_job->s_fence->parent);
dma_fence_get(&s_job->s_fence->finished);
dma_fence_signal(&s_job->s_fence->finished);
dma_fence_put(&s_job->s_fence->finished);
-- 
2.30.2



[PATCH v2] drm/amd/amdgpu: add dummy_page_addr to sriov msg

2021-10-21 Thread Jingwen Chen
Add dummy_page_addr to sriov msg for host driver to set
GCVM_L2_PROTECTION_DEFAULT_ADDR* registers correctly.

v2:
should update vf2pf msg instead
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 88c4177b708a..99c149397aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -584,6 +584,7 @@ static int amdgpu_virt_write_vf2pf_data(struct 
amdgpu_device *adev)
vf2pf_info->encode_usage = 0;
vf2pf_info->decode_usage = 0;
 
+   vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
vf2pf_info->checksum =
amd_sriov_msg_checksum(
vf2pf_info, vf2pf_info->header.size, 0, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 995899191288..7326b6c1b71c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -261,9 +261,10 @@ struct amd_sriov_msg_vf2pf_info {
uint8_t  id;
uint32_t version;
} ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
+   uint64_t dummy_page_addr;
 
/* reserved */
-   uint32_t reserved[256-68];
+   uint32_t reserved[256-70];
 };
 
 /* mailbox message send from guest to host  */
-- 
2.30.2



[PATCH] drm/amd/amdgpu: add dummy_page_addr to sriov msg

2021-10-21 Thread Jingwen Chen
Add dummy_page_addr to sriov msg for host driver to set
GCVM_L2_PROTECTION_DEFAULT_ADDR* registers correctly.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c| 1 +
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 88c4177b708a..99c149397aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -584,6 +584,7 @@ static int amdgpu_virt_write_vf2pf_data(struct 
amdgpu_device *adev)
vf2pf_info->encode_usage = 0;
vf2pf_info->decode_usage = 0;
 
+   vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
vf2pf_info->checksum =
amd_sriov_msg_checksum(
vf2pf_info, vf2pf_info->header.size, 0, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h 
b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 995899191288..5e3d8ecfa968 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -206,8 +206,10 @@ struct amd_sriov_msg_pf2vf_info {
struct amd_sriov_msg_uuid_info uuid_info;
/* pcie atomic Ops info */
uint32_t pcie_atomic_ops_enabled_flags;
+   /* dummy page addr */
+   uint64_t dummy_page_addr;
/* reserved */
-   uint32_t reserved[256 - 48];
+   uint32_t reserved[256 - 50];
 };
 
 struct amd_sriov_msg_vf2pf_info_header {
-- 
2.30.2



Re: [diagnostic TDR mode patches] unify our solution opinions/suggestions in one thread

2021-09-06 Thread Jingwen Chen
Hi Christian/Andrey/Daniel,

I read Boris's patch about ordered workqueue and I think maybe we can
leverage this change.
https://lore.kernel.org/dri-devel/20210625133327.2598825-2-boris.brezil...@collabora.com/

As the TDR race condition we are talking about is caused by a bailing
job being deleted from pending list. While if we use the ordered
workqueue for timedout in the driver, there will be no bailing job.

Do you have any suggestions?

Best Regards,
JingWen Chen

On Mon Sep 06, 2021 at 02:36:52PM +0800, Liu, Monk wrote:
> [AMD Official Use Only]
> 
> > I'm fearing that just repeating what Alex said, but to make it clear 
> > once more: That is *not* necessary!
> >
> > The shared repository is owned by upstream maintainers and they are 
> > usually free to do restructuring work without getting acknowledge from 
> > every single driver maintainer.
> 
> Hi Daniel
> 
> Anyway thanks for officially confirm to me of working model & policy in 
> community, I don't want to put my opinion here due to that's not my call to 
> change no matter how.
> I only want to let this diagnostic TDR scheme going to a good end for AMD or 
> even for all DRM vendor.
> 
> How about this way, we still have a final patch not landed in DRM scheduler 
> and I would like jingwen to present it to you and AlexD/Christian/Andrey,  I 
> believe you will have concerns or objections regarding this patch, but that's 
> fine, let us figure it out together, how to make it acceptable by you and 
> other vendors that working with DRM scheduler.
> 
> P.S.:  I had to repeat myself again, we are not popping up new idea suddenly, 
> it is disconnection issue, we didn't have changes (or plan to have changes) 
> in DRM scheduler before, but eventually we found we must make job_timeout and 
> sched_main to work in a serialized otherwise it won't work based on current 
> scheduler's code structure.
> 
> Thanks 
> 
> --
> Monk Liu | Cloud-GPU Core team
> --
> 
> -Original Message-
> From: Daniel Vetter  
> Sent: Friday, September 3, 2021 12:11 AM
> To: Koenig, Christian 
> Cc: Liu, Monk ; Dave Airlie ; Alex 
> Deucher ; Grodzovsky, Andrey 
> ; Chen, JingWen ; DRI 
> Development ; amd-gfx@lists.freedesktop.org
> Subject: Re: [diagnostic TDR mode patches] unify our solution 
> opinions/suggestions in one thread
> 
> On Thu, Sep 2, 2021 at 1:00 PM Christian König  
> wrote:
> >
> > Hi Monk,
> >
> > Am 02.09.21 um 07:52 schrieb Liu, Monk:
> > > [AMD Official Use Only]
> > >
> > > I'm not sure I can add much to help this along, I'm sure Alex has 
> > > some internal training, Once your driver is upstream, it belongs to 
> > > upstream, you can maintain it, but you no longer control it 100%, it's a 
> > > tradeoff, it's not one companies always understand.
> > > Usually people are fine developing away internally, but once interaction 
> > > with other parts of the kernel/subsystem is required they have the 
> > > realisation that they needed to work upstream 6 months earlier.
> > > The best time to interact with upstream was 6 months ago, the second best 
> > > time is now.
> > > <<<
> > >
> > > Daniel/AlexD
> > >
> > > I didn't mean your changes on AMD driver need my personal approval 
> > > or review ... and  I'm totally already get used that our driver is not 
> > > 100% under control by AMDers, but supposedly any one from community 
> > > (including you) who tend to change AMD's driver need at least to get 
> > > approvement from someone in AMD, e.g.: AlexD or Christian, doesn't that 
> > > reasonable?
> >
> > I'm fearing that just repeating what Alex said, but to make it clear 
> > once more: That is *not* necessary!
> >
> > The shared repository is owned by upstream maintainers and they are 
> > usually free to do restructuring work without getting acknowledge from 
> > every single driver maintainer.
> >
> > Anybody can of course technically object to upstream design decisions, 
> > but that means that you need to pay attention to the mailing lists in 
> > the first place.
> >
> > > just like we need your approve if we try to modify DRM-sched, or need 
> > > panfrost's approval if we need to change panfrost code ...
> > >
> > > by only CC AMD's engineers looks not quite properly, how do you know if 
> > > your changes (on AMD code part) are conflicting with AMD's on-going 
> > > internal features/refactoring or not ?
> >
> > Well because AMD 

Re: [diagnostic TDR mode patches] unify our solution opinions/suggestions in one thread

2021-08-31 Thread Jingwen Chen
On Wed Sep 01, 2021 at 12:28:59AM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-09-01 12:25 a.m., Jingwen Chen wrote:
> > On Wed Sep 01, 2021 at 12:04:47AM -0400, Andrey Grodzovsky wrote:
> > > I will answer everything here -
> > > 
> > > On 2021-08-31 9:58 p.m., Liu, Monk wrote:
> > > 
> > > 
> > >  [AMD Official Use Only]
> > > 
> > > 
> > >  In the previous discussion, you guys stated that we should drop the
> > >  “kthread_should_park” in cleanup_job.
> > > 
> > > 
> > >  @@ -676,15 +676,6 @@ drm_sched_get_cleanup_job(struct 
> > > drm_gpu_scheduler
> > >  *sched)
> > > 
> > >  {
> > > 
> > >  struct drm_sched_job *job, *next;
> > > 
> > > 
> > >  -   /*
> > > 
> > >  -* Don't destroy jobs while the timeout worker is running  OR
> > >  thread
> > > 
> > >  -* is being parked and hence assumed to not touch 
> > > pending_list
> > > 
> > >  -*/
> > > 
> > >  -   if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> > > 
> > >  -   !cancel_delayed_work(&sched->work_tdr)) ||
> > > 
> > >  -   kthread_should_park())
> > > 
> > >  -   return NULL;
> > > 
> > > 
> > >  But I suddenly have a question here: if return the timedout job no 
> > > matter
> > >  kthread_should_park() or not, then we are backing to the original 
> > > problem
> > >  again: that the timedout_job is suddenly signaling and cleanup_job 
> > > still
> > >  returns it to sched_main and job is freed while it is still handling 
> > > by
> > >  vendor’s timeout callback
> > > 
> > > 
> > >  If we return NULL when kthread_should_park() in cleanup_job, we can 
> > > prevent
> > >  above scenario from happening: once a job is processed by 
> > > job_timedout we
> > >  can stop its scheduler, and after that even this job suddenly 
> > > signaled the
> > >  cleanup_job won’t return it so sched_main won’t free it in parallel …
> > > 
> > > 
> > >  What do you think ?
> > > 
> > > 
> > > Is your analysis above takes into account that you also submit
> > > '[PATCH 2/2] drm/sched: serialize job_timeout and scheduler' then I don't 
> > > see a
> > > problem -
> > Hi Andrey,
> > Monk has talked to me and we agreed that as there're multiple opinions 
> > about the
> > '[PATCH 2/2] drm/sched: serialize job_timeout and scheduler' and patch
> > 1 is an independent patch to fix some error. So we should not take the 
> > patch 2 into
> > analysis.
> > 
> > > I think that as long as you put kthread_park(sched->thread) BEFORE
> > > fetching next bad job from pending list (under spinlock) there is no
> > > such issue as in the case you describe because this potential bad job
> > > that became signaled will be removed from pending list before you
> > > even fetch the next job and by the time you fetch it the scheduler
> > > thread is already stopped anyway
> > > 
> > > If you don't submit and we keep the removal hack for now then also no 
> > > problem
> > > because
> > > we temporary remove the job we fetch for TDR from pending list under 
> > > spinlock
> > > exactly to avoid this race
> > > 
> > So can you help review [PATCH 1/2] drm/sched: fix the bug of time out 
> > calculation(v3)?
> > patch v3 keeps this kthread_should_park check.
> 
> 
> But since in both cases looks like there is no danger of use after free
> then I see no reason to keep kthread_should_park check.
> 
> Andrey
OK, I get it. So patch v4 has removed this check, can you help review
[PATCH 1/2] drm/sched: fix the bug of time out calculation(v4)?
> 
> 
> > 
> > Best Regards,
> > JingWen
> > > 
> > >  Thanks
> > > 
> > > 
> > >  --
> > > 
> > >  Monk Liu | Cloud-GPU Core team
> > > 
> > >  --
> > > 
> > > 
> > >  From: Liu, Monk
> > >  Sent: Wednesday, September 1, 2021 9:23 AM
> > >  To: Koenig, Christian ; Grodzovsky, Andrey
> > >  ; Daniel 

Re: [diagnostic TDR mode patches] unify our solution opinions/suggestions in one thread

2021-08-31 Thread Jingwen Chen
On Wed Sep 01, 2021 at 12:04:47AM -0400, Andrey Grodzovsky wrote:
> I will answer everything here -
> 
> On 2021-08-31 9:58 p.m., Liu, Monk wrote:
> 
> 
> [AMD Official Use Only]
> 
>  
> 
> In the previous discussion, you guys stated that we should drop the
> “kthread_should_park” in cleanup_job.
> 
>  
> 
> @@ -676,15 +676,6 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler
> *sched)
> 
> {
> 
> struct drm_sched_job *job, *next;
> 
>  
> 
> -   /*
> 
> -* Don't destroy jobs while the timeout worker is running  OR
> thread
> 
> -* is being parked and hence assumed to not touch pending_list
> 
> -*/
> 
> -   if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
> 
> -   !cancel_delayed_work(&sched->work_tdr)) ||
> 
> -   kthread_should_park())
> 
> -   return NULL;
> 
>  
> 
> But I suddenly have a question here: if return the timedout job no matter
> kthread_should_park() or not, then we are backing to the original problem
> again: that the timedout_job is suddenly signaling and cleanup_job still
> returns it to sched_main and job is freed while it is still handling by
> vendor’s timeout callback
> 
>  
> 
> If we return NULL when kthread_should_park() in cleanup_job, we can 
> prevent
> above scenario from happening: once a job is processed by job_timedout we
> can stop its scheduler, and after that even this job suddenly signaled the
> cleanup_job won’t return it so sched_main won’t free it in parallel …
> 
>  
> 
> What do you think ?
> 
> 
> Is your analysis above takes into account that you also submit
> '[PATCH 2/2] drm/sched: serialize job_timeout and scheduler' then I don't see 
> a
> problem -
Hi Andrey,
Monk has talked to me and we agreed that as there're multiple opinions about the
'[PATCH 2/2] drm/sched: serialize job_timeout and scheduler' and patch
1 is an independent patch to fix some error. So we should not take the patch 2 
into
analysis.

> I think that as long as you put kthread_park(sched->thread) BEFORE
> fetching next bad job from pending list (under spinlock) there is no
> such issue as in the case you describe because this potential bad job
> that became signaled will be removed from pending list before you
> even fetch the next job and by the time you fetch it the scheduler
> thread is already stopped anyway
> 
> If you don't submit and we keep the removal hack for now then also no problem
> because
> we temporary remove the job we fetch for TDR from pending list under spinlock
> exactly to avoid this race
> 
So can you help review [PATCH 1/2] drm/sched: fix the bug of time out 
calculation(v3)?
patch v3 keeps this kthread_should_park check.

Best Regards,
JingWen
> 
> 
> Thanks
> 
>  
> 
> --
> 
> Monk Liu | Cloud-GPU Core team
> 
> --
> 
>  
> 
> From: Liu, Monk
> Sent: Wednesday, September 1, 2021 9:23 AM
> To: Koenig, Christian ; Grodzovsky, Andrey
> ; Daniel Vetter ; Chen, 
> JingWen
> 
> Cc: DRI Development ;
> amd-gfx@lists.freedesktop.org
> Subject: [diagnostic TDR mode patches] unify our solution opinions/
> suggestions in one thread
> 
>  
> 
> [AMD Official Use Only]
> 
>  
> 
> Hi Daniel/Christian/Andrey
> 
>  
> 
> It looks the voice from you three are spread over those email floods to 
> me,
> the feature we are working on (diagnostic TDR scheme) is pending there for
> more than 6 month (we started it from feb 2021).
> 
>  
> 
> Honestly speaking the email ways that we are using now is not friendly and
> quite painful to me ….
> 
> Can we try to put all our opinions, suggestions, or even objects here
> together, let’s go through them one by one, it’s too hard for us to reply
> each email on different questions .
> 
>  
> 
> For [PATCH 1/2] drm/sched: fix the bug of time out calculation(v4)
> 
>  
> 
> This is a fixing patch on the timeout timer in scheduler, can we complete
> this one first ? it should already resolved all the questions and
> suggestions.
> 
> 
> I have no objections for this one besides getting rid of the
> kthread_should_park()) return null part,
> if my answer above is not wrong then it seems superfluous to me
> 
> 
>  
> 
> For [PATCH 2/2] drm/sched: serialize job_timeout and scheduler
> 
>  
> 
> I think I already explained the questions raised by Daniel in other thread
> , regarding why I use __kthread_should_park()
> 
> 
> Is this race free ? Can't the other thread execute kthread_park after the 
> check
> ?
> 
> 
> For other aspects, can we put all our opinion synthesized here ?
> 
> 
> So to summarize from previous threads I think that the best solution
> to the problem being solved in this patch is if we do HW fence 

Re: [PATCH] drm/amd/amdgpu: Add ready_to_reset resp for vega10

2021-08-27 Thread Jingwen Chen
Reviewed-by: Jingwen Chen 
On Fri Aug 27, 2021 at 02:56:51PM +0800, YuBiao Wang wrote:
> Send response to host after received the flr notification from host.
> Port NV change to vega10.
> 
> Signed-off-by: YuBiao Wang 
> ---
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 ++
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 1 +
>  2 files changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index ff2307d7ee0f..23b066bcffb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -258,6 +258,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
> *work)
>   amdgpu_virt_fini_data_exchange(adev);
>   atomic_set(&adev->in_gpu_reset, 1);
>  
> + xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
> +
>   do {
>   if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
>   goto flr_done;
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h 
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> index 50572635d0f8..bd3b23171579 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> @@ -37,6 +37,7 @@ enum idh_request {
>   IDH_REQ_GPU_RESET_ACCESS,
>  
>   IDH_LOG_VF_ERROR   = 200,
> + IDH_READY_TO_RESET  = 201,
>  };
>  
>  enum idh_event {
> -- 
> 2.25.1
> 


Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Jingwen Chen
he head of the queue to reflect it was the 
> > earliest
> > -* job extracted.
> > -*/
> > -   list_add(&bad->list, &sched->pending_list);
> > -
> > -   /*
> >   * Iterate the job list from later to  earlier one and either 
> > deactive
> >   * their HW callbacks or remove them from pending list if they 
> > already
> >   * signaled.
> > 
> > 
> > Thanks
> > 
> > --
> > Monk Liu | Cloud-GPU Core team
> > --
> > 
> > -Original Message-
> > From: Daniel Vetter 
> > Sent: Thursday, August 19, 2021 5:31 PM
> > To: Grodzovsky, Andrey 
> > Cc: Daniel Vetter ; Alex Deucher ; 
> > Chen, JingWen ; Maling list - DRI developers 
> > ; amd-gfx list 
> > ; Liu, Monk ; Koenig, 
> > Christian 
> > Subject: Re: [PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad 
> > job."
> > 
> > On Wed, Aug 18, 2021 at 10:51:00AM -0400, Andrey Grodzovsky wrote:
> > > On 2021-08-18 10:42 a.m., Daniel Vetter wrote:
> > > > On Wed, Aug 18, 2021 at 10:36:32AM -0400, Andrey Grodzovsky wrote:
> > > > > On 2021-08-18 10:32 a.m., Daniel Vetter wrote:
> > > > > > On Wed, Aug 18, 2021 at 10:26:25AM -0400, Andrey Grodzovsky wrote:
> > > > > > > On 2021-08-18 10:02 a.m., Alex Deucher wrote:
> > > > > > > 
> > > > > > > > + dri-devel
> > > > > > > > 
> > > > > > > > Since scheduler is a shared component, please add dri-devel
> > > > > > > > on all scheduler patches.
> > > > > > > > 
> > > > > > > > On Wed, Aug 18, 2021 at 7:21 AM Jingwen Chen 
> > > > > > > >  wrote:
> > > > > > > > > [Why]
> > > > > > > > > for bailing job, this commit will delete it from pending
> > > > > > > > > list thus the bailing job will never have a chance to be
> > > > > > > > > resubmitted even in advance tdr mode.
> > > > > > > > > 
> > > > > > > > > [How]
> > > > > > > > > after embeded hw_fence into amdgpu_job is done, the race
> > > > > > > > > condition that this commit tries to work around is
> > > > > > > > > completely solved.So revert this commit.
> > > > > > > > > This reverts commit 135517d3565b48f4def3b1b82008bc17eb5d1c90.
> > > > > > > > > v2:
> > > > > > > > > add dma_fence_get/put() around timedout_job to avoid
> > > > > > > > > concurrent delete during processing timedout_job
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Jingwen Chen 
> > > > > > > > > ---
> > > > > > > > >  drivers/gpu/drm/scheduler/sched_main.c | 23 
> > > > > > > > > +--
> > > > > > > > >  1 file changed, 5 insertions(+), 18 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> > > > > > > > > b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > > > > > index a2a953693b45..f9b9b3aefc4a 100644
> > > > > > > > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > > > > > > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > > > > > > > @@ -314,6 +314,7 @@ static void drm_sched_job_timedout(struct 
> > > > > > > > > work_struct *work)
> > > > > > > > >  {
> > > > > > > > > struct drm_gpu_scheduler *sched;
> > > > > > > > > struct drm_sched_job *job;
> > > > > > > > > +   struct dma_fence *fence;
> > > > > > > > > enum drm_gpu_sched_stat status =
> > > > > > > > > DRM_GPU_SCHED_STAT_NOMINAL;
> > > > > > > > > 
> > > > > > > > > sched = container_of(work, struct
> > > > > > > > > drm_gpu_scheduler, work_tdr.work); @@ -325,11 +326,10 @@
> > > > > > > > > static

[PATCH v3] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-20 Thread Jingwen Chen
[Why]
for bailing job, this commit will delete it from pending list thus the
bailing job will never have a chance to be resubmitted even in advance
tdr mode.

[How]
after embeded hw_fence into amdgpu_job is done, the race condition that
this commit tries to work around is completely solved.So revert this
commit.
This reverts commit 135517d3565b48f4def3b1b82008bc17eb5d1c90.
v2:
add dma_fence_get/put() around timedout_job to avoid  concurrent delete
during processing timedout_job
v3:
park sched->thread instead during timedout_job.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/scheduler/sched_main.c | 22 ++
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a953693b45..c187fd3a6bb6 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -319,17 +319,12 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
+   kthread_park(sched->thread);
spin_lock(&sched->job_list_lock);
job = list_first_entry_or_null(&sched->pending_list,
   struct drm_sched_job, list);
 
if (job) {
-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(>list);
spin_unlock(&sched->job_list_lock);
 
status = job->sched->ops->timedout_job(job);
@@ -345,6 +340,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
} else {
spin_unlock(&sched->job_list_lock);
}
+   kthread_unpark(sched->thread);
 
if (status != DRM_GPU_SCHED_STAT_ENODEV) {
spin_lock(&sched->job_list_lock);
@@ -392,20 +388,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, 
struct drm_sched_job *bad)
 
kthread_park(sched->thread);
 
-   /*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(>list, >pending_list);
-
/*
 * Iterate the job list from later to  earlier one and either deactive
 * their HW callbacks or remove them from pending list if they already
-- 
2.25.1



Re: [PATCH] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-18 Thread Jingwen Chen
Sorry, I just got what you mean; I will submit a v2 patch.

On Wed Aug 18, 2021 at 04:08:37PM +0800, Jingwen Chen wrote:
> On Tue Aug 17, 2021 at 03:43:58PM +0200, Christian König wrote:
> > 
> > 
> > Am 17.08.21 um 15:37 schrieb Andrey Grodzovsky:
> > > On 2021-08-17 12:28 a.m., Jingwen Chen wrote:
> > > > [Why]
> > > > for bailing job, this commit will delete it from pending list thus the
> > > > bailing job will never have a chance to be resubmitted even in advance
> > > > tdr mode.
> > > > 
> > > > [How]
> > > > after embeded hw_fence into amdgpu_job is done, the race condition that
> > > > this commit tries to work around is completely solved.So revert this
> > > > commit.
> > > > This reverts commit 135517d3565b48f4def3b1b82008bc17eb5d1c90.
> > > 
> > > 
> > > Can you elaborate please how this solves the race ?
> > > As far as I see and  with this patch reverted, in drm_sched_job_timedout
> > > you get a pointer to next job to process in timed out handler,
> > > immediately
> > > next this job is actually finished and it's fence signaled, this in turn
> > > triggers
> > > drm_sched_get_cleanup_job which fetches this job and returns to
> Hi Andrey,
> 
> if drm_sched_job_timedout is triggered first, drm_sched_get_cleanup_job will 
> return
> NULL when the timeout worker is running according to this code:
>   if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>   !cancel_delayed_work(>work_tdr)) ||
>   kthread_should_park())
>   return NULL;
> 
> But yes a dma_fence_get(job->s_fence->parent) is needed before
> processing timedout_job. When the bad job is signaled just before
> processing, the amdgpu_fence_free can be triggered by the dma_fence_put() of 
> HW fence.
> And if I'm understanding this race condition correctly, the spin_lock is
> still needed here to avoid the drm_sched_get_cleanup_job get the
> spin_lock first and then enter the tdr work.
> > > drm_sched_main
> > > which in turn call free_job callabck->...->amdgpu_fence_free which frees
> > > the job
> > > from the HW dma_fence release callback. After that you proceed with a
> > > freed job
> > > in timed out handler.
> > > 
> > > If you could take the HW fence reference from drm_sched_job_timedout
> > > before
> > > starting processing then yes, I think it would work.
> > 
> > Yes, precisely that's what I had in mind as well and seems to be missing
> > from this patch.
> > 
> > Regards,
> > Christian.
> > 
> > > 
> > > Andrey
> > > 
> > > 
> > > > 
> > > > Signed-off-by: Jingwen Chen 
> > > > ---
> > > >   drivers/gpu/drm/scheduler/sched_main.c | 27 --
> > > >   1 file changed, 27 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> > > > b/drivers/gpu/drm/scheduler/sched_main.c
> > > > index a2a953693b45..31d1176d939f 100644
> > > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > > @@ -317,21 +317,10 @@ static void drm_sched_job_timedout(struct
> > > > work_struct *work)
> > > >   enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
> > > >     sched = container_of(work, struct drm_gpu_scheduler,
> > > > work_tdr.work);
> > > > -
> > > > -    /* Protects against concurrent deletion in
> > > > drm_sched_get_cleanup_job */
> > > > -    spin_lock(>job_list_lock);
> > > >   job = list_first_entry_or_null(>pending_list,
> > > >  struct drm_sched_job, list);
> > > >     if (job) {
> > > > -    /*
> > > > - * Remove the bad job so it cannot be freed by concurrent
> > > > - * drm_sched_cleanup_jobs. It will be reinserted back after
> > > > sched->thread
> > > > - * is parked at which point it's safe.
> > > > - */
> > > > -    list_del_init(>list);
> > > > -    spin_unlock(>job_list_lock);
> > > > -
> > > >   status = job->sched->ops->timedout_job(job);
> > > >     /*
> > > > @@ -342,8 +331,6 @@ static void drm_sched_job_timedout(struct
> > > > work_struct *work)
> > > >    

Re: [PATCH] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-18 Thread Jingwen Chen
On Tue Aug 17, 2021 at 03:43:58PM +0200, Christian König wrote:
> 
> 
> Am 17.08.21 um 15:37 schrieb Andrey Grodzovsky:
> > On 2021-08-17 12:28 a.m., Jingwen Chen wrote:
> > > [Why]
> > > for bailing job, this commit will delete it from pending list thus the
> > > bailing job will never have a chance to be resubmitted even in advance
> > > tdr mode.
> > > 
> > > [How]
> > > after embeded hw_fence into amdgpu_job is done, the race condition that
> > > this commit tries to work around is completely solved.So revert this
> > > commit.
> > > This reverts commit 135517d3565b48f4def3b1b82008bc17eb5d1c90.
> > 
> > 
> > Can you elaborate please how this solves the race ?
> > As far as I see and  with this patch reverted, in drm_sched_job_timedout
> > you get a pointer to next job to process in timed out handler,
> > immediately
> > next this job is actually finished and it's fence signaled, this in turn
> > triggers
> > drm_sched_get_cleanup_job which fetches this job and returns to
Hi Andrey,

if drm_sched_job_timedout is triggered first, drm_sched_get_cleanup_job will 
return
NULL when the timeout worker is running according to this code:
if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
!cancel_delayed_work(>work_tdr)) ||
kthread_should_park())
return NULL;

But yes a dma_fence_get(job->s_fence->parent) is needed before
processing timedout_job. When the bad job is signaled just before
processing, the amdgpu_fence_free can be triggered by the dma_fence_put() of HW 
fence.
And if I'm understanding this race condition correctly, the spin_lock is
still needed here to avoid the drm_sched_get_cleanup_job get the
spin_lock first and then enter the tdr work.
> > drm_sched_main
> > which in turn call free_job callabck->...->amdgpu_fence_free which frees
> > the job
> > from the HW dma_fence release callback. After that you proceed with a
> > freed job
> > in timed out handler.
> > 
> > If you could take the HW fence reference from drm_sched_job_timedout
> > before
> > starting processing then yes, I think it would work.
> 
> Yes, precisely that's what I had in mind as well and seems to be missing
> from this patch.
> 
> Regards,
> Christian.
> 
> > 
> > Andrey
> > 
> > 
> > > 
> > > Signed-off-by: Jingwen Chen 
> > > ---
> > >   drivers/gpu/drm/scheduler/sched_main.c | 27 --
> > >   1 file changed, 27 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c
> > > b/drivers/gpu/drm/scheduler/sched_main.c
> > > index a2a953693b45..31d1176d939f 100644
> > > --- a/drivers/gpu/drm/scheduler/sched_main.c
> > > +++ b/drivers/gpu/drm/scheduler/sched_main.c
> > > @@ -317,21 +317,10 @@ static void drm_sched_job_timedout(struct
> > > work_struct *work)
> > >   enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
> > >     sched = container_of(work, struct drm_gpu_scheduler,
> > > work_tdr.work);
> > > -
> > > -    /* Protects against concurrent deletion in
> > > drm_sched_get_cleanup_job */
> > > -    spin_lock(>job_list_lock);
> > >   job = list_first_entry_or_null(>pending_list,
> > >  struct drm_sched_job, list);
> > >     if (job) {
> > > -    /*
> > > - * Remove the bad job so it cannot be freed by concurrent
> > > - * drm_sched_cleanup_jobs. It will be reinserted back after
> > > sched->thread
> > > - * is parked at which point it's safe.
> > > - */
> > > -    list_del_init(>list);
> > > -    spin_unlock(>job_list_lock);
> > > -
> > >   status = job->sched->ops->timedout_job(job);
> > >     /*
> > > @@ -342,8 +331,6 @@ static void drm_sched_job_timedout(struct
> > > work_struct *work)
> > >   job->sched->ops->free_job(job);
> > >   sched->free_guilty = false;
> > >   }
> > > -    } else {
> > > -    spin_unlock(>job_list_lock);
> > >   }
> > >     if (status != DRM_GPU_SCHED_STAT_ENODEV) {
> > > @@ -392,20 +379,6 @@ void drm_sched_stop(struct drm_gpu_scheduler
> > > *sched, struct drm_sched_job *bad)
> > >     kthread_park(sched->thread);
> > >   -    /*
> > > - * Reinsert back the bad job here - now it's safe as
> > > - * drm_sched_get_cleanup_job cannot race against us and release the
> > > - * bad job at this point - we parked (waited for) any in progress
> > > - * (earlier) cleanups and drm_sched_get_cleanup_job will not be
> > > called
> > > - * now until the scheduler thread is unparked.
> > > - */
> > > -    if (bad && bad->sched == sched)
> > > -    /*
> > > - * Add at the head of the queue to reflect it was the earliest
> > > - * job extracted.
> > > - */
> > > -    list_add(>list, >pending_list);
> > > -
> > >   /*
> > >    * Iterate the job list from later to  earlier one and either
> > > deactive
> > >    * their HW callbacks or remove them from pending list if they
> > > already
> 


[PATCH v2] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-18 Thread Jingwen Chen
[Why]
for bailing job, this commit will delete it from pending list thus the
bailing job will never have a chance to be resubmitted even in advance
tdr mode.

[How]
after embedding hw_fence into amdgpu_job is done, the race condition that
this commit tries to work around is completely solved. So revert this
commit.
This reverts commit 135517d3565b48f4def3b1b82008bc17eb5d1c90.
v2:
add dma_fence_get/put() around timedout_job to avoid concurrent delete
during processing timedout_job

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/scheduler/sched_main.c | 23 +--
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a953693b45..f9b9b3aefc4a 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -314,6 +314,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 {
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
+   struct dma_fence *fence;
enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
 
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
@@ -325,11 +326,10 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
 
if (job) {
/*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
+* Get job->s_fence->parent here to avoid concurrent delete 
during
+* processing timedout_job
 */
-   list_del_init(>list);
+   fence = dma_fence_get(job->s_fence->parent);
spin_unlock(>job_list_lock);
 
status = job->sched->ops->timedout_job(job);
@@ -342,6 +342,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
job->sched->ops->free_job(job);
sched->free_guilty = false;
}
+   dma_fence_put(fence);
} else {
spin_unlock(>job_list_lock);
}
@@ -392,20 +393,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, 
struct drm_sched_job *bad)
 
kthread_park(sched->thread);
 
-   /*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(>list, >pending_list);
-
/*
 * Iterate the job list from later to  earlier one and either deactive
 * their HW callbacks or remove them from pending list if they already
-- 
2.25.1



[PATCH] Revert "drm/scheduler: Avoid accessing freed bad job."

2021-08-16 Thread Jingwen Chen
[Why]
for bailing job, this commit will delete it from pending list thus the
bailing job will never have a chance to be resubmitted even in advance
tdr mode.

[How]
after embedding hw_fence into amdgpu_job is done, the race condition that
this commit tries to work around is completely solved. So revert this
commit.
This reverts commit 135517d3565b48f4def3b1b82008bc17eb5d1c90.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/scheduler/sched_main.c | 27 --
 1 file changed, 27 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index a2a953693b45..31d1176d939f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -317,21 +317,10 @@ static void drm_sched_job_timedout(struct work_struct 
*work)
enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
 
sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
-
-   /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
-   spin_lock(>job_list_lock);
job = list_first_entry_or_null(>pending_list,
   struct drm_sched_job, list);
 
if (job) {
-   /*
-* Remove the bad job so it cannot be freed by concurrent
-* drm_sched_cleanup_jobs. It will be reinserted back after 
sched->thread
-* is parked at which point it's safe.
-*/
-   list_del_init(>list);
-   spin_unlock(>job_list_lock);
-
status = job->sched->ops->timedout_job(job);
 
/*
@@ -342,8 +331,6 @@ static void drm_sched_job_timedout(struct work_struct *work)
job->sched->ops->free_job(job);
sched->free_guilty = false;
}
-   } else {
-   spin_unlock(>job_list_lock);
}
 
if (status != DRM_GPU_SCHED_STAT_ENODEV) {
@@ -392,20 +379,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, 
struct drm_sched_job *bad)
 
kthread_park(sched->thread);
 
-   /*
-* Reinsert back the bad job here - now it's safe as
-* drm_sched_get_cleanup_job cannot race against us and release the
-* bad job at this point - we parked (waited for) any in progress
-* (earlier) cleanups and drm_sched_get_cleanup_job will not be called
-* now until the scheduler thread is unparked.
-*/
-   if (bad && bad->sched == sched)
-   /*
-* Add at the head of the queue to reflect it was the earliest
-* job extracted.
-*/
-   list_add(>list, >pending_list);
-
/*
 * Iterate the job list from later to  earlier one and either deactive
 * their HW callbacks or remove them from pending list if they already
-- 
2.25.1



Re: [PATCH v4] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-11 Thread Jingwen Chen
Hi Guchun

Sorry for causing this failure; I have already submitted a v5 patch to fix it.

On Wed Aug 11, 2021 at 05:26:52PM +0800, Chen, Guchun wrote:
> [Public]
> 
> Attach the error log.
> 
> [   99.534964] kfd kfd: amdgpu: Allocated 3969056 bytes on gart
> [   99.535531] amdgpu: SRAT table not found
> [   99.535532] amdgpu: Virtual CRAT table created for GPU
> [   99.536695] amdgpu: Topology: Add dGPU node [0x73a3:0x1002]
> [   99.536697] kfd kfd: amdgpu: added device 1002:73a3
> [   99.536717] amdgpu :03:00.0: amdgpu: SE 4, SH per SE 2, CU per SH 10, 
> active_cu_number 60
> [   99.536904] BUG: kernel NULL pointer dereference, address: 0048
> [   99.536906] #PF: supervisor read access in kernel mode
> [   99.536907] #PF: error_code(0x) - not-present page
> [   99.536908] PGD 0 P4D 0
> [   99.536910] Oops:  [#1] SMP PTI
> [   99.536911] CPU: 8 PID: 2282 Comm: sdma0 Not tainted 5.13.0-guchchen #1
> [   99.536913] Hardware name: System manufacturer System Product Name/TUF 
> Z370-PLUS GAMING II, BIOS 0411 09/21/2018
> [   99.536914] RIP: 0010:amdgpu_fence_enable_signaling+0x15/0x40 [amdgpu]
> [   99.537023] [drm] Unknown EDID CEA parser results
> [   99.537044] Code: 00 e9 4f 55 ab ed 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 
> 00 00 0f 1f 44 00 00 48 81 7f 08 20 c7 b1 c0 74 02 31 ff 48 8b 7f 40 <48> 8b 
> 47 48 48 85 c0 74 06 b8 01 00 00 00 c3 48 8b 35 95 9c e5 ee
> [   99.537046] RSP: 0018:b50b01dcfe58 EFLAGS: 00010046
> [   99.537047] RAX: c07adcc0 RBX: 9bd53c3f4d90 RCX: 
> 0017
> [   99.537048] RDX: 0001 RSI: 9bd53c3f4c58 RDI: 
> 
> [   99.537049] RBP: 9bd53c3f4c00 R08:  R09: 
> b918
> [   99.537050] R10: 0001 R11: 0074 R12: 
> c06e4d10
> [   99.537050] R13: 0246 R14: 9bd53b60b9a0 R15: 
> 9bd53c3f4d90
> [   99.537051] FS:  () GS:9bd826c0() 
> knlGS:
> [   99.537052] CS:  0010 DS:  ES:  CR0: 80050033
> [   99.537053] CR2: 0048 CR3: 00021360a005 CR4: 
> 003706e0
> [   99.537054] DR0:  DR1:  DR2: 
> 
> [   99.537055] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [   99.537056] Call Trace:
> [   99.537057]  __dma_fence_enable_signaling+0x3c/0xa0
> [   99.537060]  dma_fence_add_callback+0x39/0xa0
> [   99.537062]  drm_sched_main+0x1aa/0x390 [gpu_sched]
> [   99.537065]  ? wait_woken+0x80/0x80
> [   99.537068]  ? drm_sched_get_cleanup_job+0x120/0x120 [gpu_sched]
> [   99.537070]  kthread+0x117/0x130
> [   99.537071]  ? kthread_park+0x90/0x9
> 
> Regards,
> Guchun
> 
> -Original Message-
> From: amd-gfx  On Behalf Of Chen, 
> Guchun
> Sent: Wednesday, August 11, 2021 5:24 PM
> To: Grodzovsky, Andrey ; Chen, JingWen 
> ; amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk ; Koenig, Christian 
> ; Jack Zhang ; Jack Zhang 
> 
> Subject: RE: [PATCH v4] drm/amd/amdgpu embed hw_fence into amdgpu_job
> 
> [Public]
> 
> Hi Jingwen,
> 
> Your patch has caused amdgpu driver load failure on all ASICs. Please revert 
> it first and come up with a proper fix.
> 
> Regards,
> Guchun
> 
> -Original Message-
> From: amd-gfx  On Behalf Of Andrey 
> Grodzovsky
> Sent: Wednesday, August 11, 2021 12:41 AM
> To: Chen, JingWen ; amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk ; Koenig, Christian 
> ; Jack Zhang ; Jack Zhang 
> 
> Subject: Re: [PATCH v4] drm/amd/amdgpu embed hw_fence into amdgpu_job
> 
> Reviewed-by: Andrey Grodzovsky 
> 
> Andrey
> 
> On 2021-08-09 11:22 p.m., Jingwen Chen wrote:
> > From: Jack Zhang 
> >
> > Why: Previously hw fence is alloced separately with job.
> > It caused historical lifetime issues and corner cases.
> > The ideal situation is to take fence to manage both job and fence's 
> > lifetime, and simplify the design of gpu-scheduler.
> >
> > How:
> > We propose to embed hw_fence into amdgpu_job.
> > 1. We cover the normal job submission by this method.
> > 2. For ib_test, and submit without a parent job keep the legacy way to 
> > create a hw fence separately.
> > v2:
> > use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is 
> > embeded in a job.
> > v3:
> > remove redundant variable ring in amdgpu_job
> > v4:
> > add tdr sequence support for this feature. Add a job_run_counter to 
> > indicate whether this job is a resubmit job.
> >
> > Signed-off-by: Jingwen Chen 
> > Signed-off-by: Jack Zhang 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/am

[PATCH v5] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-11 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
It caused historical lifetime issues and corner cases.
The ideal situation is to take fence to manage both job
and fence's lifetime, and simplify the design of gpu-scheduler.

How:
We propose to embed hw_fence into amdgpu_job.
1. We cover the normal job submission by this method.
2. For ib_test, and submit without a parent job keep the
legacy way to create a hw fence separately.
v2:
use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
embedded in a job.
v3:
remove redundant variable ring in amdgpu_job
v4:
add tdr sequence support for this feature. Add a job_run_counter to
indicate whether this job is a resubmit job.
v5:
add missing handling in amdgpu_fence_enable_signaling

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
Reviewed-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 13 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 86 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 39 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 9 files changed, 119 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7b46ba551cb2..3003ee1c9487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (>hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4c33985542ed..04a6bed4e5f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4448,7 +4448,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 struct amdgpu_reset_context *reset_context)
 {
-   int i, r = 0;
+   int i, j, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
@@ -4472,6 +4472,17 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (!ring || !ring->sched.thread)
continue;
 
+   /*clear job fence from fence drv to avoid force_completion
+*leave NULL and vm flush fence in fence drv */
+   for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) {
+   struct dma_fence *old, **ptr;
+
+   ptr = >fence_drv.fences[j];
+   old = rcu_dereference_protected(*ptr, 1);
+   if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, 
>flags)) {
+   RCU_INIT_POINTER(*ptr, NULL);
+   }
+   }
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7495911516c2..b439eb7d4177 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -129,30 +129,50 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  *
  * @ring: ring the fence is associated with
  * @f: resulting fence object
+ * @job: job the fence is embedded in
  * @flags: flags to pass into the subordinate .emit_fence() call
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,

Re: [PATCHv2 2/2] drm/amd/amdgpu: add tdr support for embeded hw_fence

2021-08-10 Thread Jingwen Chen
Hi Andrey,

The latest patch [PATCH v4] drm/amd/amdgpu embed hw_fence into
amdgpu_job has been sent to amd-gfx. can you help review this patch?

Best Regards,
Jingwen
On Tue Aug 10, 2021 at 10:51:17AM +0800, Jingwen Chen wrote:
> On Mon Aug 09, 2021 at 12:24:37PM -0400, Andrey Grodzovsky wrote:
> > 
> > On 2021-08-05 4:31 a.m., Jingwen Chen wrote:
> > > [Why]
> > > After embeded hw_fence to amdgpu_job, we need to add tdr support
> > > for this feature.
> > > 
> > > [How]
> > > 1. Add a resubmit_flag for resubmit jobs.
> > > 2. Clear job fence from RCU and force complete vm flush fences in
> > > pre_asic_reset
> > > 3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
> > > for guilty jobs.
> > > v2:
> > > use a job_run_counter in amdgpu_job to replace the resubmit_flag in
> > > drm_sched_job. When the job_run_counter >= 1, it means this job is a
> > > resubmit job.
> > > 
> > > Signed-off-by: Jack Zhang 
> > > Signed-off-by: Jingwen Chen 
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 13 +
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  5 -
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h|  3 +++
> > >   4 files changed, 27 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > index 9e53ff851496..ade2fa07a50a 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > @@ -4447,7 +4447,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device 
> > > *adev)
> > >   int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > >struct amdgpu_reset_context 
> > > *reset_context)
> > >   {
> > > - int i, r = 0;
> > > + int i, j, r = 0;
> > >   struct amdgpu_job *job = NULL;
> > >   bool need_full_reset =
> > >   test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
> > > @@ -4471,6 +4471,16 @@ int amdgpu_device_pre_asic_reset(struct 
> > > amdgpu_device *adev,
> > >   if (!ring || !ring->sched.thread)
> > >   continue;
> > > + /*clear job fence from fence drv to avoid force_completion
> > > +  *leave NULL and vm flush fence in fence drv */
> > > + for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
> > > + struct dma_fence *old,**ptr;
> > > + ptr = >fence_drv.fences[j];
> > > + old = rcu_dereference_protected(*ptr, 1);
> > > + if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, 
> > > >flags)) {
> > > + RCU_INIT_POINTER(*ptr, NULL);
> > > + }
> > > + }
> > >   /* after all hw jobs are reset, hw fence is 
> > > meaningless, so force_completion */
> > >   amdgpu_fence_driver_force_completion(ring);
> > >   }
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > index 5e29d797a265..c9752cf794fb 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > @@ -159,10 +159,15 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, 
> > > struct dma_fence **f, struct amd
> > >   }
> > >   seq = ++ring->fence_drv.sync_seq;
> > > - dma_fence_init(fence, _fence_ops,
> > > ->fence_drv.lock,
> > > -adev->fence_context + ring->idx,
> > > -seq);
> > > + if (job != NULL && job->job_run_counter) {
> > > + /* reinit seq for resubmitted jobs */
> > > + fence->seqno = seq;
> > > + } else {
> > > + dma_fence_init(fence, _fence_ops,
> > > + >fence_drv.lock,
> > > + adev->fence_context + ring->idx,
> > > + seq);
> > > + }
> > 
> > 
> > I think this should be in the first patch actually (and the counter too),
> > without it the first patch is buggy.
> > 
>

Re: [PATCHv2 2/2] drm/amd/amdgpu: add tdr support for embeded hw_fence

2021-08-10 Thread Jingwen Chen
On Mon Aug 09, 2021 at 12:24:37PM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-08-05 4:31 a.m., Jingwen Chen wrote:
> > [Why]
> > After embeded hw_fence to amdgpu_job, we need to add tdr support
> > for this feature.
> > 
> > [How]
> > 1. Add a resubmit_flag for resubmit jobs.
> > 2. Clear job fence from RCU and force complete vm flush fences in
> > pre_asic_reset
> > 3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
> > for guilty jobs.
> > v2:
> > use a job_run_counter in amdgpu_job to replace the resubmit_flag in
> > drm_sched_job. When the job_run_counter >= 1, it means this job is a
> > resubmit job.
> > 
> > Signed-off-by: Jack Zhang 
> > Signed-off-by: Jingwen Chen 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 13 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  5 -
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h|  3 +++
> >   4 files changed, 27 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 9e53ff851496..ade2fa07a50a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -4447,7 +4447,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device 
> > *adev)
> >   int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> >  struct amdgpu_reset_context *reset_context)
> >   {
> > -   int i, r = 0;
> > +   int i, j, r = 0;
> > struct amdgpu_job *job = NULL;
> > bool need_full_reset =
> > test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
> > @@ -4471,6 +4471,16 @@ int amdgpu_device_pre_asic_reset(struct 
> > amdgpu_device *adev,
> > if (!ring || !ring->sched.thread)
> > continue;
> > +   /*clear job fence from fence drv to avoid force_completion
> > +*leave NULL and vm flush fence in fence drv */
> > +   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
> > +   struct dma_fence *old,**ptr;
> > +   ptr = >fence_drv.fences[j];
> > +   old = rcu_dereference_protected(*ptr, 1);
> > +   if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, 
> > >flags)) {
> > +   RCU_INIT_POINTER(*ptr, NULL);
> > +   }
> > +   }
> > /* after all hw jobs are reset, hw fence is meaningless, so 
> > force_completion */
> > amdgpu_fence_driver_force_completion(ring);
> > }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > index 5e29d797a265..c9752cf794fb 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > @@ -159,10 +159,15 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, 
> > struct dma_fence **f, struct amd
> > }
> > seq = ++ring->fence_drv.sync_seq;
> > -   dma_fence_init(fence, _fence_ops,
> > -  >fence_drv.lock,
> > -  adev->fence_context + ring->idx,
> > -  seq);
> > +   if (job != NULL && job->job_run_counter) {
> > +   /* reinit seq for resubmitted jobs */
> > +   fence->seqno = seq;
> > +   } else {
> > +   dma_fence_init(fence, _fence_ops,
> > +   >fence_drv.lock,
> > +   adev->fence_context + ring->idx,
> > +   seq);
> > +   }
> 
> 
> I think this should be in the first patch actually (and the counter too),
> without it the first patch is buggy.
> 
I originally split these two patches into adding the job submission
sequence and adding the tdr sequence. But yes, I should merge these two
patches, otherwise the tdr sequence will fail without the second patch.
Will send a merged version today.

Best Regards,
Jingwen
> 
> > if (job != NULL) {
> > /* mark this fence has a parent job */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > index 65a395060de2..19b13a65c73b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > @@ -254,6 +254,7 @@ static struct dma_fen

[PATCH v4] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-09 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
It caused historical lifetime issues and corner cases.
The ideal situation is to take fence to manage both job
and fence's lifetime, and simplify the design of gpu-scheduler.

How:
We propose to embed hw_fence into amdgpu_job.
1. We cover the normal job submission by this method.
2. For ib_test, and submit without a parent job keep the
legacy way to create a hw fence separately.
v2:
use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
embedded in a job.
v3:
remove redundant variable ring in amdgpu_job
v4:
add tdr sequence support for this feature. Add a job_run_counter to
indicate whether this job is a resubmit job.

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 73 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 39 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 9 files changed, 108 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7b46ba551cb2..3003ee1c9487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (>hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9e53ff851496..ade2fa07a50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4447,7 +4447,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 struct amdgpu_reset_context *reset_context)
 {
-   int i, r = 0;
+   int i, j, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
@@ -4471,6 +4471,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (!ring || !ring->sched.thread)
continue;
 
+   /*clear job fence from fence drv to avoid force_completion
+*leave NULL and vm flush fence in fence drv */
+   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
+   struct dma_fence *old,**ptr;
+   ptr = >fence_drv.fences[j];
+   old = rcu_dereference_protected(*ptr, 1);
+   if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, 
>flags)) {
+   RCU_INIT_POINTER(*ptr, NULL);
+   }
+   }
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7495911516c2..a8302e324110 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -129,30 +129,50 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  *
  * @ring: ring the fence is associated with
  * @f: resulting fence object
+ * @job: job the fence is embeded in
  * @flags: flags to pass into the subordinate .emit_fence() call
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,
  unsigned flags)
 {
struct amdgpu_device *adev = ring->adev;
-   struc

Re: [PATCHv2 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-09 Thread Jingwen Chen
On Mon Aug 09, 2021 at 10:18:37AM +0800, Jingwen Chen wrote:
> On Fri Aug 06, 2021 at 11:48:04AM +0200, Christian König wrote:
> > 
> > 
> > Am 06.08.21 um 07:52 schrieb Jingwen Chen:
> > > On Thu Aug 05, 2021 at 05:13:22PM -0400, Andrey Grodzovsky wrote:
> > > > On 2021-08-05 4:31 a.m., Jingwen Chen wrote:
> > > > > From: Jack Zhang 
> > > > > 
> > > > > Why: Previously hw fence is alloced separately with job.
> > > > > It caused historical lifetime issues and corner cases.
> > > > > The ideal situation is to take fence to manage both job
> > > > > and fence's lifetime, and simplify the design of gpu-scheduler.
> > > > > 
> > > > > How:
> > > > > We propose to embed hw_fence into amdgpu_job.
> > > > > 1. We cover the normal job submission by this method.
> > > > > 2. For ib_test, and submit without a parent job keep the
> > > > > legacy way to create a hw fence separately.
> > > > > v2:
> > > > > use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
> > > > > embeded in a job.
> > > > > 
> > > > > Signed-off-by: Jingwen Chen 
> > > > > Signed-off-by: Jack Zhang 
> > > > > ---
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 63 
> > > > > -
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  4 +-
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
> > > > >drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
> > > > >8 files changed, 84 insertions(+), 30 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > > > > index 7b46ba551cb2..3003ee1c9487 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > > > > @@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, 
> > > > > enum kgd_engine_type engine,
> > > > >   ret = dma_fence_wait(f, false);
> > > > >err_ib_sched:
> > > > > - dma_fence_put(f);
> > > > >   amdgpu_job_free(job);
> > > > >err:
> > > > >   return ret;
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > > > > index 536005bff24a..277128846dd1 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > > > > @@ -1414,7 +1414,7 @@ static void 
> > > > > amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
> > > > >   continue;
> > > > >   }
> > > > >   job = to_amdgpu_job(s_job);
> > > > > - if (preempted && job->fence == fence)
> > > > > + if (preempted && (>hw_fence) == fence)
> > > > >   /* mark the job as preempted */
> > > > >   job->preemption_status |= AMDGPU_IB_PREEMPTED;
> > > > >   }
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > > index 7495911516c2..5e29d797a265 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > > @@ -129,30 +129,46 @@ static u32 amdgpu_fence_read(struct amdgpu_ring 
> > > > > *ring)
> > > > > *
> > > > > * @ring: ring the fence is associated with
> > > > > * @f: resulting fence object
> > > > > + * @job: job the fence is embeded in
> > > > > * @flags: flags to pass into the subordinate .emit_fence() call
> > > > > *
> > > > > * Emi

Re: [PATCHv2 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-09 Thread Jingwen Chen
On Fri Aug 06, 2021 at 11:48:04AM +0200, Christian König wrote:
> 
> 
> Am 06.08.21 um 07:52 schrieb Jingwen Chen:
> > On Thu Aug 05, 2021 at 05:13:22PM -0400, Andrey Grodzovsky wrote:
> > > On 2021-08-05 4:31 a.m., Jingwen Chen wrote:
> > > > From: Jack Zhang 
> > > > 
> > > > Why: Previously hw fence is alloced separately with job.
> > > > It caused historical lifetime issues and corner cases.
> > > > The ideal situation is to take fence to manage both job
> > > > and fence's lifetime, and simplify the design of gpu-scheduler.
> > > > 
> > > > How:
> > > > We propose to embed hw_fence into amdgpu_job.
> > > > 1. We cover the normal job submission by this method.
> > > > 2. For ib_test, and submit without a parent job keep the
> > > > legacy way to create a hw fence separately.
> > > > v2:
> > > > use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
> > > > embeded in a job.
> > > > 
> > > > Signed-off-by: Jingwen Chen 
> > > > Signed-off-by: Jack Zhang 
> > > > ---
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 63 
> > > > -
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  4 +-
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
> > > >8 files changed, 84 insertions(+), 30 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > > > index 7b46ba551cb2..3003ee1c9487 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > > > @@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, 
> > > > enum kgd_engine_type engine,
> > > > ret = dma_fence_wait(f, false);
> > > >err_ib_sched:
> > > > -   dma_fence_put(f);
> > > > amdgpu_job_free(job);
> > > >err:
> > > > return ret;
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > > > index 536005bff24a..277128846dd1 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > > > @@ -1414,7 +1414,7 @@ static void 
> > > > amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
> > > > continue;
> > > > }
> > > > job = to_amdgpu_job(s_job);
> > > > -   if (preempted && job->fence == fence)
> > > > +   if (preempted && (>hw_fence) == fence)
> > > > /* mark the job as preempted */
> > > > job->preemption_status |= AMDGPU_IB_PREEMPTED;
> > > > }
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > index 7495911516c2..5e29d797a265 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > @@ -129,30 +129,46 @@ static u32 amdgpu_fence_read(struct amdgpu_ring 
> > > > *ring)
> > > > *
> > > > * @ring: ring the fence is associated with
> > > > * @f: resulting fence object
> > > > + * @job: job the fence is embeded in
> > > > * @flags: flags to pass into the subordinate .emit_fence() call
> > > > *
> > > > * Emits a fence command on the requested ring (all asics).
> > > > * Returns 0 on success, -ENOMEM on failure.
> > > > */
> > > > -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
> > > > +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, 
> > > > struct amdgpu_job *job,
> > > >   unsigned flags)
> > > >{

[PATCHv3 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-08 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
This caused historical lifetime issues and corner cases.
The ideal situation is to use the fence to manage both the job's
and the fence's lifetime, and to simplify the design of the gpu-scheduler.

How:
We propose to embed hw_fence into amdgpu_job.
1. We cover the normal job submission by this method.
2. For ib_test, and submit without a parent job keep the
legacy way to create a hw fence separately.
v2:
use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
embedded in a job.
v3:
remove redundant variable ring in amdgpu_job

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 62 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 8 files changed, 82 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7b46ba551cb2..3003ee1c9487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (>hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7495911516c2..c26eec660ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -129,30 +129,45 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  *
  * @ring: ring the fence is associated with
  * @f: resulting fence object
+ * @job: job the fence is embeded in
  * @flags: flags to pass into the subordinate .emit_fence() call
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,
  unsigned flags)
 {
struct amdgpu_device *adev = ring->adev;
-   struct amdgpu_fence *fence;
+   struct dma_fence *fence;
+   struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
 
-   fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-   if (fence == NULL)
-   return -ENOMEM;
+   if (job == NULL) {
+   /* create a sperate hw fence */
+   am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
+   if (am_fence == NULL)
+   return -ENOMEM;
+   fence = _fence->base;
+   am_fence->ring = ring;
+   } else {
+   /* take use of job-embedded fence */
+   fence = >hw_fence;
+   }
 
seq = ++ring->fence_drv.sync_seq;
-   fence->ring = ring;
-   dma_fence_init(>base, _fence_ops,
+   dma_fence_init(fence, _fence_ops,
   >fence_drv.lock,
   adev->fence_context + ring->idx,
   seq);
+
+   if (job != NULL) {
+   /* mark this fence has a parent job */
+   set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, >flags);
+   }
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
@@ -175,9 +190,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
-   rcu_assign_pointer(*ptr, dma_fence_get(>base));
+   rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-   *f = 

Re: [PATCHv2 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-06 Thread Jingwen Chen
On Thu Aug 05, 2021 at 05:13:22PM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-08-05 4:31 a.m., Jingwen Chen wrote:
> > From: Jack Zhang 
> > 
> > Why: Previously hw fence is alloced separately with job.
> > It caused historical lifetime issues and corner cases.
> > The ideal situation is to take fence to manage both job
> > and fence's lifetime, and simplify the design of gpu-scheduler.
> > 
> > How:
> > We propose to embed hw_fence into amdgpu_job.
> > 1. We cover the normal job submission by this method.
> > 2. For ib_test, and submit without a parent job keep the
> > legacy way to create a hw fence separately.
> > v2:
> > use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
> > embeded in a job.
> > 
> > Signed-off-by: Jingwen Chen 
> > Signed-off-by: Jack Zhang 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 63 -
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  4 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
> >   8 files changed, 84 insertions(+), 30 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index 7b46ba551cb2..3003ee1c9487 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
> > kgd_engine_type engine,
> > ret = dma_fence_wait(f, false);
> >   err_ib_sched:
> > -   dma_fence_put(f);
> > amdgpu_job_free(job);
> >   err:
> > return ret;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > index 536005bff24a..277128846dd1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > @@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
> > amdgpu_ring *ring)
> > continue;
> > }
> > job = to_amdgpu_job(s_job);
> > -   if (preempted && job->fence == fence)
> > +   if (preempted && (>hw_fence) == fence)
> > /* mark the job as preempted */
> > job->preemption_status |= AMDGPU_IB_PREEMPTED;
> > }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > index 7495911516c2..5e29d797a265 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > @@ -129,30 +129,46 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
> >*
> >* @ring: ring the fence is associated with
> >* @f: resulting fence object
> > + * @job: job the fence is embeded in
> >* @flags: flags to pass into the subordinate .emit_fence() call
> >*
> >* Emits a fence command on the requested ring (all asics).
> >* Returns 0 on success, -ENOMEM on failure.
> >*/
> > -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
> > +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, 
> > struct amdgpu_job *job,
> >   unsigned flags)
> >   {
> > struct amdgpu_device *adev = ring->adev;
> > -   struct amdgpu_fence *fence;
> > +   struct dma_fence *fence;
> > +   struct amdgpu_fence *am_fence;
> > struct dma_fence __rcu **ptr;
> > uint32_t seq;
> > int r;
> > -   fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
> > -   if (fence == NULL)
> > -   return -ENOMEM;
> > +   if (job == NULL) {
> > +   /* create a sperate hw fence */
> > +   am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
> > +   if (am_fence == NULL)
> > +   return -ENOMEM;
> > +   fence = _fence->base;
> > +   am_fence->ring = ring;
> > +   } else {
> > +   /* take use of job-embedded fence */
> > +   fence = >hw_fence;
> > +   job->ring = ring;
> 
> 
> If you would make hw_fence of type amdgpu

[PATCHv2 2/2] drm/amd/amdgpu: add tdr support for embeded hw_fence

2021-08-05 Thread Jingwen Chen
[Why]
After embedding the hw_fence into amdgpu_job, we need to add tdr support
for this feature.

[How]
1. Add a resubmit_flag for resubmit jobs.
2. Clear job fence from RCU and force complete vm flush fences in
   pre_asic_reset
3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
   for guilty jobs.
v2:
use a job_run_counter in amdgpu_job to replace the resubmit_flag in
drm_sched_job. When the job_run_counter >= 1, it means this job is a
resubmit job.

Signed-off-by: Jack Zhang 
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  5 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h|  3 +++
 4 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9e53ff851496..ade2fa07a50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4447,7 +4447,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 struct amdgpu_reset_context *reset_context)
 {
-   int i, r = 0;
+   int i, j, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
@@ -4471,6 +4471,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (!ring || !ring->sched.thread)
continue;
 
+   /*clear job fence from fence drv to avoid force_completion
+*leave NULL and vm flush fence in fence drv */
+   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
+   struct dma_fence *old,**ptr;
+   ptr = >fence_drv.fences[j];
+   old = rcu_dereference_protected(*ptr, 1);
+   if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, 
>flags)) {
+   RCU_INIT_POINTER(*ptr, NULL);
+   }
+   }
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5e29d797a265..c9752cf794fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -159,10 +159,15 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
}
 
seq = ++ring->fence_drv.sync_seq;
-   dma_fence_init(fence, _fence_ops,
-  >fence_drv.lock,
-  adev->fence_context + ring->idx,
-  seq);
+   if (job != NULL && job->job_run_counter) {
+   /* reinit seq for resubmitted jobs */
+   fence->seqno = seq;
+   } else {
+   dma_fence_init(fence, _fence_ops,
+   >fence_drv.lock,
+   adev->fence_context + ring->idx,
+   seq);
+   }
 
if (job != NULL) {
/* mark this fence has a parent job */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 65a395060de2..19b13a65c73b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -254,6 +254,7 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if 
VRAM lost */
 
if (finished->error < 0) {
+   dma_fence_put(>hw_fence);
DRM_INFO("Skip scheduling IBs!\n");
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
@@ -262,7 +263,9 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
 
-   dma_fence_get(fence);
+   if (!job->job_run_counter)
+   dma_fence_get(fence);
+   job->job_run_counter ++;
amdgpu_job_free_resources(job);
 
fence = r ? ERR_PTR(r) : fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 92324c978534..1fa667f245e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -64,6 +64,9 @@ struct amdgpu_job {
/* user fence handling */
uint64_tuf_addr;
uint64_tuf_sequence;
+
+   /* job_run_counter >= 1 means a resubmit job */
+   uint32_tjob_run_counter;
 };
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-- 
2.25.1



[PATCHv2 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-08-05 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
This caused historical lifetime issues and corner cases.
The ideal situation is to use the fence to manage both the job's
and the fence's lifetime, and to simplify the design of the gpu-scheduler.

How:
We propose to embed hw_fence into amdgpu_job.
1. We cover the normal job submission by this method.
2. For ib_test, and submit without a parent job keep the
legacy way to create a hw fence separately.
v2:
use AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT to show that the fence is
embedded in a job.

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 63 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 8 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7b46ba551cb2..3003ee1c9487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (>hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7495911516c2..5e29d797a265 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -129,30 +129,46 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  *
  * @ring: ring the fence is associated with
  * @f: resulting fence object
+ * @job: job the fence is embeded in
  * @flags: flags to pass into the subordinate .emit_fence() call
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,
  unsigned flags)
 {
struct amdgpu_device *adev = ring->adev;
-   struct amdgpu_fence *fence;
+   struct dma_fence *fence;
+   struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
 
-   fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-   if (fence == NULL)
-   return -ENOMEM;
+   if (job == NULL) {
+   /* create a sperate hw fence */
+   am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
+   if (am_fence == NULL)
+   return -ENOMEM;
+   fence = _fence->base;
+   am_fence->ring = ring;
+   } else {
+   /* take use of job-embedded fence */
+   fence = >hw_fence;
+   job->ring = ring;
+   }
 
seq = ++ring->fence_drv.sync_seq;
-   fence->ring = ring;
-   dma_fence_init(>base, _fence_ops,
+   dma_fence_init(fence, _fence_ops,
   >fence_drv.lock,
   adev->fence_context + ring->idx,
   seq);
+
+   if (job != NULL) {
+   /* mark this fence has a parent job */
+   set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, >flags);
+   }
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
@@ -175,9 +191,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
-   rcu_assign_pointer(*ptr, dma_fence_get(>base));
+   rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-   *f = >base;
+  

Re: [PATCH 2/2] drm: add tdr support for embedded hw_fence

2021-07-23 Thread Jingwen Chen
On Fri Jul 23, 2021 at 10:45:49AM +0200, Christian König wrote:
> Am 23.07.21 um 09:07 schrieb Jingwen Chen:
> > [SNIP]
> > Hi Christian,
> > 
> > The thing is vm flush fence has no job passed to amdgpu_fence_emit, so
> > go through the jobs cannot help find the vm flush fence. And keep the
> > rest fences in the RCU array will lead to signaling them in the ib_test
> > right after ASIC reset. While they will be signaled again during
> > resubmission. What I'm doing here is just trying to clean up the fences
> > without a parent job and make sure the remaining fences won't be signaled
> > twice.
> 
> It took me a moment to realize what you are talking about here.
> 
> This is for the KIQ! You need different handling for the KIQ than for the
> scheduler controlled rings.
> 
> It is not only the flush jobs, but the KIQ needs a complete reset because of
> the register writes pushed there as well.
> 
> > > And please drop any usage of DMA_FENCE_FLAG_USER_BITS. That is not 
> > > something
> > > which should be abused for reset handling.
> > > 
> > The DMA_FENCE_FLAG_USER_BITS here is to mark this fence has a parent
> > job. If this is not a proper usage here, do you have any suggestions
> > about how to identify whether the fence has a parent job?
> 
> You don't need to mark the fences at all. Everything on the KIQ ring needs
> to be force completed since none of the fences on that ring have a parent
> job.
> 
> Christian.
>
Hi Christian

Yes KIQ ring fences all need force_completion. But we do need to mark the
fences. Say we have a gfx ring job with vm_flush=1 being sent to
amdgpu_ib_schedule, then in amdgpu_vm_flush, after the
amdgpu_ring_emit_vm_flush is done, we will create a vm flush fence on
gfx ring with amdgpu_fence_emit(ring, , NULL, 0).

Then this vm flush fence we create here has no parent job but it's on
gfx ring. 
If we only do force_completion for KIQ ring and just clear the
RCU array for the scheduler controlled rings, nobody will signal and put this
gfx ring vm_flush fence again. When this job is resubmitted, it will
just create a new vm_flush fence. This is a memleak and I have seen this
memleak during my test.

Best Regards,
JingWen Chen
> > 
> > Best Regards,
> > JingWen Chen
> > > Regards,
> > > Christian.
> > > 
> > > > 
> > > > Best Regards,
> > > > JingWen Chen
> > > > > > Andrey
> > > > > > 
> > > > > > 
> > > > > > > > > +    }
> > > > > > > > >      /* after all hw jobs are reset, hw fence is
> > > > > > > > > meaningless, so force_completion */
> > > > > > > > >      amdgpu_fence_driver_force_completion(ring);
> > > > > > > > >      }
> > > > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > > > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > > > > > > index eecf21d8ec33..815776c9a013 100644
> > > > > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > > > > > > > > @@ -156,11 +156,17 @@ int amdgpu_fence_emit(struct
> > > > > > > > > amdgpu_ring *ring, struct dma_fence **f, struct amd
> > > > > > > > >      job->ring = ring;
> > > > > > > > >      }
> > > > > > > > > -    seq = ++ring->fence_drv.sync_seq;
> > > > > > > > > -    dma_fence_init(fence, _fence_ops,
> > > > > > > > > -   >fence_drv.lock,
> > > > > > > > > -   adev->fence_context + ring->idx,
> > > > > > > > > -   seq);
> > > > > > > > > +    if (job != NULL && job->base.resubmit_flag == 1) {
> > > > > > > > > +    /* reinit seq for resubmitted jobs */
> > > > > > > > > +    seq = ++ring->fence_drv.sync_seq;
> > > > > > > > > +    fence->seqno = seq;
> > > > > > > > > +    } else {
> > > > > > > > > +    seq = ++ring->fence_drv.sync_seq;
> > > > > > > > Seems like you could do the above line only once above if-else
> > > > > > > > as it was
> > > > &

Re: [PATCH 2/2] drm: add tdr support for embedded hw_fence

2021-07-23 Thread Jingwen Chen
On Fri Jul 23, 2021 at 08:33:02AM +0200, Christian König wrote:
> Am 22.07.21 um 18:47 schrieb Jingwen Chen:
> > On Thu Jul 22, 2021 at 06:24:28PM +0200, Christian König wrote:
> > > Am 22.07.21 um 16:45 schrieb Andrey Grodzovsky:
> > > > On 2021-07-22 6:45 a.m., Jingwen Chen wrote:
> > > > > On Wed Jul 21, 2021 at 12:53:51PM -0400, Andrey Grodzovsky wrote:
> > > > > > On 2021-07-20 11:13 p.m., Jingwen Chen wrote:
> > > > > > > [Why]
> > > > > > > After embeded hw_fence to amdgpu_job, we need to add tdr support
> > > > > > > for this feature.
> > > > > > > 
> > > > > > > [How]
> > > > > > > 1. Add a resubmit_flag for resubmit jobs.
> > > > > > > 2. Clear job fence from RCU and force complete vm flush fences in
> > > > > > >       pre_asic_reset
> > > > > > > 3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
> > > > > > >   for guilty jobs.
> > > > > > > 
> > > > > > > Signed-off-by: Jack Zhang 
> > > > > > > Signed-off-by: Jingwen Chen 
> > > > > > > ---
> > > > > > >     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
> > > > > > >     drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 
> > > > > > > +++-
> > > > > > >     drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  4 +++-
> > > > > > >     drivers/gpu/drm/scheduler/sched_main.c |  1 +
> > > > > > >     include/drm/gpu_scheduler.h    |  1 +
> > > > > > >     5 files changed, 27 insertions(+), 7 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > index 40461547701a..fe0237f72a09 100644
> > > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > @@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct
> > > > > > > amdgpu_device *adev)
> > > > > > >     int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > > > >      struct amdgpu_reset_context *reset_context)
> > > > > > >     {
> > > > > > > -    int i, r = 0;
> > > > > > > +    int i, j, r = 0;
> > > > > > >     struct amdgpu_job *job = NULL;
> > > > > > >     bool need_full_reset =
> > > > > > >     test_bit(AMDGPU_NEED_FULL_RESET, 
> > > > > > > _context->flags);
> > > > > > > @@ -4406,6 +4406,16 @@ int
> > > > > > > amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > > > >     if (!ring || !ring->sched.thread)
> > > > > > >     continue;
> > > > > > > +    /*clear job fence from fence drv to avoid 
> > > > > > > force_completion
> > > > > > > + *leave NULL and vm flush fence in fence drv */
> > > > > > > +    for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
> > > > > > > +    struct dma_fence *old,**ptr;
> > > > > > > +    ptr = >fence_drv.fences[j];
> > > > > > > +    old = rcu_dereference_protected(*ptr, 1);
> > > > > > > +    if (old && test_bit(DMA_FENCE_FLAG_USER_BITS,
> > > > > > > >flags))) {
> > > > > > > +    RCU_INIT_POINTER(*ptr, NULL);
> > > > > > > +    }
> > > > > > Is this to avoid premature job free because of dma_fence_put inside
> > > > > > amdgpu_fence_process ?
> > > > > > I can't currently remember why but we probably want all the HW 
> > > > > > fences
> > > > > > currently in the ring to
> > > > > > be forced signaled - maybe better to test for 
> > > > > > DMA_FENCE_FLAG_USER_BITS
> > > > > > inside amdgpu_fence_process
> > > > > > and still do the signaling but not the dma_fence_put part
> > > > > > 
> >

[PATCH v2] drm: add tdr support for embedded hw_fence

2021-07-23 Thread Jingwen Chen
[Why]
After embedding hw_fence into amdgpu_job, we need to add tdr support
for this feature.

[How]
1. Add a resubmit_flag for resubmit jobs.
2. Clear job fence from RCU and force complete vm flush fences in
   pre_asic_reset
3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
   for guilty jobs.

Signed-off-by: Jack Zhang 
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  4 +++-
 drivers/gpu/drm/scheduler/sched_main.c |  1 +
 include/drm/gpu_scheduler.h|  1 +
 5 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 280b1940e892..df73fe666e87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 struct amdgpu_reset_context *reset_context)
 {
-   int i, r = 0;
+   int i, j, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
@@ -4406,6 +4406,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (!ring || !ring->sched.thread)
continue;
 
+   /*clear job fence from fence drv to avoid force_completion
+*leave NULL and vm flush fence in fence drv */
+   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
+   struct dma_fence *old,**ptr;
+   ptr = >fence_drv.fences[j];
+   old = rcu_dereference_protected(*ptr, 1);
+   if (old && test_bit(DMA_FENCE_FLAG_USER_BITS, 
>flags))) {
+   RCU_INIT_POINTER(*ptr, NULL);
+   }
+   }
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index eecf21d8ec33..d5b3d5f8f951 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -157,10 +157,15 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
}
 
seq = ++ring->fence_drv.sync_seq;
-   dma_fence_init(fence, _fence_ops,
-  >fence_drv.lock,
-  adev->fence_context + ring->idx,
-  seq);
+   if (job != NULL && job->base.resubmit_flag == 1) {
+   /* reset seq for resubmitted jobs */
+   fence->seqno = seq;
+   } else {
+   dma_fence_init(fence, _fence_ops,
+   >fence_drv.lock,
+   adev->fence_context + ring->idx,
+   seq);
+   }
 
if (job != NULL) {
/* mark this fence has a parent job */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 7c426e225b24..d6f848adc3f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -241,6 +241,7 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if 
VRAM lost */
 
if (finished->error < 0) {
+   dma_fence_put(>hw_fence);
DRM_INFO("Skip scheduling IBs!\n");
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
@@ -249,7 +250,8 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
 
-   dma_fence_get(fence);
+   if (!job->base.resubmit_flag)
+   dma_fence_get(fence);
amdgpu_job_free_resources(job);
 
fence = r ? ERR_PTR(r) : fence;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index f4f474944169..5a36ab5aea2d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -544,6 +544,7 @@ void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler 
*sched, int max)
dma_fence_set_error(_fence->finished, -ECANCELED);
 
dma_fence_put(s_job->s_fence->parent);
+   s_job->resubmit_flag = 1;
fence = sched->ops->run_job(s_job);
i++;
 
diff --git a/include/drm/gp

[PATCH v2 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-07-23 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
It caused historical lifetime issues and corner cases.
The ideal situation is to use the fence to manage both the job's
and the fence's lifetimes, and simplify the design of the gpu-scheduler.

How:
We propose to embed hw_fence into amdgpu_job.
1. We cover the normal job submission by this method.
2. For ib_test, and for submissions without a parent job, keep the
legacy way of creating a hw fence separately.

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 62 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 8 files changed, 80 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b6d33f13b476..bad403978bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (>hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 30772608eac6..eecf21d8ec33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -133,25 +133,40 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,
  unsigned flags)
 {
struct amdgpu_device *adev = ring->adev;
-   struct amdgpu_fence *fence;
+   struct dma_fence *fence;
+   struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
 
-   fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-   if (fence == NULL)
-   return -ENOMEM;
+   if (job == NULL) {
+   /* create a sperate hw fence */
+   am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+   if (am_fence == NULL)
+   return -ENOMEM;
+   fence = _fence->base;
+   am_fence->ring = ring;
+   } else {
+   /* take use of job-embedded fence */
+   fence = >hw_fence;
+   job->ring = ring;
+   }
 
seq = ++ring->fence_drv.sync_seq;
-   fence->ring = ring;
-   dma_fence_init(>base, _fence_ops,
+   dma_fence_init(fence, _fence_ops,
   >fence_drv.lock,
   adev->fence_context + ring->idx,
   seq);
+
+   if (job != NULL) {
+   /* mark this fence has a parent job */
+   set_bit(DMA_FENCE_FLAG_USER_BITS, >flags);
+   }
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
@@ -174,9 +189,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
-   rcu_assign_pointer(*ptr, dma_fence_get(>base));
+   rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-   *f = >base;
+   *f = fence;
 
return 0;
 }
@@ -636,8 +651,16 @@ static const char *amdgpu_fence_get_driver_name(struct 
dma_fence *fence)
 
 static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 {
-   struct amdgpu_fence *fence = to_amdgpu_fence(f);
-  

[PATCH] drm: add tdr support for embedded hw_fence

2021-07-23 Thread Jingwen Chen
[Why]
After embedding hw_fence into amdgpu_job, we need to add tdr support
for this feature.

[How]
1. Add a resubmit_flag for resubmit jobs.
2. Clear job fence from RCU and force complete vm flush fences in
   pre_asic_reset
3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
   for guilty jobs.

Signed-off-by: Jack Zhang 
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 13 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  4 +++-
 drivers/gpu/drm/scheduler/sched_main.c |  1 +
 include/drm/gpu_scheduler.h|  1 +
 5 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 280b1940e892..df73fe666e87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 struct amdgpu_reset_context *reset_context)
 {
-   int i, r = 0;
+   int i, j, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
@@ -4406,6 +4406,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (!ring || !ring->sched.thread)
continue;
 
+   /*clear job fence from fence drv to avoid force_completion
+*leave NULL and vm flush fence in fence drv */
+   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
+   struct dma_fence *old,**ptr;
+   ptr = >fence_drv.fences[j];
+   old = rcu_dereference_protected(*ptr, 1);
+   if (old && test_bit(DMA_FENCE_FLAG_USER_BITS, 
>flags))) {
+   RCU_INIT_POINTER(*ptr, NULL);
+   }
+   }
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index eecf21d8ec33..d5b3d5f8f951 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -157,10 +157,15 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
}
 
seq = ++ring->fence_drv.sync_seq;
-   dma_fence_init(fence, _fence_ops,
-  >fence_drv.lock,
-  adev->fence_context + ring->idx,
-  seq);
+   if (job != NULL && job->base.resubmit_flag == 1) {
+   /* reset seq for resubmitted jobs */
+   fence->seqno = seq;
+   } else {
+   dma_fence_init(fence, _fence_ops,
+   >fence_drv.lock,
+   adev->fence_context + ring->idx,
+   seq);
+   }
 
if (job != NULL) {
/* mark this fence has a parent job */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 7c426e225b24..d6f848adc3f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -241,6 +241,7 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if 
VRAM lost */
 
if (finished->error < 0) {
+   dma_fence_put(>hw_fence);
DRM_INFO("Skip scheduling IBs!\n");
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
@@ -249,7 +250,8 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
 
-   dma_fence_get(fence);
+   if (!job->base.resubmit_flag)
+   dma_fence_get(fence);
amdgpu_job_free_resources(job);
 
fence = r ? ERR_PTR(r) : fence;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index f4f474944169..5a36ab5aea2d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -544,6 +544,7 @@ void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler 
*sched, int max)
dma_fence_set_error(_fence->finished, -ECANCELED);
 
dma_fence_put(s_job->s_fence->parent);
+   s_job->resubmit_flag = 1;
fence = sched->ops->run_job(s_job);
i++;
 
diff --git a/include/drm/gp

[PATCH 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-07-23 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
It caused historical lifetime issues and corner cases.
The ideal situation is to use the fence to manage both the job's
and the fence's lifetimes, and simplify the design of the gpu-scheduler.

How:
We propose to embed hw_fence into amdgpu_job.
1. We cover the normal job submission by this method.
2. For ib_test, and for submissions without a parent job, keep the
legacy way of creating a hw fence separately.

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 62 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 8 files changed, 80 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b6d33f13b476..bad403978bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (>hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 30772608eac6..eecf21d8ec33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -133,25 +133,40 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,
  unsigned flags)
 {
struct amdgpu_device *adev = ring->adev;
-   struct amdgpu_fence *fence;
+   struct dma_fence *fence;
+   struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
 
-   fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-   if (fence == NULL)
-   return -ENOMEM;
+   if (job == NULL) {
+   /* create a sperate hw fence */
+   am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+   if (am_fence == NULL)
+   return -ENOMEM;
+   fence = _fence->base;
+   am_fence->ring = ring;
+   } else {
+   /* take use of job-embedded fence */
+   fence = >hw_fence;
+   job->ring = ring;
+   }
 
seq = ++ring->fence_drv.sync_seq;
-   fence->ring = ring;
-   dma_fence_init(>base, _fence_ops,
+   dma_fence_init(fence, _fence_ops,
   >fence_drv.lock,
   adev->fence_context + ring->idx,
   seq);
+
+   if (job != NULL) {
+   /* mark this fence has a parent job */
+   set_bit(DMA_FENCE_FLAG_USER_BITS, >flags);
+   }
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
@@ -174,9 +189,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
-   rcu_assign_pointer(*ptr, dma_fence_get(>base));
+   rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-   *f = >base;
+   *f = fence;
 
return 0;
 }
@@ -636,8 +651,16 @@ static const char *amdgpu_fence_get_driver_name(struct 
dma_fence *fence)
 
 static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 {
-   struct amdgpu_fence *fence = to_amdgpu_fence(f);
-  

Re: [PATCH 2/2] drm: add tdr support for embeded hw_fence

2021-07-23 Thread Jingwen Chen
On Fri Jul 23, 2021 at 12:06:32AM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-07-22 8:20 p.m., Jingwen Chen wrote:
> > On Thu Jul 22, 2021 at 01:50:09PM -0400, Andrey Grodzovsky wrote:
> > > On 2021-07-22 1:27 p.m., Jingwen Chen wrote:
> > > > On Thu Jul 22, 2021 at 01:17:13PM -0400, Andrey Grodzovsky wrote:
> > > > > On 2021-07-22 12:47 p.m., Jingwen Chen wrote:
> > > > > > On Thu Jul 22, 2021 at 06:24:28PM +0200, Christian König wrote:
> > > > > > > Am 22.07.21 um 16:45 schrieb Andrey Grodzovsky:
> > > > > > > > On 2021-07-22 6:45 a.m., Jingwen Chen wrote:
> > > > > > > > > On Wed Jul 21, 2021 at 12:53:51PM -0400, Andrey Grodzovsky 
> > > > > > > > > wrote:
> > > > > > > > > > On 2021-07-20 11:13 p.m., Jingwen Chen wrote:
> > > > > > > > > > > [Why]
> > > > > > > > > > > After embeded hw_fence to amdgpu_job, we need to add tdr 
> > > > > > > > > > > support
> > > > > > > > > > > for this feature.
> > > > > > > > > > > 
> > > > > > > > > > > [How]
> > > > > > > > > > > 1. Add a resubmit_flag for resubmit jobs.
> > > > > > > > > > > 2. Clear job fence from RCU and force complete vm flush 
> > > > > > > > > > > fences in
> > > > > > > > > > > pre_asic_reset
> > > > > > > > > > > 3. skip dma_fence_get for resubmit jobs and add a 
> > > > > > > > > > > dma_fence_put
> > > > > > > > > > > for guilty jobs.
> > > > > > > > > > > 
> > > > > > > > > > > Signed-off-by: Jack Zhang 
> > > > > > > > > > > Signed-off-by: Jingwen Chen 
> > > > > > > > > > > ---
> > > > > > > > > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 
> > > > > > > > > > > +++-
> > > > > > > > > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 
> > > > > > > > > > > +++-
> > > > > > > > > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  4 +++-
> > > > > > > > > > >   drivers/gpu/drm/scheduler/sched_main.c |  1 +
> > > > > > > > > > >   include/drm/gpu_scheduler.h    |  1 +
> > > > > > > > > > >   5 files changed, 27 insertions(+), 7 deletions(-)
> > > > > > > > > > > 
> > > > > > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > > > index 40461547701a..fe0237f72a09 100644
> > > > > > > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > > > @@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct
> > > > > > > > > > > amdgpu_device *adev)
> > > > > > > > > > >   int amdgpu_device_pre_asic_reset(struct 
> > > > > > > > > > > amdgpu_device *adev,
> > > > > > > > > > >    struct amdgpu_reset_context 
> > > > > > > > > > > *reset_context)
> > > > > > > > > > >   {
> > > > > > > > > > > -    int i, r = 0;
> > > > > > > > > > > +    int i, j, r = 0;
> > > > > > > > > > >   struct amdgpu_job *job = NULL;
> > > > > > > > > > >   bool need_full_reset =
> > > > > > > > > > >   test_bit(AMDGPU_NEED_FULL_RESET, 
> > > > > > > > > > > _context->flags);
> > > > > > > > > > > @@ -4406,6 +4406,16 @@ int
> > > > > > > > > > > amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > > > > > > > >  

Re: [PATCH 2/2] drm: add tdr support for embeded hw_fence

2021-07-22 Thread Jingwen Chen
On Thu Jul 22, 2021 at 01:50:09PM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-07-22 1:27 p.m., Jingwen Chen wrote:
> > On Thu Jul 22, 2021 at 01:17:13PM -0400, Andrey Grodzovsky wrote:
> > > On 2021-07-22 12:47 p.m., Jingwen Chen wrote:
> > > > On Thu Jul 22, 2021 at 06:24:28PM +0200, Christian König wrote:
> > > > > Am 22.07.21 um 16:45 schrieb Andrey Grodzovsky:
> > > > > > On 2021-07-22 6:45 a.m., Jingwen Chen wrote:
> > > > > > > On Wed Jul 21, 2021 at 12:53:51PM -0400, Andrey Grodzovsky wrote:
> > > > > > > > On 2021-07-20 11:13 p.m., Jingwen Chen wrote:
> > > > > > > > > [Why]
> > > > > > > > > After embeded hw_fence to amdgpu_job, we need to add tdr 
> > > > > > > > > support
> > > > > > > > > for this feature.
> > > > > > > > > 
> > > > > > > > > [How]
> > > > > > > > > 1. Add a resubmit_flag for resubmit jobs.
> > > > > > > > > 2. Clear job fence from RCU and force complete vm flush 
> > > > > > > > > fences in
> > > > > > > > >    pre_asic_reset
> > > > > > > > > 3. skip dma_fence_get for resubmit jobs and add a 
> > > > > > > > > dma_fence_put
> > > > > > > > >    for guilty jobs.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Jack Zhang 
> > > > > > > > > Signed-off-by: Jingwen Chen 
> > > > > > > > > ---
> > > > > > > > >      drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 
> > > > > > > > > +++-
> > > > > > > > >      drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 
> > > > > > > > > +++-
> > > > > > > > >      drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  4 +++-
> > > > > > > > >      drivers/gpu/drm/scheduler/sched_main.c |  1 +
> > > > > > > > >      include/drm/gpu_scheduler.h    |  1 +
> > > > > > > > >      5 files changed, 27 insertions(+), 7 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > index 40461547701a..fe0237f72a09 100644
> > > > > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > > > @@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct
> > > > > > > > > amdgpu_device *adev)
> > > > > > > > >      int amdgpu_device_pre_asic_reset(struct amdgpu_device 
> > > > > > > > > *adev,
> > > > > > > > >       struct amdgpu_reset_context 
> > > > > > > > > *reset_context)
> > > > > > > > >      {
> > > > > > > > > -    int i, r = 0;
> > > > > > > > > +    int i, j, r = 0;
> > > > > > > > >      struct amdgpu_job *job = NULL;
> > > > > > > > >      bool need_full_reset =
> > > > > > > > >      test_bit(AMDGPU_NEED_FULL_RESET, 
> > > > > > > > > _context->flags);
> > > > > > > > > @@ -4406,6 +4406,16 @@ int
> > > > > > > > > amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > > > > > >      if (!ring || !ring->sched.thread)
> > > > > > > > >      continue;
> > > > > > > > > +    /*clear job fence from fence drv to avoid 
> > > > > > > > > force_completion
> > > > > > > > > + *leave NULL and vm flush fence in fence drv */
> > > > > > > > > +    for (j = 0; j <= ring->fence_drv.num_fences_mask; j 
> > > > > > > > > ++) {
> > > > > > > > > +    struct dma_fence *old,**ptr;
> > > > > > > > > +    ptr = >fence_drv.fences[j];
> > > &g

Re: [PATCH 2/2] drm: add tdr support for embeded hw_fence

2021-07-22 Thread Jingwen Chen
On Thu Jul 22, 2021 at 01:17:13PM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-07-22 12:47 p.m., Jingwen Chen wrote:
> > On Thu Jul 22, 2021 at 06:24:28PM +0200, Christian König wrote:
> > > Am 22.07.21 um 16:45 schrieb Andrey Grodzovsky:
> > > > On 2021-07-22 6:45 a.m., Jingwen Chen wrote:
> > > > > On Wed Jul 21, 2021 at 12:53:51PM -0400, Andrey Grodzovsky wrote:
> > > > > > On 2021-07-20 11:13 p.m., Jingwen Chen wrote:
> > > > > > > [Why]
> > > > > > > After embeded hw_fence to amdgpu_job, we need to add tdr support
> > > > > > > for this feature.
> > > > > > > 
> > > > > > > [How]
> > > > > > > 1. Add a resubmit_flag for resubmit jobs.
> > > > > > > 2. Clear job fence from RCU and force complete vm flush fences in
> > > > > > >   pre_asic_reset
> > > > > > > 3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
> > > > > > >   for guilty jobs.
> > > > > > > 
> > > > > > > Signed-off-by: Jack Zhang 
> > > > > > > Signed-off-by: Jingwen Chen 
> > > > > > > ---
> > > > > > >     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
> > > > > > >     drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 
> > > > > > > +++-
> > > > > > >     drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  4 +++-
> > > > > > >     drivers/gpu/drm/scheduler/sched_main.c |  1 +
> > > > > > >     include/drm/gpu_scheduler.h    |  1 +
> > > > > > >     5 files changed, 27 insertions(+), 7 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > index 40461547701a..fe0237f72a09 100644
> > > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > > @@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct
> > > > > > > amdgpu_device *adev)
> > > > > > >     int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > > > >      struct amdgpu_reset_context *reset_context)
> > > > > > >     {
> > > > > > > -    int i, r = 0;
> > > > > > > +    int i, j, r = 0;
> > > > > > >     struct amdgpu_job *job = NULL;
> > > > > > >     bool need_full_reset =
> > > > > > >     test_bit(AMDGPU_NEED_FULL_RESET, 
> > > > > > > _context->flags);
> > > > > > > @@ -4406,6 +4406,16 @@ int
> > > > > > > amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > > > >     if (!ring || !ring->sched.thread)
> > > > > > >     continue;
> > > > > > > +    /*clear job fence from fence drv to avoid 
> > > > > > > force_completion
> > > > > > > + *leave NULL and vm flush fence in fence drv */
> > > > > > > +    for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
> > > > > > > +    struct dma_fence *old,**ptr;
> > > > > > > +    ptr = >fence_drv.fences[j];
> > > > > > > +    old = rcu_dereference_protected(*ptr, 1);
> > > > > > > +    if (old && test_bit(DMA_FENCE_FLAG_USER_BITS,
> > > > > > > >flags))) {
> > > > > > > +    RCU_INIT_POINTER(*ptr, NULL);
> > > > > > > +    }
> > > > > > Is this to avoid premature job free because of dma_fence_put inside
> > > > > > amdgpu_fence_process ?
> > > > > > I can't currently remember why but we probably want all the HW 
> > > > > > fences
> > > > > > currently in the ring to
> > > > > > be forced signaled - maybe better to test for 
> > > > > > DMA_FENCE_FLAG_USER_BITS
> > > > > > inside amdgpu_fence_process
> > > > > > and still do the signaling but not the dma_fence_put part
> > > > &g

Re: [PATCH 2/2] drm: add tdr support for embeded hw_fence

2021-07-22 Thread Jingwen Chen
On Thu Jul 22, 2021 at 06:24:28PM +0200, Christian König wrote:
> Am 22.07.21 um 16:45 schrieb Andrey Grodzovsky:
> > 
> > On 2021-07-22 6:45 a.m., Jingwen Chen wrote:
> > > On Wed Jul 21, 2021 at 12:53:51PM -0400, Andrey Grodzovsky wrote:
> > > > On 2021-07-20 11:13 p.m., Jingwen Chen wrote:
> > > > > [Why]
> > > > > After embeded hw_fence to amdgpu_job, we need to add tdr support
> > > > > for this feature.
> > > > > 
> > > > > [How]
> > > > > 1. Add a resubmit_flag for resubmit jobs.
> > > > > 2. Clear job fence from RCU and force complete vm flush fences in
> > > > >  pre_asic_reset
> > > > > 3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
> > > > >  for guilty jobs.
> > > > > 
> > > > > Signed-off-by: Jack Zhang 
> > > > > Signed-off-by: Jingwen Chen 
> > > > > ---
> > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
> > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 +++-
> > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  4 +++-
> > > > >    drivers/gpu/drm/scheduler/sched_main.c |  1 +
> > > > >    include/drm/gpu_scheduler.h    |  1 +
> > > > >    5 files changed, 27 insertions(+), 7 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > index 40461547701a..fe0237f72a09 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > @@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct
> > > > > amdgpu_device *adev)
> > > > >    int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > >     struct amdgpu_reset_context *reset_context)
> > > > >    {
> > > > > -    int i, r = 0;
> > > > > +    int i, j, r = 0;
> > > > >    struct amdgpu_job *job = NULL;
> > > > >    bool need_full_reset =
> > > > >    test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
> > > > > @@ -4406,6 +4406,16 @@ int
> > > > > amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > > >    if (!ring || !ring->sched.thread)
> > > > >    continue;
> > > > > +    /*clear job fence from fence drv to avoid force_completion
> > > > > + *leave NULL and vm flush fence in fence drv */
> > > > > +    for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
> > > > > +    struct dma_fence *old,**ptr;
> > > > > +    ptr = >fence_drv.fences[j];
> > > > > +    old = rcu_dereference_protected(*ptr, 1);
> > > > > +    if (old && test_bit(DMA_FENCE_FLAG_USER_BITS,
> > > > > >flags))) {
> > > > > +    RCU_INIT_POINTER(*ptr, NULL);
> > > > > +    }
> > > > 
> > > > Is this to avoid premature job free because of dma_fence_put inside
> > > > amdgpu_fence_process ?
> > > > I can't currently remember why but we probably want all the HW fences
> > > > currently in the ring to
> > > > be forced signaled - maybe better to test for DMA_FENCE_FLAG_USER_BITS
> > > > inside amdgpu_fence_process
> > > > and still do the signaling but not the dma_fence_put part
> > > > 
> > > > Andrey
> > > Hi Andrey,
> > > 
> > > This is to avoid signaling the same fence twice. If we still do the
> > > signaling, then the job in the pending list will be signaled first in
> > > force_completion, and later be signaled in resubmit. This will go to
> > > BUG() in amdgpu_fence_process.
> > 
> > 
> > Oh, i see, how about just adding 'skip' flag to amdgpu_ring and setting
> > it before calling
> > amdgpu_fence_driver_force_completion and resetting it after, then inside
> > amdgpu_fence_driver_force_completion
> > you can just skip the signaling part with this flag for fences with
> > DMA_FENCE_FLAG_USER_BITS set
> > Less lines of code at least.
> 
> Still sounds quite a bit hacky.
> 
> I would rather suggest to completely drop the approach with
> amdg

Re: [PATCH 2/2] drm: add tdr support for embeded hw_fence

2021-07-22 Thread Jingwen Chen
On Thu Jul 22, 2021 at 10:45:40AM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-07-22 6:45 a.m., Jingwen Chen wrote:
> > On Wed Jul 21, 2021 at 12:53:51PM -0400, Andrey Grodzovsky wrote:
> > > On 2021-07-20 11:13 p.m., Jingwen Chen wrote:
> > > > [Why]
> > > > After embeded hw_fence to amdgpu_job, we need to add tdr support
> > > > for this feature.
> > > > 
> > > > [How]
> > > > 1. Add a resubmit_flag for resubmit jobs.
> > > > 2. Clear job fence from RCU and force complete vm flush fences in
> > > >  pre_asic_reset
> > > > 3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
> > > >  for guilty jobs.
> > > > 
> > > > Signed-off-by: Jack Zhang 
> > > > Signed-off-by: Jingwen Chen 
> > > > ---
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 +++-
> > > >drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  4 +++-
> > > >drivers/gpu/drm/scheduler/sched_main.c |  1 +
> > > >include/drm/gpu_scheduler.h|  1 +
> > > >5 files changed, 27 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> > > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > index 40461547701a..fe0237f72a09 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > @@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct 
> > > > amdgpu_device *adev)
> > > >int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > > >  struct amdgpu_reset_context 
> > > > *reset_context)
> > > >{
> > > > -   int i, r = 0;
> > > > +   int i, j, r = 0;
> > > > struct amdgpu_job *job = NULL;
> > > > bool need_full_reset =
> > > > test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
> > > > @@ -4406,6 +4406,16 @@ int amdgpu_device_pre_asic_reset(struct 
> > > > amdgpu_device *adev,
> > > > if (!ring || !ring->sched.thread)
> > > > continue;
> > > > +   /*clear job fence from fence drv to avoid 
> > > > force_completion
> > > > +*leave NULL and vm flush fence in fence drv */
> > > > +   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) 
> > > > {
> > > > +   struct dma_fence *old,**ptr;
> > > > +   ptr = >fence_drv.fences[j];
> > > > +   old = rcu_dereference_protected(*ptr, 1);
> > > > +   if (old && test_bit(DMA_FENCE_FLAG_USER_BITS, 
> > > > >flags))) {
> > > > +   RCU_INIT_POINTER(*ptr, NULL);
> > > > +   }
> > > 
> > > Is this to avoid premature job free because of dma_fence_put inside
> > > amdgpu_fence_process ?
> > > I can't currently remember why but we probably want all the HW fences
> > > currently in the ring to
> > > be forced signaled - maybe better to test for DMA_FENCE_FLAG_USER_BITS
> > > inside amdgpu_fence_process
> > > and still do the signaling but not the dma_fence_put part
> > > 
> > > Andrey
> > Hi Andrey,
> > 
> > This is to avoid signaling the same fence twice. If we still do the
> > signaling, then the job in the pending list will be signaled first in
> > force_completion, and later be signaled in resubmit. This will go to
> > BUG() in amdgpu_fence_process.
> 
> 
> Oh, i see, how about just adding 'skip' flag to amdgpu_ring and setting it
> before calling
> amdgpu_fence_driver_force_completion and resetting it after, then inside
> amdgpu_fence_driver_force_completion
> you can just skip the signaling part with this flag for fences with
> DMA_FENCE_FLAG_USER_BITS set
> Less lines of code at least.
> 
> Andrey
Hi Andrey,

In this way, this issue still exists. If we just skip it in the
force_completion, these fences are still in the RCU fence array. So when
the FLR finishes, the eop interrupt function of ib_test will call
amdgpu_fence_process, the skipped fences will be signaled along with
ib_test fences and signaled a

Re: [PATCH 2/2] drm: add tdr support for embeded hw_fence

2021-07-22 Thread Jingwen Chen
On Wed Jul 21, 2021 at 12:53:51PM -0400, Andrey Grodzovsky wrote:
> 
> On 2021-07-20 11:13 p.m., Jingwen Chen wrote:
> > [Why]
> > After embeded hw_fence to amdgpu_job, we need to add tdr support
> > for this feature.
> > 
> > [How]
> > 1. Add a resubmit_flag for resubmit jobs.
> > 2. Clear job fence from RCU and force complete vm flush fences in
> > pre_asic_reset
> > 3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
> > for guilty jobs.
> > 
> > Signed-off-by: Jack Zhang 
> > Signed-off-by: Jingwen Chen 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 +++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  4 +++-
> >   drivers/gpu/drm/scheduler/sched_main.c |  1 +
> >   include/drm/gpu_scheduler.h|  1 +
> >   5 files changed, 27 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 40461547701a..fe0237f72a09 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device 
> > *adev)
> >   int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> >  struct amdgpu_reset_context *reset_context)
> >   {
> > -   int i, r = 0;
> > +   int i, j, r = 0;
> > struct amdgpu_job *job = NULL;
> > bool need_full_reset =
> > test_bit(AMDGPU_NEED_FULL_RESET, _context->flags);
> > @@ -4406,6 +4406,16 @@ int amdgpu_device_pre_asic_reset(struct 
> > amdgpu_device *adev,
> > if (!ring || !ring->sched.thread)
> > continue;
> > +   /*clear job fence from fence drv to avoid force_completion
> > +*leave NULL and vm flush fence in fence drv */
> > +   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
> > +   struct dma_fence *old,**ptr;
> > +   ptr = >fence_drv.fences[j];
> > +   old = rcu_dereference_protected(*ptr, 1);
> > +   if (old && test_bit(DMA_FENCE_FLAG_USER_BITS, 
> > >flags))) {
> > +   RCU_INIT_POINTER(*ptr, NULL);
> > +   }
> 
> 
> Is this to avoid premature job free because of dma_fence_put inside
> amdgpu_fence_process ?
> I can't currently remember why but we probably want all the HW fences
> currently in the ring to
> be forced signaled - maybe better to test for DMA_FENCE_FLAG_USER_BITS
> inside amdgpu_fence_process
> and still do the signaling but not the dma_fence_put part
> 
> Andrey

Hi Andrey,

This is to avoid signaling the same fence twice. If we still do the
signaling, then the job in the pending list will be signaled first in
force_completion, and later be signaled in resubmit. This will go to
BUG() in amdgpu_fence_process.

> 
> > +   }
> > /* after all hw jobs are reset, hw fence is meaningless, so 
> > force_completion */
> > amdgpu_fence_driver_force_completion(ring);
> > }
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > index eecf21d8ec33..815776c9a013 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> > @@ -156,11 +156,17 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, 
> > struct dma_fence **f, struct amd
> > job->ring = ring;
> > }
> > -   seq = ++ring->fence_drv.sync_seq;
> > -   dma_fence_init(fence, _fence_ops,
> > -  >fence_drv.lock,
> > -  adev->fence_context + ring->idx,
> > -  seq);
> > +   if (job != NULL && job->base.resubmit_flag == 1) {
> > +   /* reinit seq for resubmitted jobs */
> > +   seq = ++ring->fence_drv.sync_seq;
> > +   fence->seqno = seq;
> > +   } else {
> > +   seq = ++ring->fence_drv.sync_seq;
> 
> 
> Seems like you could do the above line only once above if-else as it was
> before

Sure, I will modify this.


Best Regards,
JingWen Chen
> 
> > +   dma_fence_init(fence, _fence_ops,
> > +   >fence_drv.lock,
> > +   adev->fence_context + ring->idx,
> > +   seq);
>

[PATCH 1/2] drm/amd/amdgpu embed hw_fence into amdgpu_job

2021-07-21 Thread Jingwen Chen
From: Jack Zhang 

Why: Previously the hw fence was allocated separately from the job.
It caused historical lifetime issues and corner cases.
The ideal situation is to take fence to manage both job
and fence's lifetime, and simplify the design of gpu-scheduler.

How:
We propose to embed hw_fence into amdgpu_job.
1. We cover the normal job submission by this method.
2. For ib_test and submissions without a parent job, keep the
legacy way of creating a hw fence separately.

Signed-off-by: Jingwen Chen 
Signed-off-by: Jack Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   | 62 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 35 
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  2 +-
 8 files changed, 80 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b6d33f13b476..bad403978bac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -714,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum 
kgd_engine_type engine,
ret = dma_fence_wait(f, false);
 
 err_ib_sched:
-   dma_fence_put(f);
amdgpu_job_free(job);
 err:
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 536005bff24a..277128846dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1414,7 +1414,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct 
amdgpu_ring *ring)
continue;
}
job = to_amdgpu_job(s_job);
-   if (preempted && job->fence == fence)
+   if (preempted && (&job->hw_fence) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 30772608eac6..eecf21d8ec33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -133,25 +133,40 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct 
amdgpu_job *job,
  unsigned flags)
 {
struct amdgpu_device *adev = ring->adev;
-   struct amdgpu_fence *fence;
+   struct dma_fence *fence;
+   struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
 
-   fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-   if (fence == NULL)
-   return -ENOMEM;
+   if (job == NULL) {
+   /* create a separate hw fence */
+   am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+   if (am_fence == NULL)
+   return -ENOMEM;
+   fence = &am_fence->base;
+   am_fence->ring = ring;
+   } else {
+   /* take use of job-embedded fence */
+   fence = &job->hw_fence;
+   job->ring = ring;
+   }
 
seq = ++ring->fence_drv.sync_seq;
-   fence->ring = ring;
-   dma_fence_init(&fence->base, &amdgpu_fence_ops,
+   dma_fence_init(fence, &amdgpu_fence_ops,
   &ring->fence_drv.lock,
   adev->fence_context + ring->idx,
   seq);
+
+   if (job != NULL) {
+   /* mark this fence has a parent job */
+   set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->flags);
+   }
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
@@ -174,9 +189,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f,
/* This function can't be called concurrently anyway, otherwise
 * emitting the fence would mess up the hardware ring buffer.
 */
-   rcu_assign_pointer(*ptr, dma_fence_get(&fence->base));
+   rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-   *f = &fence->base;
+   *f = fence;
 
return 0;
 }
@@ -636,8 +651,16 @@ static const char *amdgpu_fence_get_driver_name(struct 
dma_fence *fence)
 
 static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 {
-   struct amdgpu_fence *fence = to_amdgpu_fence(f);
-  

[PATCH 2/2] drm: add tdr support for embeded hw_fence

2021-07-21 Thread Jingwen Chen
[Why]
After embedding hw_fence into amdgpu_job, we need to add tdr support
for this feature.

[How]
1. Add a resubmit_flag for resubmit jobs.
2. Clear job fence from RCU and force complete vm flush fences in
   pre_asic_reset
3. skip dma_fence_get for resubmit jobs and add a dma_fence_put
   for guilty jobs.

Signed-off-by: Jack Zhang 
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 16 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c|  4 +++-
 drivers/gpu/drm/scheduler/sched_main.c |  1 +
 include/drm/gpu_scheduler.h|  1 +
 5 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 40461547701a..fe0237f72a09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4382,7 +4382,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 struct amdgpu_reset_context *reset_context)
 {
-   int i, r = 0;
+   int i, j, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
	test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
@@ -4406,6 +4406,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
if (!ring || !ring->sched.thread)
continue;
 
+   /*clear job fence from fence drv to avoid force_completion
+*leave NULL and vm flush fence in fence drv */
+   for (j = 0; j <= ring->fence_drv.num_fences_mask; j ++) {
+   struct dma_fence *old,**ptr;
+   ptr = &ring->fence_drv.fences[j];
+   old = rcu_dereference_protected(*ptr, 1);
+   if (old && test_bit(DMA_FENCE_FLAG_USER_BITS, &old->flags)) {
+   RCU_INIT_POINTER(*ptr, NULL);
+   }
+   }
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index eecf21d8ec33..815776c9a013 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -156,11 +156,17 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
job->ring = ring;
}
 
-   seq = ++ring->fence_drv.sync_seq;
-   dma_fence_init(fence, &amdgpu_fence_ops,
-  &ring->fence_drv.lock,
-  adev->fence_context + ring->idx,
-  seq);
+   if (job != NULL && job->base.resubmit_flag == 1) {
+   /* reinit seq for resubmitted jobs */
+   seq = ++ring->fence_drv.sync_seq;
+   fence->seqno = seq;
+   } else {
+   seq = ++ring->fence_drv.sync_seq;
+   dma_fence_init(fence, &amdgpu_fence_ops,
+   &ring->fence_drv.lock,
+   adev->fence_context + ring->idx,
+   seq);
+   }
 
if (job != NULL) {
/* mark this fence has a parent job */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 7c426e225b24..d6f848adc3f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -241,6 +241,7 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if 
VRAM lost */
 
if (finished->error < 0) {
+   dma_fence_put(&job->hw_fence);
DRM_INFO("Skip scheduling IBs!\n");
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
@@ -249,7 +250,8 @@ static struct dma_fence *amdgpu_job_run(struct 
drm_sched_job *sched_job)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
 
-   dma_fence_get(fence);
+   if (!job->base.resubmit_flag)
+   dma_fence_get(fence);
amdgpu_job_free_resources(job);
 
fence = r ? ERR_PTR(r) : fence;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index f4f474944169..5a36ab5aea2d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -544,6 +544,7 @@ void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler 
*sched, int max)
	dma_fence_set_error(&s_job->s_fence->finished, -ECANCELED);
 
dma_fence_put(s_job->s_fence->parent);
+ 

[PATCH v2] drm/amd/amdgpu: consider kernel job always not guilty

2021-07-21 Thread Jingwen Chen
[Why]
Currently all timedout job will be considered to be guilty. In SRIOV
multi-vf use case, the vf flr happens first and then job time out is
found. There can be several jobs timeout during a very small time slice.
And if the innocent sdma job time out is found before the real bad
job, then the innocent sdma job will be set to guilty. This will lead
to a page fault after resubmitting job.

[How]
If the job is a kernel job, we will always consider it not guilty

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 37fa199be8b3..40461547701a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4410,7 +4410,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
amdgpu_fence_driver_force_completion(ring);
}
 
-   if(job)
+   if (job && job->vm)
	drm_sched_increase_karma(&job->base);
 
r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
@@ -4874,7 +4874,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
job ? job->base.id : -1, hive->hive_id);
amdgpu_put_xgmi_hive(hive);
-   if (job)
+   if (job && job->vm)
	drm_sched_increase_karma(&job->base);
return 0;
}
@@ -4898,7 +4898,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
job ? job->base.id : -1);
 
/* even we skipped this reset, still need to set the job to 
guilty */
-   if (job)
+   if (job && job->vm)
drm_sched_increase_karma(&job->base);
goto skip_recovery;
}
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: consider paging job always not guilty

2021-07-20 Thread Jingwen Chen
[Why]
Currently all timedout job will be considered to be guilty. In SRIOV
multi-vf use case, the vf flr happens first and then job time out is
found. There can be several jobs timeout during a very small time slice.
And if the innocent sdma job time out is found before the real bad
job, then the innocent sdma job will be set to guilty. This will lead
to a page fault after resubmitting job.

[How]
If the job is a paging job, we will always consider it not guilty

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 37fa199be8b3..40461547701a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4410,7 +4410,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
amdgpu_fence_driver_force_completion(ring);
}
 
-   if(job)
+   if (job && job->vm)
drm_sched_increase_karma(&job->base);
 
r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
@@ -4874,7 +4874,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
another already in progress",
job ? job->base.id : -1, hive->hive_id);
amdgpu_put_xgmi_hive(hive);
-   if (job)
+   if (job && job->vm)
drm_sched_increase_karma(&job->base);
return 0;
}
@@ -4898,7 +4898,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
job ? job->base.id : -1);
 
/* even we skipped this reset, still need to set the job to 
guilty */
-   if (job)
+   if (job && job->vm)
drm_sched_increase_karma(&job->base);
goto skip_recovery;
}
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: vm entities should have kernel priority

2021-07-19 Thread Jingwen Chen
[Why]
Current vm_pte entities have NORMAL priority, in SRIOV multi-vf
use case, the vf flr happens first and then job time out is found.
There can be several jobs timeout during a very small time slice.
And if the innocent sdma job time out is found before the real bad
job, then the innocent sdma job will be set to guilty as it only
has NORMAL priority. This will lead to a page fault after
resubmitting job.

[How]
sdma should always have KERNEL priority. The kernel job will always
be resubmitted.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 358316d6a38c..f7526b67cc5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2923,13 +2923,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->done);
 
/* create scheduler entities for page table updates */
-   r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
+   r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_KERNEL,
  adev->vm_manager.vm_pte_scheds,
  adev->vm_manager.vm_pte_num_scheds, NULL);
if (r)
return r;
 
-   r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+   r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_KERNEL,
  adev->vm_manager.vm_pte_scheds,
  adev->vm_manager.vm_pte_num_scheds, NULL);
if (r)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: SRIOV flr_work should take write_lock

2021-07-01 Thread Jingwen Chen
[Why]
If flr_work takes read_lock, then other threads who takes
read_lock can access hardware when host is doing vf flr.

[How]
flr_work should take write_lock to avoid this case.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 3ee481557fc9..ff2307d7ee0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -252,7 +252,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
 * otherwise the mailbox msg will be ruined/reseted by
 * the VF FLR.
 */
-   if (!down_read_trylock(&adev->reset_sem))
+   if (!down_write_trylock(&adev->reset_sem))
return;
 
amdgpu_virt_fini_data_exchange(adev);
@@ -268,7 +268,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct 
*work)
 
 flr_done:
atomic_set(>in_gpu_reset, 0);
-   up_read(&adev->reset_sem);
+   up_write(&adev->reset_sem);
 
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 48e588d3c409..9f7aac435d69 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -273,7 +273,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
 * otherwise the mailbox msg will be ruined/reseted by
 * the VF FLR.
 */
-   if (!down_read_trylock(&adev->reset_sem))
+   if (!down_write_trylock(&adev->reset_sem))
return;
 
amdgpu_virt_fini_data_exchange(adev);
@@ -289,7 +289,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct 
*work)
 
 flr_done:
atomic_set(>in_gpu_reset, 0);
-   up_read(&adev->reset_sem);
+   up_write(&adev->reset_sem);
 
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCHv2] drm/amd/amdgpu:save psp ring wptr to avoid attack

2021-05-26 Thread Jingwen Chen
From: Victor Zhao 

[Why]
When some tools performing psp mailbox attack, the readback value
of register can be a random value which may break psp.

[How]
Use a psp wptr cache mechanism to avoid the change made by attack.

v2: unify change and add detailed reason

Change-Id: Idee78e8c1c781463048f2f6311fdc70488ef05b2
Signed-off-by: Victor Zhao 
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 +
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 3 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c   | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 46a5328e00e0..60aa99a39a74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -76,6 +76,7 @@ struct psp_ring
uint64_tring_mem_mc_addr;
void*ring_mem_handle;
uint32_tring_size;
+   uint32_tring_wptr;
 };
 
 /* More registers may will be supported */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 1f2e7e35c91e..0fd1ed918627 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -733,7 +733,7 @@ static uint32_t psp_v11_0_ring_get_wptr(struct psp_context 
*psp)
struct amdgpu_device *adev = psp->adev;
 
if (amdgpu_sriov_vf(adev))
-   data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+   data = psp->km_ring.ring_wptr;
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
 
@@ -747,6 +747,7 @@ static void psp_v11_0_ring_set_wptr(struct psp_context 
*psp, uint32_t value)
if (amdgpu_sriov_vf(adev)) {
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, 
GFX_CTRL_CMD_ID_CONSUME_CMD);
+   psp->km_ring.ring_wptr = value;
} else
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index f2e725f72d2f..908664a5774b 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -379,7 +379,7 @@ static uint32_t psp_v3_1_ring_get_wptr(struct psp_context 
*psp)
struct amdgpu_device *adev = psp->adev;
 
if (amdgpu_sriov_vf(adev))
-   data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+   data = psp->km_ring.ring_wptr;
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
return data;
@@ -394,6 +394,7 @@ static void psp_v3_1_ring_set_wptr(struct psp_context *psp, 
uint32_t value)
/* send interrupt to PSP for SRIOV ring write pointer update */
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
GFX_CTRL_CMD_ID_CONSUME_CMD);
+   psp->km_ring.ring_wptr = value;
} else
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
 }
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu:save psp ring wptr in SRIOV to avoid attack

2021-05-26 Thread Jingwen Chen
From: Victor Zhao 

save psp ring wptr in SRIOV to avoid attack, and to avoid extra changes to
MP0_SMN_C2PMSG_102 reg

Change-Id: Idee78e8c1c781463048f2f6311fdc70488ef05b2
Signed-off-by: Victor Zhao 
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 +
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 3 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c   | 3 ++-
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 55378c6b9722..20e06b3ec686 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2701,6 +2701,7 @@ int psp_ring_cmd_submit(struct psp_context *psp,
/* Update the write Pointer in DWORDs */
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % 
ring_size_dw;
psp_ring_set_wptr(psp, psp_write_ptr_reg);
+   ring->ring_wptr = psp_write_ptr_reg;
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 46a5328e00e0..60aa99a39a74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -76,6 +76,7 @@ struct psp_ring
uint64_tring_mem_mc_addr;
void*ring_mem_handle;
uint32_tring_size;
+   uint32_tring_wptr;
 };
 
 /* More registers may will be supported */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 1f2e7e35c91e..4a32b0c84ef4 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -474,6 +474,7 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
return ret;
}
 
+   ring->ring_wptr = 0;
/* Write low address of the ring to C2PMSG_102 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
@@ -733,7 +734,7 @@ static uint32_t psp_v11_0_ring_get_wptr(struct psp_context 
*psp)
struct amdgpu_device *adev = psp->adev;
 
if (amdgpu_sriov_vf(adev))
-   data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+   data = psp->km_ring.ring_wptr;
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index f2e725f72d2f..160f78eb6403 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -237,6 +237,7 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
return ret;
}
 
+   ring->ring_wptr = 0;
/* Write low address of the ring to C2PMSG_102 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
@@ -379,7 +380,7 @@ static uint32_t psp_v3_1_ring_get_wptr(struct psp_context 
*psp)
struct amdgpu_device *adev = psp->adev;
 
if (amdgpu_sriov_vf(adev))
-   data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+   data = psp->km_ring.ring_wptr;
else
data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
return data;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: fix refcount leak

2021-05-17 Thread Jingwen Chen
[Why]
the gem object rfb->base.obj[0] is get according to num_planes
in amdgpufb_create, but is not put according to num_planes

[How]
put rfb->base.obj[0] in amdgpu_fbdev_destroy according to num_planes

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 4f10c4529840..09b048647523 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -288,10 +288,13 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev 
*rfbdev)
 {
struct amdgpu_framebuffer *rfb = &rfbdev->rfb;
+   int i;
 
drm_fb_helper_unregister_fbi(&rfbdev->helper);
 
if (rfb->base.obj[0]) {
+   for (i = 0; i < rfb->base.format->num_planes; i++)
+   drm_gem_object_put(rfb->base.obj[0]);
amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
rfb->base.obj[0] = NULL;
drm_framebuffer_unregister_private(>base);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: destroy pinned gem obj according to refcount

2021-05-17 Thread Jingwen Chen
[Why]
the fb gem object is get for 4 times when amdgpu_display_framebuffer_init,
while this object is put for less than 4 times. This can lead to warning trace
when unloading amdgpu

[How]
put gem object according to refcount in amdgpufb_destroy_pinned_object

Warning trace attached:
[324584.505752] amdgpu :00:07.0: amdgpu: amdgpu: finishing device.
[324584.510737] [drm] clean up the vf2pf work item
[324584.532205] [drm] free PSP TMR buffer
[324584.591206] [ cut here ]
[324584.591449] WARNING: CPU: 1 PID: 5800 at 
/var/lib/dkms/amdgpu/5.11.11.119-1259830/build/include/drm/ttm/ttm_resource.h:196
 amdgpu_vram_mgr_fini+0x72/0x150 [amdgpu]
[324584.591450] Modules linked in: amdgpu(OE-) amd_iommu_v2 amdttm(OE) 
amd_sched(OE) amdkcl(OE) drm_kms_helper drm i2c_algo_bit fb_sys_fops 
syscopyarea sysfillrect sysimgblt intel_rapl_msr intel_rapl_common kvm 
irqbypass snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg 
crct10dif_pclmul snd_hda_codec crc32_pclmul ghash_clmulni_intel snd_hda_core 
snd_hwdep snd_pcm aesni_intel aes_x86_64 crypto_simd snd_seq_midi cryptd 
snd_seq_midi_event glue_helper snd_rawmidi snd_seq input_leds snd_seq_device 
serio_raw snd_timer snd mac_hid soundcore qemu_fw_cfg sch_fq_codel binfmt_misc 
parport_pc ppdev lp parport ip_tables x_tables autofs4 8139too psmouse floppy 
8139cp mii i2c_piix4 pata_acpi [last unloaded: amd_iommu_v2]
[324584.591479] CPU: 1 PID: 5800 Comm: modprobe Tainted: GW  OE 
5.3.0-61-generic #55~18.04.1-Ubuntu
[324584.591480] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.10.2-1ubuntu1 04/01/2014
[324584.591538] RIP: 0010:amdgpu_vram_mgr_fini+0x72/0x150 [amdgpu]
[324584.591540] Code: 00 00 41 c6 84 24 40 5d 00 00 00 4c 89 f6 e8 85 9d fa ff 
85 c0 74 17 5b 41 5c 41 5d 41 5e 41 5f 5d c3 4c 89 ff e8 51 d3 a9 dc <0f> 0b eb 
c3 4d 8d b4 24 90 5e 00 00 4d 8d ac 24 98 5e 00 00 4c 89
[324584.591541] RSP: 0018:9ce444e7fce8 EFLAGS: 00010282
[324584.591542] RAX: 0024 RBX: 8e86b02c5d60 RCX: 

[324584.591543] RDX:  RSI: 8e86b7a97448 RDI: 
8e86b7a97448
[324584.591543] RBP: 9ce444e7fd10 R08: 0405 R09: 
0004
[324584.591544] R10: 9ce444e7fcd0 R11: 0001 R12: 
8e86b02c
[324584.591544] R13: 8e86b02c5da0 R14: 8e86b02c5d40 R15: 
c0c702a8
[324584.591545] FS:  7fea6fac0540() GS:8e86b7a8() 
knlGS:
[324584.591546] CS:  0010 DS:  ES:  CR0: 80050033
[324584.591547] CR2: 55b9092b6048 CR3: 00022f962004 CR4: 
003606e0
[324584.591550] DR0:  DR1:  DR2: 

[324584.591550] DR3:  DR6: fffe0ff0 DR7: 
0400
[324584.591551] Call Trace:
[324584.591605]  amdgpu_ttm_fini+0xc7/0x230 [amdgpu]
[324584.591657]  amdgpu_bo_fini+0x12/0x40 [amdgpu]
[324584.591717]  gmc_v10_0_sw_fini+0x32/0x40 [amdgpu]
[324584.591767]  amdgpu_device_fini+0x373/0x560 [amdgpu]
[324584.591831]  amdgpu_driver_unload_kms+0x43/0x70 [amdgpu]
[324584.591879]  amdgpu_pci_remove+0x3b/0x60 [amdgpu]
[324584.591950]  pci_device_remove+0x3e/0xc0
[324584.591981]  device_release_driver_internal+0xe0/0x1b0
[324584.591982]  driver_detach+0x49/0x90
[324584.591984]  bus_remove_driver+0x59/0xd0
[324584.591985]  driver_unregister+0x2c/0x40
[324584.591986]  pci_unregister_driver+0x22/0xa0
[324584.592071]  amdgpu_exit+0x15/0x629 [amdgpu]
[324584.592121]  __x64_sys_delete_module+0x146/0x290
[324584.592148]  do_syscall_64+0x5a/0x130
[324584.592165]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[324584.592183] RIP: 0033:0x7fea6f5e4047
[324584.592185] Code: 73 01 c3 48 8b 0d 41 8e 2c 00 f7 d8 64 89 01 48 83 c8 ff 
c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 
f0 ff ff 73 01 c3 48 8b 0d 11 8e 2c 00 f7 d8 64 89 01 48
[324584.592186] RSP: 002b:7ffdfa3d75a8 EFLAGS: 0206 ORIG_RAX: 
00b0
[324584.592187] RAX: ffda RBX: 55b9092ae120 RCX: 
7fea6f5e4047
[324584.592187] RDX:  RSI: 0800 RDI: 
55b9092ae188
[324584.592188] RBP: 55b9092ae120 R08: 7ffdfa3d6551 R09: 

[324584.592188] R10: 7fea6f660c40 R11: 0206 R12: 
55b9092ae188
[324584.592189] R13: 0001 R14: 55b9092ae188 R15: 
7ffdfa3d8990
[324584.592190] ---[ end trace 4ea03bb6309ad6c3 ]---

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 4f10c4529840..afdc2c48c060 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -106,7 +106,7 @@ int amdgpu_align_pitch(struct amdgpu_device *adev, int 
width, int cpp, bool tile
 static void amdgpufb_destroy_pinned_object(struct drm_gem_

[PATCH] drm/amd/amdgpu: add fini virt data exchange to ip_suspend

2021-03-04 Thread Jingwen Chen
[Why]
when try to shutdown guest vm in sriov mode, virt data
exchange is not fini. After vram lost, trying to write
vram could hang cpu.

[How]
add fini virt data exchange in ip_suspend

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a11760ec3924..bec725b50c1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2774,8 +2774,10 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 {
int r;
 
-   if (amdgpu_sriov_vf(adev))
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_fini_data_exchange(adev);
amdgpu_virt_request_full_gpu(adev, false);
+   }
 
r = amdgpu_device_ip_suspend_phase1(adev);
if (r)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: fini data exchange when req_gpu_fini in SRIOV

2021-03-04 Thread Jingwen Chen
Do fini data exchange every time req_gpu_fini is called in SRIOV

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 3 +++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a11760ec3924..e3ed52f66414 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3601,10 +3601,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
/* make sure IB test finished before entering exclusive mode
 * to avoid preemption on IB test
 * */
-   if (amdgpu_sriov_vf(adev)) {
+   if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
-   amdgpu_virt_fini_data_exchange(adev);
-   }
 
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3dd7eec52344..af1e5d8fc2b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -114,6 +114,9 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device 
*adev, bool init)
struct amdgpu_virt *virt = &adev->virt;
int r;
 
+   if (!init)
+   amdgpu_virt_fini_data_exchange(adev);
+
if (virt->ops && virt->ops->req_full_gpu) {
r = virt->ops->req_full_gpu(adev, init);
if (r)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: move inc gpu_reset_counter after drm_sched_stop

2021-02-25 Thread Jingwen Chen
Move gpu_reset_counter after drm_sched_stop to avoid race
condition caused by job submitted between reset_count +1 and
drm_sched_stop.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f0f7ed42ee7f..703b96cf3560 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4447,7 +4447,6 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device 
*adev,
down_write(&adev->reset_sem);
}
 
-   atomic_inc(&adev->gpu_reset_counter);
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -4708,6 +4707,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (need_emergency_restart)
amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
}
+   atomic_inc(&tmp_adev->gpu_reset_counter);
}
 
if (need_emergency_restart)
@@ -5050,6 +5050,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev 
*pdev, pci_channel_sta
 
drm_sched_stop(&ring->sched, NULL);
}
+   atomic_inc(&adev->gpu_reset_counter);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
/* Permanent error, prepare for device removal */
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amd/amdgpu: force flush resubmit job

2021-02-24 Thread Jingwen Chen
[Why]
when a job is scheduled during TDR(after device reset count
increase and before drm_sched_stop), this job won't do vm_flush
when resubmit itself after GPU reset done. This can lead to
a page fault.

[How]
Always do vm_flush for resubmit job.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fdbe7d4e8b8b..4af2c5d15950 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1088,7 +1088,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct 
amdgpu_job *job,
if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
 
-   if (amdgpu_vmid_had_gpu_reset(adev, id)) {
+   if (amdgpu_vmid_had_gpu_reset(adev, id) ||
+   (job->base.flags & DRM_FLAG_RESUBMIT_JOB)) {
gds_switch_needed = true;
vm_flush_needed = true;
pasid_mapping_needed = true;
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm: add a flag to indicate job is resubmitted

2021-02-24 Thread Jingwen Chen
Add a flag in drm_sched_job to indicate the job resubmit.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/scheduler/sched_main.c | 2 ++
 include/drm/gpu_scheduler.h| 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index da24c4e8b9fb..d59ac90f5081 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -517,6 +517,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler 
*sched)
if (found_guilty && s_job->s_fence->scheduled.context == 
guilty_context)
dma_fence_set_error(&s_job->s_fence->finished, -ECANCELED);
 
+   s_job->flags |= DRM_FLAG_RESUBMIT_JOB;
dma_fence_put(s_job->s_fence->parent);
fence = sched->ops->run_job(s_job);
 
@@ -565,6 +566,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
if (!job->s_fence)
return -ENOMEM;
job->id = atomic64_inc_return(&sched->job_id_count);
+   job->flags = 0;
 
INIT_LIST_HEAD(&job->node);
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 92436553fd6a..a22122f98c9c 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -29,6 +29,7 @@
 #include 
 
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
+#define DRM_FLAG_RESUBMIT_JOB (1 << 0)
 
 struct drm_gpu_scheduler;
 struct drm_sched_rq;
@@ -198,6 +199,7 @@ struct drm_sched_job {
enum drm_sched_priority s_priority;
struct drm_sched_entity  *entity;
struct dma_fence_cb cb;
+   uint32_tflags;
 };
 
 static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: add error handling to amdgpu_virt_read_pf2vf_data

2021-01-19 Thread Jingwen Chen
[Why]
when vram lost happened in guest, try to write vram can lead to
kernel stuck.

[How]
When the readback data is invalid, don't do write work, directly
reschedule a new work.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index c649944e49da..3dd7eec52344 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -558,10 +558,14 @@ static int amdgpu_virt_write_vf2pf_data(struct 
amdgpu_device *adev)
 static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
 {
struct amdgpu_device *adev = container_of(work, struct amdgpu_device, 
virt.vf2pf_work.work);
+   int ret;
 
-   amdgpu_virt_read_pf2vf_data(adev);
+   ret = amdgpu_virt_read_pf2vf_data(adev);
+   if (ret)
+   goto out;
amdgpu_virt_write_vf2pf_data(adev);
 
+out:
schedule_delayed_work(&(adev->virt.vf2pf_work), 
adev->virt.vf2pf_update_interval_ms);
 }
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/amdgpu: remove redundant flush_delayed_work

2021-01-17 Thread Jingwen Chen
When using cancel_delayed_work_sync, there's no need
to flush_delayed_work first. This sequence can lead to
a redundant loop of work executing.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3e212862cf5d..c649944e49da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -569,7 +569,6 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device 
*adev)
 {
if (adev->virt.vf2pf_update_interval_ms != 0) {
DRM_INFO("clean up the vf2pf work item\n");
-   flush_delayed_work(&adev->virt.vf2pf_work);
cancel_delayed_work_sync(&adev->virt.vf2pf_work);
adev->virt.vf2pf_update_interval_ms = 0;
}
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: skip power profile switch in sriov

2020-11-26 Thread Jingwen Chen
power profile switch in vcn need to send SetWorkLoad msg to
smu, which is not supported in sriov.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 17a45baff638..8fb12afe3c96 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1168,6 +1168,9 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device 
*adev,
 {
int ret = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
if (is_support_sw_smu(adev))
ret = smu_switch_power_profile(&adev->smu, type, en);
else if (adev->powerplay.pp_funcs &&
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu: skip power profile switch in sriov

2020-11-23 Thread Jingwen Chen
power profile switch in vcn need to send SetWorkLoad msg to
smu, which is not supported in sriov.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 17a45baff638..8fb12afe3c96 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1168,6 +1168,9 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device 
*adev,
 {
int ret = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
if (is_support_sw_smu(adev))
ret = smu_switch_power_profile(&adev->smu, type, en);
else if (adev->powerplay.pp_funcs &&
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amd: Skip not used microcode loading in SRIOV

2020-09-23 Thread Jingwen Chen
smc, sdma, sos, ta and asd fw is not used in SRIOV. Skip them to
accelerate sw_init for navi12.

v2: skip above fw in SRIOV for vega10 and sienna_cichlid
v3: directly skip psp fw loading in SRIOV
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  | 10 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   |  3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   |  3 +++
 .../gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c  | 12 +++-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 11 +++
 6 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2c66e20b2ed9..18be544d8c1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -161,10 +161,12 @@ static int psp_sw_init(void *handle)
struct psp_context *psp = &adev->psp;
int ret;
 
-   ret = psp_init_microcode(psp);
-   if (ret) {
-   DRM_ERROR("Failed to load psp firmware!\n");
-   return ret;
+   if (!amdgpu_sriov_vf(adev)) {
+   ret = psp_init_microcode(psp);
+   if (ret) {
+   DRM_ERROR("Failed to load psp firmware!\n");
+   return ret;
+   }
}
 
ret = psp_memory_training_init(psp);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 810635cbf4c1..86fb1eddf5a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -592,6 +592,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device 
*adev)
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
DRM_DEBUG("\n");
 
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 48c95a78a173..9c72b95b7463 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -203,6 +203,9 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device 
*adev)
const struct common_firmware_header *header = NULL;
const struct sdma_firmware_header_v1_0 *hdr;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
DRM_DEBUG("\n");
 
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 34ccf376ee45..9f3952723c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -148,6 +148,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device 
*adev)
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
DRM_DEBUG("\n");
 
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c
index 1e222c5d91a4..daf122f24f23 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c
@@ -209,11 +209,13 @@ static int vega10_smu_init(struct pp_hwmgr *hwmgr)
int ret;
struct cgs_firmware_info info = {0};
 
-   ret = cgs_get_firmware_info(hwmgr->device,
-   CGS_UCODE_ID_SMU,
-   &info);
-   if (ret || !info.kptr)
-   return -EINVAL;
+   if (!amdgpu_sriov_vf((struct amdgpu_device *)hwmgr->adev)) {
+   ret = cgs_get_firmware_info(hwmgr->device,
+   CGS_UCODE_ID_SMU,
+   &info);
+   if (ret || !info.kptr)
+   return -EINVAL;
+   }
 
priv = kzalloc(sizeof(struct vega10_smumgr), GFP_KERNEL);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 538e6f5e19eb..3010cb31324a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -832,10 +832,13 @@ static int smu_sw_init(void *handle)
 
smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
smu->smu_dpm.requested_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
-   ret = smu_init_microcode(smu);
-   if (ret) {
-   dev_err(adev->dev, "Failed to load smu firmware!\n");
-   return ret;
+
+   if (!amdgpu_sriov_vf(adev)) {
+   ret = smu_init_microcode(smu);
+   if (ret) {
+ 

[PATCH 1/2] drm/amd/pm: Skip use smc fw data in SRIOV

2020-09-23 Thread Jingwen Chen
smc fw is not needed in SRIOV, thus driver should not try to get smc
fw data.

Signed-off-by: Jingwen Chen 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 61 ++-
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index b53872eb4398..d8ca6d968813 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -322,39 +322,42 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
void *table;
uint16_t version_major, version_minor;
 
-   hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
-   version_major = le16_to_cpu(hdr->header.header_version_major);
-   version_minor = le16_to_cpu(hdr->header.header_version_minor);
-   if ((version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) 
||
-   adev->asic_type == CHIP_NAVY_FLOUNDER) {
-   dev_info(adev->dev, "use driver provided pptable %d\n", 
smu->smu_table.boot_values.pp_table_id);
-   switch (version_minor) {
-   case 0:
-   ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
-   break;
-   case 1:
-   ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
-smu->smu_table.boot_values.pp_table_id);
-   break;
-   default:
-   ret = -EINVAL;
-   break;
+   if (!amdgpu_sriov_vf(adev)) {
+   hdr = (const struct smc_firmware_header_v1_0 *) 
adev->pm.fw->data;
+   version_major = le16_to_cpu(hdr->header.header_version_major);
+   version_minor = le16_to_cpu(hdr->header.header_version_minor);
+   if ((version_major == 2 && 
smu->smu_table.boot_values.pp_table_id > 0) ||
+   adev->asic_type == CHIP_NAVY_FLOUNDER) {
+   dev_info(adev->dev, "use driver provided pptable %d\n", 
smu->smu_table.boot_values.pp_table_id);
+   switch (version_minor) {
+   case 0:
+   ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
+   break;
+   case 1:
+   ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
+   smu->smu_table.boot_values.pp_table_id);
+   break;
+   default:
+   ret = -EINVAL;
+   break;
+   }
+   if (ret)
+   return ret;
+   goto out;
}
-   if (ret)
-   return ret;
+   }
 
-   } else {
-   dev_info(adev->dev, "use vbios provided pptable\n");
-   index = 
get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-   powerplayinfo);
+   dev_info(adev->dev, "use vbios provided pptable\n");
+   index = 
get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+   powerplayinfo);
 
-   ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
- (uint8_t **)&table);
-   if (ret)
-   return ret;
-   size = atom_table_size;
-   }
+   ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
+   (uint8_t **)&table);
+   if (ret)
+   return ret;
+   size = atom_table_size;
 
+out:
if (!smu->smu_table.power_play_table)
smu->smu_table.power_play_table = table;
if (!smu->smu_table.power_play_table_size)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 1/2] drm/amd/pm: Skip use smc fw data in SRIOV

2020-09-22 Thread Jingwen Chen
smc fw is not needed in SRIOV, thus driver should not try to get smc
fw data.

Signed-off-by: Jingwen Chen 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 61 ++-
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index b53872eb4398..d8ca6d968813 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -322,39 +322,42 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
void *table;
uint16_t version_major, version_minor;
 
-   hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
-   version_major = le16_to_cpu(hdr->header.header_version_major);
-   version_minor = le16_to_cpu(hdr->header.header_version_minor);
-   if ((version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) 
||
-   adev->asic_type == CHIP_NAVY_FLOUNDER) {
-   dev_info(adev->dev, "use driver provided pptable %d\n", 
smu->smu_table.boot_values.pp_table_id);
-   switch (version_minor) {
-   case 0:
-   ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
-   break;
-   case 1:
-   ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
-smu->smu_table.boot_values.pp_table_id);
-   break;
-   default:
-   ret = -EINVAL;
-   break;
+   if (!amdgpu_sriov_vf(adev)) {
+   hdr = (const struct smc_firmware_header_v1_0 *) 
adev->pm.fw->data;
+   version_major = le16_to_cpu(hdr->header.header_version_major);
+   version_minor = le16_to_cpu(hdr->header.header_version_minor);
+   if ((version_major == 2 && 
smu->smu_table.boot_values.pp_table_id > 0) ||
+   adev->asic_type == CHIP_NAVY_FLOUNDER) {
+   dev_info(adev->dev, "use driver provided pptable %d\n", 
smu->smu_table.boot_values.pp_table_id);
+   switch (version_minor) {
+   case 0:
+   ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
+   break;
+   case 1:
+   ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
+   smu->smu_table.boot_values.pp_table_id);
+   break;
+   default:
+   ret = -EINVAL;
+   break;
+   }
+   if (ret)
+   return ret;
+   goto out;
}
-   if (ret)
-   return ret;
+   }
 
-   } else {
-   dev_info(adev->dev, "use vbios provided pptable\n");
-   index = 
get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-   powerplayinfo);
+   dev_info(adev->dev, "use vbios provided pptable\n");
+   index = 
get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+   powerplayinfo);
 
-   ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
- (uint8_t **)&table);
-   if (ret)
-   return ret;
-   size = atom_table_size;
-   }
+   ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
+   (uint8_t **)&table);
+   if (ret)
+   return ret;
+   size = atom_table_size;
 
+out:
if (!smu->smu_table.power_play_table)
smu->smu_table.power_play_table = table;
if (!smu->smu_table.power_play_table_size)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amd: Skip not used microcode loading in SRIOV

2020-09-22 Thread Jingwen Chen
smc, sdma, sos, ta and asd fw is not used in SRIOV. Skip them to
accelerate sw_init for navi12.

v2: skip above fw in SRIOV for vega10 and sienna_cichlid
Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c  |  9 +
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   |  3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   |  3 +++
 .../gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c  | 12 +++-
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 11 +++
 6 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2c66e20b2ed9..9e2038de6ea7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2385,6 +2385,9 @@ int psp_init_asd_microcode(struct psp_context *psp,
const struct psp_firmware_header_v1_0 *asd_hdr;
int err = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
if (!chip_name) {
dev_err(adev->dev, "invalid chip name for asd microcode\n");
return -EINVAL;
@@ -2424,6 +2427,9 @@ int psp_init_sos_microcode(struct psp_context *psp,
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
int err = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
if (!chip_name) {
dev_err(adev->dev, "invalid chip name for sos microcode\n");
return -EINVAL;
@@ -2558,6 +2564,9 @@ int psp_init_ta_microcode(struct psp_context *psp,
int err = 0;
int ta_index = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
if (!chip_name) {
dev_err(adev->dev, "invalid chip name for ta microcode\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 810635cbf4c1..86fb1eddf5a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -592,6 +592,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device 
*adev)
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
DRM_DEBUG("\n");
 
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 48c95a78a173..9c72b95b7463 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -203,6 +203,9 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device 
*adev)
const struct common_firmware_header *header = NULL;
const struct sdma_firmware_header_v1_0 *hdr;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
DRM_DEBUG("\n");
 
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 34ccf376ee45..9f3952723c63 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -148,6 +148,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device 
*adev)
struct amdgpu_firmware_info *info = NULL;
const struct common_firmware_header *header = NULL;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
DRM_DEBUG("\n");
 
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c
index 1e222c5d91a4..daf122f24f23 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c
@@ -209,11 +209,13 @@ static int vega10_smu_init(struct pp_hwmgr *hwmgr)
int ret;
struct cgs_firmware_info info = {0};
 
-   ret = cgs_get_firmware_info(hwmgr->device,
-   CGS_UCODE_ID_SMU,
-   &info);
-   if (ret || !info.kptr)
-   return -EINVAL;
+   if (!amdgpu_sriov_vf((struct amdgpu_device *)hwmgr->adev)) {
+   ret = cgs_get_firmware_info(hwmgr->device,
+   CGS_UCODE_ID_SMU,
+   &info);
+   if (ret || !info.kptr)
+   return -EINVAL;
+   }
 
priv = kzalloc(sizeof(struct vega10_smumgr), GFP_KERNEL);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 538e6f5e19eb..3010cb31324a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -832,10 +832,13 @@ static int smu_sw_init(void *handle)
 
smu-&

[PATCH 1/2] drm/amd/pm: Skip use smc fw data in SRIOV

2020-09-17 Thread Jingwen Chen
smc fw is not needed in SRIOV, thus driver should not try to get smc
fw data.

Signed-off-by: Jingwen Chen 
---
 .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c| 61 ++-
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index b53872eb4398..d8ca6d968813 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -322,39 +322,42 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
void *table;
uint16_t version_major, version_minor;
 
-   hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
-   version_major = le16_to_cpu(hdr->header.header_version_major);
-   version_minor = le16_to_cpu(hdr->header.header_version_minor);
-   if ((version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) 
||
-   adev->asic_type == CHIP_NAVY_FLOUNDER) {
-   dev_info(adev->dev, "use driver provided pptable %d\n", 
smu->smu_table.boot_values.pp_table_id);
-   switch (version_minor) {
-   case 0:
-   ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
-   break;
-   case 1:
-   ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
-smu->smu_table.boot_values.pp_table_id);
-   break;
-   default:
-   ret = -EINVAL;
-   break;
+   if (!amdgpu_sriov_vf(adev)) {
+   hdr = (const struct smc_firmware_header_v1_0 *) 
adev->pm.fw->data;
+   version_major = le16_to_cpu(hdr->header.header_version_major);
+   version_minor = le16_to_cpu(hdr->header.header_version_minor);
+   if ((version_major == 2 && 
smu->smu_table.boot_values.pp_table_id > 0) ||
+   adev->asic_type == CHIP_NAVY_FLOUNDER) {
+   dev_info(adev->dev, "use driver provided pptable %d\n", 
smu->smu_table.boot_values.pp_table_id);
+   switch (version_minor) {
+   case 0:
+   ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
+   break;
+   case 1:
+   ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
+   smu->smu_table.boot_values.pp_table_id);
+   break;
+   default:
+   ret = -EINVAL;
+   break;
+   }
+   if (ret)
+   return ret;
+   goto out;
}
-   if (ret)
-   return ret;
+   }
 
-   } else {
-   dev_info(adev->dev, "use vbios provided pptable\n");
-   index = 
get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-   powerplayinfo);
+   dev_info(adev->dev, "use vbios provided pptable\n");
+   index = 
get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+   powerplayinfo);
 
-   ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
- (uint8_t **)&table);
-   if (ret)
-   return ret;
-   size = atom_table_size;
-   }
+   ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
+   (uint8_t **)&table);
+   if (ret)
+   return ret;
+   size = atom_table_size;
 
+out:
if (!smu->smu_table.power_play_table)
smu->smu_table.power_play_table = table;
if (!smu->smu_table.power_play_table_size)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 2/2] drm/amd: Skip not used microcode loading in SRIOV

2020-09-17 Thread Jingwen Chen
smc, sdma, sos and asd fw is not used in SRIOV. Skip them to
accelerate sw_init.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 16 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c |  3 +++
 drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c |  3 +++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 6c5d9612abcb..11252f41ab12 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -109,15 +109,17 @@ static int psp_v11_0_init_microcode(struct psp_context 
*psp)
BUG();
}
 
-   err = psp_init_sos_microcode(psp, chip_name);
-   if (err)
-   return err;
-
-   if (adev->asic_type != CHIP_SIENNA_CICHLID &&
-   adev->asic_type != CHIP_NAVY_FLOUNDER) {
-   err = psp_init_asd_microcode(psp, chip_name);
+   if (!amdgpu_sriov_vf(adev)) {
+   err = psp_init_sos_microcode(psp, chip_name);
if (err)
return err;
+
+   if (adev->asic_type != CHIP_SIENNA_CICHLID &&
+   adev->asic_type != CHIP_NAVY_FLOUNDER) {
+   err = psp_init_asd_microcode(psp, chip_name);
+   if (err)
+   return err;
+   }
}
 
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 48c95a78a173..d2c26b5fa00c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -219,6 +219,9 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device 
*adev)
BUG();
}
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_sdma.bin", chip_name);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index d8ca6d968813..85860e08c198 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -114,6 +114,9 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
return -EINVAL;
}
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
 
err = request_firmware(>pm.fw, fw_name, adev->dev);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/pm: Skip smu_post_init in SRIOV

2020-09-17 Thread Jingwen Chen
smu_post_init needs to enable SMU feature, while this require
virtualization off. Skip it since this feature is not used in SRIOV.

v2: move the check to the early stage of smu_post_init.

v3: fix typo

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index a027c7fdad56..05cb1fdd15ce 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2631,6 +2631,9 @@ static int navi10_post_smu_init(struct smu_context *smu)
uint64_t feature_mask = 0;
int ret = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
/* For Naiv1x, enable these features only after DAL initialization */
if (adev->pm.pp_feature & PP_SOCCLK_DPM_MASK)
feature_mask |= FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/pm: Skip smu_post_init in SRIOV

2020-09-17 Thread Jingwen Chen
smu_post_init needs to enable SMU feature, while this require
virtualization off. Skip it since this feature is not used in SRIOV.

v2: move the check to the early stage of smu_post_init.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index a027c7fdad56..a950f009c794 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2631,6 +2631,9 @@ static int navi10_post_smu_init(struct smu_context *smu)
uint64_t feature_mask = 0;
int ret = 0;
 
+   if (amdgpu_sriov_vf(adev))
+   return 0;
+
/* For Naiv1x, enable these features only after DAL initialization */
if (adev->pm.pp_feature & PP_SOCCLK_DPM_MASK)
feature_mask |= FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/pm: Skip smu_post_init in SRIOV

2020-09-17 Thread Jingwen Chen
smu_post_init needs to enable SMU feature, while this require
virtualization off. Skip it since this feature is not used in SRIOV.

Signed-off-by: Jingwen Chen 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 5c4b74f964fc..79163d0ff762 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -469,10 +469,12 @@ static int smu_late_init(void *handle)
if (!smu->pm_enabled)
return 0;
 
-   ret = smu_post_init(smu);
-   if (ret) {
-   dev_err(adev->dev, "Failed to post smu init!\n");
-   return ret;
+   if (!amdgpu_sriov_vf(adev)) {
+   ret = smu_post_init(smu);
+   if (ret) {
+   dev_err(adev->dev, "Failed to post smu init!\n");
+   return ret;
+   }
}
 
ret = smu_set_default_od_settings(smu);
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


  1   2   >