Re: [PATCH] drm/amdgpu: introduce vram lost for reset
Am 27.08.19 um 10:34 schrieb Monk Liu: for SOC15/vega10 the BACO reset & mode1 would introduce vram lost in high end address range, current kmd's vram lost checking cannot catch it since it only check very ahead visible frame buffer Looks mostly good to me. Only question I have is why to use a macro? Some inline function should do as well. Christian. Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f6ae565..11e0fc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1155,6 +1155,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 02b3e7d..31690e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3482,7 +3482,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - atomic_inc(>vram_lost_counter); + amdgpu_inc_vram_lost(adev); r = amdgpu_device_recover_vram(adev); } @@ -3648,7 +3648,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); - atomic_inc(_adev->vram_lost_counter); + amdgpu_inc_vram_lost(tmp_adev); } r = amdgpu_gtt_mgr_recover( diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fe2212df..8af7501 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -557,10 +557,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev) { switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: + amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return soc15_mode2_reset(adev); default: + amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: introduce vram lost for reset
sure _ Monk Liu|GPU Virtualization Team |AMD [sig-cloud-gpu] From: Deucher, Alexander Sent: Tuesday, August 27, 2019 8:48 PM To: Liu, Monk ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: introduce vram lost for reset Can you update nv.c as well? With that fixed, Reviewed-by: Alex Deucher mailto:alexander.deuc...@amd.com>> From: amd-gfx mailto:amd-gfx-boun...@lists.freedesktop.org>> on behalf of Monk Liu mailto:monk@amd.com>> Sent: Tuesday, August 27, 2019 4:34 AM To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> mailto:amd-gfx@lists.freedesktop.org>> Cc: Liu, Monk mailto:monk@amd.com>> Subject: [PATCH] drm/amdgpu: introduce vram lost for reset for SOC15/vega10 the BACO reset & mode1 would introduce vram lost in high end address range, current kmd's vram lost checking cannot catch it since it only check very ahead visible frame buffer Signed-off-by: Monk Liu mailto:monk@amd.com>> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f6ae565..11e0fc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1155,6 +1155,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 02b3e7d..31690e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3482,7 +3482,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - atomic_inc(>vram_lost_counter); + amdgpu_inc_vram_lost(adev); r = amdgpu_device_recover_vram(adev); } @@ -3648,7 +3648,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); - atomic_inc(_adev->vram_lost_counter); + amdgpu_inc_vram_lost(tmp_adev); } r = amdgpu_gtt_mgr_recover( diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fe2212df..8af7501 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -557,10 +557,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev) { switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: + amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return soc15_mode2_reset(adev); default: + amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: introduce vram lost for reset
Can you update nv.c as well? With that fixed, Reviewed-by: Alex Deucher From: amd-gfx on behalf of Monk Liu Sent: Tuesday, August 27, 2019 4:34 AM To: amd-gfx@lists.freedesktop.org Cc: Liu, Monk Subject: [PATCH] drm/amdgpu: introduce vram lost for reset for SOC15/vega10 the BACO reset & mode1 would introduce vram lost in high end address range, current kmd's vram lost checking cannot catch it since it only check very ahead visible frame buffer Signed-off-by: Monk Liu --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f6ae565..11e0fc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1155,6 +1155,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 02b3e7d..31690e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3482,7 +3482,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - atomic_inc(>vram_lost_counter); + amdgpu_inc_vram_lost(adev); r = amdgpu_device_recover_vram(adev); } @@ -3648,7 +3648,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); - atomic_inc(_adev->vram_lost_counter); + amdgpu_inc_vram_lost(tmp_adev); } r = amdgpu_gtt_mgr_recover( diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fe2212df..8af7501 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -557,10 +557,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev) { switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: + amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return soc15_mode2_reset(adev); default: + amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx