Re: [PATCH v3 3/3] drm/amdgpu: recover gart table at resume
Am 22.10.21 um 11:32 schrieb Nirmoy Das: Get rid off pin/unpin of gart BO at resume/suspend and instead pin only once and try to recover gart content at resume time. This is much more stable in case there is OOM situation at 2nd call to amdgpu_device_evict_resources() while evicting GART table. v3: remove gart recovery from other places v2: pin gart at amdgpu_gart_table_vram_alloc() Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 80 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +- 7 files changed, 12 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2b53d86aebac..f0c70e9d37fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3935,16 +3935,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - /* First evict vram memory */ amdgpu_device_evict_resources(adev); amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); - /* This second call to evict device resources is to evict -* the gart page table using the CPU. -*/ - amdgpu_device_evict_resources(adev); return 0; } @@ -4286,8 +4281,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, goto error; amdgpu_virt_init_data_exchange(adev); - /* we need recover gart prior to run SMC/CP/SDMA resume */ - amdgpu_gtt_mgr_recover(>mman.gtt_mgr); r = amdgpu_device_fw_loading(adev); if (r) @@ -4604,10 +4597,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_inc_vram_lost(tmp_adev); } - r = amdgpu_gtt_mgr_recover(_adev->mman.gtt_mgr); - if (r) - goto out; - r = amdgpu_device_fw_loading(tmp_adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d3e4203f6217..679eec122bb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -116,78 +116,16 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { int r; - if (adev->gart.bo == NULL) { - struct amdgpu_bo_param bp; - - memset(, 0, sizeof(bp)); - bp.size = adev->gart.table_size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, , >gart.bo); - if (r) { - return r; - } - } - return 0; -} - -/** - * amdgpu_gart_table_vram_pin - pin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Pin the GART page table in vram so it will not be moved - * by the memory manager (pcie r4xx, r5xx+). These asics require the - * gart table to be in video memory. - * Returns 0 for success, error for failure. - */ -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) -{ - int r; + if (adev->gart.bo != NULL) + return 0; - r = amdgpu_bo_reserve(adev->gart.bo, false); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, >gart.bo, + NULL, (void *)>gart.ptr); if (r) { - amdgpu_bo_unreserve(adev->gart.bo); return r; } Just "return amdgpu_bo_create_kernel(..);" should do it now as far as I can see. Apart from that looks really good now. Christian. - r = amdgpu_bo_kmap(adev->gart.bo, >gart.ptr); - if (r) - amdgpu_bo_unpin(adev->gart.bo); - amdgpu_bo_unreserve(adev->gart.bo); - return r; -} - -/** - * amdgpu_gart_table_vram_unpin - unpin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Unpin the GART page table in vram (pcie r4xx, r5xx+).
[PATCH v3 3/3] drm/amdgpu: recover gart table at resume
Get rid off pin/unpin of gart BO at resume/suspend and instead pin only once and try to recover gart content at resume time. This is much more stable in case there is OOM situation at 2nd call to amdgpu_device_evict_resources() while evicting GART table. v3: remove gart recovery from other places v2: pin gart at amdgpu_gart_table_vram_alloc() Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 80 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +- 7 files changed, 12 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2b53d86aebac..f0c70e9d37fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3935,16 +3935,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - /* First evict vram memory */ amdgpu_device_evict_resources(adev); amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); - /* This second call to evict device resources is to evict -* the gart page table using the CPU. -*/ - amdgpu_device_evict_resources(adev); return 0; } @@ -4286,8 +4281,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, goto error; amdgpu_virt_init_data_exchange(adev); - /* we need recover gart prior to run SMC/CP/SDMA resume */ - amdgpu_gtt_mgr_recover(>mman.gtt_mgr); r = amdgpu_device_fw_loading(adev); if (r) @@ -4604,10 +4597,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_inc_vram_lost(tmp_adev); } - r = amdgpu_gtt_mgr_recover(_adev->mman.gtt_mgr); - if (r) - goto out; - r = amdgpu_device_fw_loading(tmp_adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d3e4203f6217..679eec122bb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -116,78 +116,16 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { int r; - if (adev->gart.bo == NULL) { - struct amdgpu_bo_param bp; - - memset(, 0, sizeof(bp)); - bp.size = adev->gart.table_size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, , >gart.bo); - if (r) { - return r; - } - } - return 0; -} - -/** - * amdgpu_gart_table_vram_pin - pin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Pin the GART page table in vram so it will not be moved - * by the memory manager (pcie r4xx, r5xx+). These asics require the - * gart table to be in video memory. - * Returns 0 for success, error for failure. - */ -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) -{ - int r; + if (adev->gart.bo != NULL) + return 0; - r = amdgpu_bo_reserve(adev->gart.bo, false); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, >gart.bo, + NULL, (void *)>gart.ptr); if (r) { - amdgpu_bo_unreserve(adev->gart.bo); return r; } - r = amdgpu_bo_kmap(adev->gart.bo, >gart.ptr); - if (r) - amdgpu_bo_unpin(adev->gart.bo); - amdgpu_bo_unreserve(adev->gart.bo); - return r; -} - -/** - * amdgpu_gart_table_vram_unpin - unpin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Unpin the GART page table in vram (pcie r4xx, r5xx+). - * These asics require the gart table to be in video memory. - */ -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (adev->gart.bo == NULL) { -
[PATCH v3 3/3] drm/amdgpu: recover gart table at resume
Get rid off pin/unpin of gart BO at resume/suspend and instead pin only once and try to recover gart content at resume time. This is much more stable in case there is OOM situation at 2nd call to amdgpu_device_evict_resources() while evicting GART table. v3: remove gart recovery from other places v2: pin gart at amdgpu_gart_table_vram_alloc() Signed-off-by: Nirmoy Das --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 80 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +- 7 files changed, 12 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2b53d86aebac..f0c70e9d37fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3935,16 +3935,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - /* First evict vram memory */ amdgpu_device_evict_resources(adev); amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); - /* This second call to evict device resources is to evict -* the gart page table using the CPU. -*/ - amdgpu_device_evict_resources(adev); return 0; } @@ -4286,8 +4281,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, goto error; amdgpu_virt_init_data_exchange(adev); - /* we need recover gart prior to run SMC/CP/SDMA resume */ - amdgpu_gtt_mgr_recover(>mman.gtt_mgr); r = amdgpu_device_fw_loading(adev); if (r) @@ -4604,10 +4597,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_inc_vram_lost(tmp_adev); } - r = amdgpu_gtt_mgr_recover(_adev->mman.gtt_mgr); - if (r) - goto out; - r = amdgpu_device_fw_loading(tmp_adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d3e4203f6217..679eec122bb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -116,78 +116,16 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { int r; - if (adev->gart.bo == NULL) { - struct amdgpu_bo_param bp; - - memset(, 0, sizeof(bp)); - bp.size = adev->gart.table_size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, , >gart.bo); - if (r) { - return r; - } - } - return 0; -} - -/** - * amdgpu_gart_table_vram_pin - pin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Pin the GART page table in vram so it will not be moved - * by the memory manager (pcie r4xx, r5xx+). These asics require the - * gart table to be in video memory. - * Returns 0 for success, error for failure. - */ -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) -{ - int r; + if (adev->gart.bo != NULL) + return 0; - r = amdgpu_bo_reserve(adev->gart.bo, false); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, >gart.bo, + NULL, (void *)>gart.ptr); if (r) { - amdgpu_bo_unreserve(adev->gart.bo); return r; } - r = amdgpu_bo_kmap(adev->gart.bo, >gart.ptr); - if (r) - amdgpu_bo_unpin(adev->gart.bo); - amdgpu_bo_unreserve(adev->gart.bo); - return r; -} - -/** - * amdgpu_gart_table_vram_unpin - unpin gart page table in vram - * - * @adev: amdgpu_device pointer - * - * Unpin the GART page table in vram (pcie r4xx, r5xx+). - * These asics require the gart table to be in video memory. - */ -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) -{ - int r; - - if (adev->gart.bo == NULL) { -