On 2024-04-19 12:23, Mukul Joshi wrote:
Do VRAM accounting when doing migrations to vram to make sure
there is enough available VRAM and migrating to VRAM doesn't evict
other possible non-unified memory BOs. If migrating to VRAM fails,
driver can fall back to using system memory seamlessly.

Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>


---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++++++++++++++-
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c     |  2 +-
  2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index bdc01ca9609a..a6bfc00c0310 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -509,10 +509,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc,
        start = start_mgr << PAGE_SHIFT;
        end = (last_mgr + 1) << PAGE_SHIFT;
+ r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+                                       prange->npages * PAGE_SIZE,
+                                       KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+                                       node->xcp ? node->xcp->id : 0);
+       if (r) {
+               dev_dbg(node->adev->dev, "failed to allocate VRAM, size exceeds VRAM 
limit\n", r);
+               return -ENOSPC;
+       }
+
        r = svm_range_vram_node_new(node, prange, true);
        if (r) {
                dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
-               return r;
+               goto out;
        }
        ttm_res_offset = (start_mgr - prange->start + prange->offset) << 
PAGE_SHIFT;
@@ -545,6 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
                svm_range_vram_node_free(prange);
        }
+out:
+       amdgpu_amdkfd_unreserve_mem_limit(node->adev,
+                                       prange->npages * PAGE_SIZE,
+                                       KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+                                       node->xcp ? node->xcp->id : 0);
        return r < 0 ? r : 0;
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f7d75b432cc6..bfab16b43fec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3426,7 +3426,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct 
svm_range *prange,
                                mm, KFD_MIGRATE_TRIGGER_PREFETCH);
        *migrated = !r;
- return r;
+       return 0;
  }
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)

Reply via email to