SVM range size is tracked using the system page size. The range start and
end are aligned to system page-sized PFNs, so the total SVM range size
equals the total number of pages in the SVM range multiplied by the system
page size.

The SVM range map/unmap functions pass these system page-sized PFNs to
amdgpu_vm_update_range(), which expects PFNs based on the GPU page size
(4K). On systems whose page size is larger than 4K, this mismatch causes
only part of the SVM range to be mapped in the GPU page table, while the
rest remains unmapped. If the GPU then accesses an unmapped address
within the same range, it results in a GPU page fault.

To fix this, the required conversion has been added in both
svm_range_map_to_gpu() and svm_range_unmap_from_gpu(), ensuring that all
pages in the SVM range are correctly mapped on non-4K systems.

Signed-off-by: Ritesh Harjani (IBM) <[email protected]>
Signed-off-by: Donet Tom <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 30 ++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 74a1d3e1d52b..a2636f2d6c71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1314,11 +1314,16 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                         struct dma_fence **fence)
 {
        uint64_t init_pte_value = 0;
+       uint64_t gpu_start, gpu_end;
 
-       pr_debug("[0x%llx 0x%llx]\n", start, last);
+       // Convert CPU page range to GPU page range
+       gpu_start = start * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+       gpu_end = (last + 1) * AMDGPU_GPU_PAGES_IN_CPU_PAGE - 1;
 
-       return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, start,
-                                     last, init_pte_value, 0, 0, NULL, NULL,
+       pr_debug("%s: CPU[0x%llx 0x%llx] -> GPU[0x%llx 0x%llx]\n", __func__,
+                start, last, gpu_start, gpu_end);
+       return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, gpu_start,
+                                     gpu_end, init_pte_value, 0, 0, NULL, NULL,
                                      fence);
 }
 
@@ -1398,9 +1403,13 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
                 last_start, last_start + npages - 1, readonly);
 
        for (i = offset; i < offset + npages; i++) {
+               uint64_t gpu_start;
+               uint64_t gpu_end;
+
                last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
                dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
 
+
                /* Collect all pages in the same address range and memory domain
                 * that can be mapped with a single call to update mapping.
                 */
@@ -1415,17 +1424,22 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
                if (readonly)
                        pte_flags &= ~AMDGPU_PTE_WRITEABLE;
 
-               pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
-                        prange->svms, last_start, prange->start + i,
-                        (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
-                        pte_flags);
 
                /* For dGPU mode, we use same vm_manager to allocate VRAM for
                 * different memory partition based on fpfn/lpfn, we should use
                 * same vm_manager.vram_base_offset regardless memory partition.
                 */
+               gpu_start = last_start * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+               gpu_end = (prange->start + i + 1) * AMDGPU_GPU_PAGES_IN_CPU_PAGE - 1;
+
+               pr_debug("svms 0x%p map CPU[0x%lx 0x%llx] GPU[0x%llx 0x%llx] vram %d PTE 0x%llx\n",
+                        prange->svms, last_start, prange->start + i,
+                        gpu_start, gpu_end,
+                        (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+                        pte_flags);
+
                r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, true,
-                                          NULL, last_start, prange->start + i,
+                                          NULL, gpu_start, gpu_end,
                                           pte_flags,
                                           (last_start - prange->start) << PAGE_SHIFT,
                                           bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
-- 
2.52.0

Reply via email to