Add a new function amdgpu_vm_update_leaves() to avoid memory allocation on page faults.
The idea is to only update the leave PTEs to insert a dummy PRT PTE. TODO: HW older than GMC v9 needs a different solution. Signed-off-by: Christian König <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 ++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 71 ++++++++++++++++++++++- 3 files changed, 103 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e5588346a03f..94632a660b79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2984,10 +2984,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { - bool is_compute_context = false; + struct amdgpu_vm_update_params params; + bool is_compute_context; struct amdgpu_bo *root; uint64_t value, flags; struct amdgpu_vm *vm; + unsigned int idx; int r; vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); @@ -3029,24 +3031,46 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = adev->dummy_page_addr; flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE; - + /* On +gfx9 we can use the PRT functionality instead */ + if (!adev->gmc.gmc_funcs->set_prt) { + flags &= ~AMDGPU_PTE_VALID; + flags |= AMDGPU_PTE_PRT; + } } else { /* Let the hw retry silently on the PTE */ value = 0; } + if (!drm_dev_enter(adev_to_drm(adev), &idx)) { + r = -ENODEV; + goto error_unlock; + } + + amdgpu_vm_eviction_lock(vm); + if (vm->evicting) { + r = -EBUSY; + goto error_dev_exit; + } + + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; + params.immediate = true; + params.pages_addr = NULL; + r = dma_resv_reserve_fences(root->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve fence slot\n", r); - goto error_unlock; + goto error_eviction_lock; } - r = amdgpu_vm_update_range(adev, vm, true, false, false, false, - NULL, addr, addr, flags, value, 0, NULL, NULL, NULL); - if (r) - goto error_unlock; + amdgpu_vm_update_leaves(¶ms, addr, addr, value, flags); - r = amdgpu_vm_update_pdes(adev, vm, true); +error_eviction_lock: + amdgpu_vm_eviction_unlock(vm); + +error_dev_exit: + drm_dev_exit(idx); error_unlock: amdgpu_bo_unreserve(root); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index cc096c005e34..04b32accfa3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -613,6 +613,9 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags); +void amdgpu_vm_update_leaves(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + int64_t dst, uint64_t flags); void amdgpu_vm_pt_free_work(struct work_struct *work); void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, struct amdgpu_vm_update_params *params); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index e43a60d09808..9766b6b9aecc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -790,6 +790,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t dst, uint64_t flags) { struct amdgpu_device *adev = params->adev; + struct amdgpu_vm *vm = params->vm; + pid_t tgid = vm->task_info ? vm->task_info->tgid : 0; struct amdgpu_vm_pt_cursor cursor; uint64_t frag_start = start, frag_end; unsigned int frag; @@ -881,7 +883,6 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, entry_end = min(entry_end, end); do { - struct amdgpu_vm *vm = params->vm; uint64_t upd_end = min(entry_end, frag_end); unsigned int nptes = (upd_end - frag_start) >> shift; uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); @@ -893,8 +894,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, min(nptes, 32u), dst, incr, - upd_flags, - vm->task_info ? vm->task_info->tgid : 0, + upd_flags, tgid, vm->immediate.fence_context); amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), cursor.level, pe_start, dst, @@ -938,6 +938,71 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, return 0; } +/** + * amdgpu_vm_update_leaves - update leave PDEs/PTEs + * + * @params: see amdgpu_vm_update_params definition + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to insert into the leave PDEs/PTEs + * @flags: mapping flags + * + * Update the leave PDEs/PTEs in the range @start - @end without allocating or + * freeing page tables. + * + * Returns: + * 0 for success, negative error code for failure. + */ +void amdgpu_vm_update_leaves(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + int64_t dst, uint64_t flags) +{ + struct amdgpu_device *adev = params->adev; + struct amdgpu_vm *vm = params->vm; + pid_t tgid = vm->task_info ? vm->task_info->tgid : 0; + struct amdgpu_vm_pt_cursor cursor; + + amdgpu_vm_pt_start(adev, params->vm, start, &cursor); + while (cursor.pfn < end) { + unsigned int shift, mask; + uint64_t entry_end, pe_start; + struct amdgpu_bo *pt; + unsigned int nptes; + + /* Walk to the leave entries */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + + pt = cursor.parent->bo; + shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1); + mask = amdgpu_vm_pt_entries_mask(adev, cursor.level - 1); + + /* Looks good so far, calculate parameters for the update */ + pe_start = ((cursor.pfn >> shift) & mask) * 8; + + entry_end = ((uint64_t)mask + 1) << shift; + entry_end += cursor.pfn & ~(entry_end - 1); + entry_end = min(entry_end, end); + + nptes = (entry_end - cursor.pfn) >> shift; + /* + * This can happen when we set higher level PDEs to unmap and/or + * silent to stop fault floods. + */ + nptes = max(nptes, 1u); + + trace_amdgpu_vm_update_ptes(params, cursor.pfn, entry_end, + min(nptes, 32u), dst, 0, flags, + tgid, + vm->immediate.fence_context); + amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), + cursor.level - 1, pe_start, dst, + nptes, 0, flags); + + amdgpu_vm_pt_next(adev, &cursor); + } +} + /** * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible * @adev: amdgpu device structure -- 2.43.0
