amdgpu: add amdgpu_vm_update_leaves()

Timur Kristóf Thu, 11 Jun 2026 23:54:57 -0700

On 2026. május 29., péntek 13:24:08 közép-európai nyári idő Christian König 
wrote:
> Add a new function amdgpu_vm_update_leaves() to avoid memory allocation
> on page faults.
> 
> The idea is to only update the leave PTEs to insert a dummy PRT PTE.


We should only use PRT PTEs on GPUs that do not have the SMEM PRT bug,
in other words only GFX9 for now.

As-is, this basically regresses all the work that I did for retry faults.

Due to the SMEM PRT bug, using PRT PTEs is unacceptable because it would imply 
that we can only mitigate VM faults that come from VMEM instructions. That 
creates unnecessary inconsistency (inability to mitigate some VM faults) and 
poor user experience (GPU hangs).

> 
> TODO: HW older than GMC v9 needs a different solution.

Can you elaborate what different solution is needed?
As far as I know, GMC v6-v7-v8 do not use and do not need to call the 
amdgpu_vm_handle_fault() function, they can just mitigate page faults on their 
own.

> 
> Signed-off-by: Christian König <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 40 ++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 71 ++++++++++++++++++++++-
>  3 files changed, 103 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e5588346a03f..94632a660b79
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2984,10 +2984,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device
> *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr,
>                           uint64_t ts, bool write_fault)
>  {
> -     bool is_compute_context = false;
> +     struct amdgpu_vm_update_params params;
> +     bool is_compute_context;
>       struct amdgpu_bo *root;
>       uint64_t value, flags;
>       struct amdgpu_vm *vm;
> +     unsigned int idx;
>       int r;
> 
>       vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
> @@ -3029,24 +3031,46 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device
> *adev, u32 pasid, value = adev->dummy_page_addr;
>               flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
>                       AMDGPU_PTE_WRITEABLE;
> -
> +             /* On +gfx9 we can use the PRT functionality instead */
> +             if (!adev->gmc.gmc_funcs->set_prt) {
> +                     flags &= ~AMDGPU_PTE_VALID;
> +                     flags |= AMDGPU_PTE_PRT;
> +             }

As noted above, PRT should only be used on GFX9 (and potentially, future 
products where the SMEM PRT bug is fixed).

Also, checking the set_prt is not necessary here as this function only makes 
sense on GFX9 and newer.

Maybe we could help the confusion by documenting both amdgpu_vm_handle_fault() 
and amdgpu_gmc_handle_retry_fault() that they are only applicable to GFX9 and 
newer that support recoverable faults or retry faults. (In a separate commit 
from this one, though.)

>       } else {
>               /* Let the hw retry silently on the PTE */
>               value = 0;
>       }
> 
> +     if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
> +             r = -ENODEV;
> +             goto error_unlock;
> +     }
> +
> +     amdgpu_vm_eviction_lock(vm);
> +     if (vm->evicting) {
> +             r = -EBUSY;
> +             goto error_dev_exit;
> +     }
> +
> +     memset(&params, 0, sizeof(params));
> +     params.adev = adev;
> +     params.vm = vm;
> +     params.immediate = true;
> +     params.pages_addr = NULL;
> +
>       r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
>       if (r) {
>               pr_debug("failed %d to reserve fence slot\n", r);
> -             goto error_unlock;
> +             goto error_eviction_lock;
>       }
> 
> -     r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
> -                                NULL, addr, addr, flags, value, 
0, NULL, NULL, NULL);
> -     if (r)
> -             goto error_unlock;
> +     amdgpu_vm_update_leaves(&params, addr, addr, value, flags);
> 
> -     r = amdgpu_vm_update_pdes(adev, vm, true);
> +error_eviction_lock:
> +     amdgpu_vm_eviction_unlock(vm);
> +
> +error_dev_exit:
> +     drm_dev_exit(idx);
> 
>  error_unlock:
>       amdgpu_bo_unreserve(root);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index cc096c005e34..04b32accfa3f
> 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -613,6 +613,9 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params
> *params, int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
> uint64_t start, uint64_t end,
>                         uint64_t dst, uint64_t flags);
> +void amdgpu_vm_update_leaves(struct amdgpu_vm_update_params *params,
> +                          uint64_t start, uint64_t end,
> +                          int64_t dst, uint64_t flags);
>  void amdgpu_vm_pt_free_work(struct work_struct *work);
>  void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
>                           struct amdgpu_vm_update_params *params);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index
> e43a60d09808..9766b6b9aecc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -790,6 +790,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params
> *params, uint64_t dst, uint64_t flags)
>  {
>       struct amdgpu_device *adev = params->adev;
> +     struct amdgpu_vm *vm = params->vm;
> +     pid_t tgid = vm->task_info ? vm->task_info->tgid : 0;

These spurious changes in amdgpu_vm_ptes_update() look unrelated the rest of 
the commit, so I think they should either be split off into a separate commit 
or explained in the commit message why they are necessary here.

>       struct amdgpu_vm_pt_cursor cursor;
>       uint64_t frag_start = start, frag_end;
>       unsigned int frag;
> @@ -881,7 +883,6 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params
> *params, entry_end = min(entry_end, end);
> 
>               do {
> -                     struct amdgpu_vm *vm = params->vm;
>                       uint64_t upd_end = min(entry_end, frag_end);
>                       unsigned int nptes = (upd_end - frag_start) 
>> shift;
>                       uint64_t upd_flags = flags | 
AMDGPU_PTE_FRAG(frag);
> @@ -893,8 +894,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params
> *params,
> 
>                       trace_amdgpu_vm_update_ptes(params, 
frag_start, upd_end,
>                                                   
min(nptes, 32u), dst, incr,
> -                                                 
upd_flags,
> -                                                 vm-
>task_info ? vm->task_info->tgid : 0,
> +                                                 
upd_flags, tgid,
>                                                   vm-
>immediate.fence_context);
>                       amdgpu_vm_pte_update_flags(params, 
to_amdgpu_bo_vm(pt),
>                                                  
cursor.level, pe_start, dst,
> @@ -938,6 +938,71 @@ int amdgpu_vm_ptes_update(struct
> amdgpu_vm_update_params *params, return 0;
>  }
> 
> +/**
> + * amdgpu_vm_update_leaves - update leave PDEs/PTEs
> + *
> + * @params: see amdgpu_vm_update_params definition
> + * @start: start of GPU address range
> + * @end: end of GPU address range
> + * @dst: destination address to insert into the leave PDEs/PTEs
> + * @flags: mapping flags
> + *
> + * Update the leave PDEs/PTEs in the range @start - @end without allocating
> or + * freeing page tables.
> + *
> + * Returns:
> + * 0 for success, negative error code for failure.
> + */
> +void amdgpu_vm_update_leaves(struct amdgpu_vm_update_params *params,
> +                          uint64_t start, uint64_t end,
> +                          int64_t dst, uint64_t flags)
> +{
> +     struct amdgpu_device *adev = params->adev;
> +     struct amdgpu_vm *vm = params->vm;
> +     pid_t tgid = vm->task_info ? vm->task_info->tgid : 0;
> +     struct amdgpu_vm_pt_cursor cursor;
> +
> +     amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
> +     while (cursor.pfn < end) {
> +             unsigned int shift, mask;
> +             uint64_t entry_end, pe_start;
> +             struct amdgpu_bo *pt;
> +             unsigned int nptes;
> +
> +             /* Walk to the leave entries */
> +             if (amdgpu_vm_pt_descendant(adev, &cursor))
> +                     continue;
> +
> +             pt = cursor.parent->bo;
> +             shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 
1);
> +             mask = amdgpu_vm_pt_entries_mask(adev, cursor.level - 
1);
> +
> +             /* Looks good so far, calculate parameters for the 
update */
> +             pe_start = ((cursor.pfn >> shift) & mask) * 8;
> +
> +             entry_end = ((uint64_t)mask + 1) << shift;
> +             entry_end += cursor.pfn & ~(entry_end - 1);
> +             entry_end = min(entry_end, end);
> +
> +             nptes = (entry_end - cursor.pfn) >> shift;
> +             /*
> +              * This can happen when we set higher level PDEs to 
unmap and/or
> +              * silent to stop fault floods.
> +              */
> +             nptes = max(nptes, 1u);
> +
> +             trace_amdgpu_vm_update_ptes(params, cursor.pfn, 
entry_end,
> +                                         min(nptes, 32u), 
dst, 0, flags,
> +                                         tgid,
> +                                         vm-
>immediate.fence_context);
> +             amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
> +                                        cursor.level - 1, 
pe_start, dst,
> +                                        nptes, 0, flags);
> +
> +             amdgpu_vm_pt_next(adev, &cursor);
> +     }
> +}
> +
>  /**
>   * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible
>   * @adev: amdgpu device structure

Re: [PATCH 06/13] drm/amdgpu: add amdgpu_vm_update_leaves()

Reply via email to