From: Honglei Huang <[email protected]>

Implement GPU page table mapping in amdgpu_svm_range.c:
- range_invalidate_gpu_mapping: mark range as unmapped
- amdgpu_svm_range_pages_valid: check notifier sequence validity
- amdgpu_svm_range_is_valid: full validation (mapped, PTE flags, attrs)
- amdgpu_svm_range_zap_ptes: clear GPU PTEs via amdgpu_vm_update_range
- amdgpu_svm_range_attr_pte_flags: compute PTE flags per GC IP version
  with MTYPE selection for coherency modes
- amdgpu_svm_range_lock_vm_pd: acquire VM page directory via drm_exec
- amdgpu_svm_range_update_gpu_range: DMA segment coalescing and PTE
  programming under notifier lock
- amdgpu_svm_range_find_or_insert: wrapper with read-only fallback
- amdgpu_svm_range_get_pages: page acquisition, evicting the range on
  mixed page types
- amdgpu_svm_range_update_mapping: full pipeline (lock PD, validate
  pages, program PTEs, update PDEs, flush TLB, update state)
- amdgpu_svm_range_map_attrs: iterate address range creating/mapping
  ranges with attribute-driven PTE flags
- amdgpu_svm_range_map_interval: cursor-based mapping across attr tree

Signed-off-by: Honglei Huang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 488 ++++++++++++++++++
 1 file changed, 488 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
new file mode 100644
index 000000000..c733d611d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_svm.h"
+#include "amdgpu_svm_attr.h"
+#include "amdgpu_svm_range.h"
+#include "amdgpu_svm_fault.h"
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+
+#include <drm/drm_exec.h>
+#include <drm/drm_pagemap.h>
+
+#include <linux/mmu_notifier.h>
+#include <uapi/linux/kfd_ioctl.h>
+
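+/* Mark the range as no longer mapped in the GPU page tables. */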
+static void
+range_invalidate_gpu_mapping(struct amdgpu_svm_range *range)
+{
+       WRITE_ONCE(range->gpu_mapped, false);
+}
+
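+/*
+ * Check that the pages backing @range are still usable: the range must not
+ * have been (partially) unmapped and the notifier sequence number must
+ * still be valid. Caller must hold the gpusvm notifier lock.
+ */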
+bool
+amdgpu_svm_range_pages_valid(struct amdgpu_svm *svm,
+                            struct amdgpu_svm_range *range)
+{
+       struct drm_gpusvm_range *base = &range->base;
+
+       lockdep_assert_held(&svm->gpusvm.notifier_lock);
+
+       if (base->pages.flags.unmapped || base->pages.flags.partial_unmap)
+               return false;
+
+       return drm_gpusvm_range_pages_valid(&svm->gpusvm, base);
+}
+
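+/*
+ * A range is valid when it is mapped on the GPU with the requested PTE
+ * flags and attributes and its pages are still valid. Takes the notifier
+ * lock itself, with memory reclaim suppressed.
+ */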
+bool amdgpu_svm_range_is_valid(struct amdgpu_svm *svm,
+                              struct amdgpu_svm_range *range,
+                              const struct amdgpu_svm_attrs *attrs,
+                              uint64_t pte_flags)
+{
+       unsigned int flags;
+       bool valid;
+
+       flags = memalloc_noreclaim_save();
+       drm_gpusvm_notifier_lock(&svm->gpusvm);
+       valid = range->gpu_mapped &&
+               range->pte_flags == pte_flags &&
+               range->attr_flags == attrs->flags &&
+               amdgpu_svm_range_pages_valid(svm, range);
+       drm_gpusvm_notifier_unlock(&svm->gpusvm);
+       memalloc_noreclaim_restore(flags);
+
+       return valid;
+}
+
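+/*
+ * Clear the GPU PTEs for the intersection of @range and @mmu_range and
+ * wait for the clearing to complete before the pages go away.
+ */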
+static int
+amdgpu_svm_range_zap_ptes(struct amdgpu_svm *svm,
+                         struct amdgpu_svm_range *range,
+                         const struct mmu_notifier_range *mmu_range)
+{
+       struct drm_gpusvm_range *base = &range->base;
+       struct dma_fence *fence = NULL;
+       unsigned long start_page = max(drm_gpusvm_range_start(base),
+                                      mmu_range->start) >> PAGE_SHIFT;
+       unsigned long last_page = (min(drm_gpusvm_range_end(base),
+                                      mmu_range->end) >> PAGE_SHIFT) - 1;
+       unsigned int flags;
+       int ret;
+
+       if (last_page < start_page)
+               return 0;
+
+       flags = memalloc_noreclaim_save();
+       ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, true, true, false,
+                                    NULL, start_page, last_page, 0, 0, 0, NULL,
+                                    NULL, &fence);
+       memalloc_noreclaim_restore(flags);
+
+       if (!ret && fence) {
+               ret = dma_fence_wait(fence, false);
+               if (ret < 0)
+                       AMDGPU_SVM_TRACE("notifier unmap fence wait failed: ret=%d pages=[0x%lx-0x%lx] npages=0x%lx\n",
+                                        ret, start_page, last_page,
+                                        last_page - start_page + 1);
+       }
+
+       dma_fence_put(fence);
+       return ret;
+}
+
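+/*
+ * Compute the PTE flags for a mapping with attributes @attrs. The MTYPE is
+ * selected per GC IP version and coherency mode; the mapping is read-only
+ * when either the range attributes or @read_only request it.
+ */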
+uint64_t
+amdgpu_svm_range_attr_pte_flags(struct amdgpu_svm *svm,
+                                const struct amdgpu_svm_attrs *attrs,
+                                bool read_only)
+{
+       uint32_t gc_ip_version = amdgpu_ip_version(svm->adev, GC_HWIP, 0);
+       uint32_t flags = attrs->flags;
+       uint32_t mapping_flags = 0;
+       uint64_t pte_flags;
+       bool coherent = flags & (AMDGPU_SVM_ATTR_BIT_COHERENT |
+                                AMDGPU_SVM_ATTR_BIT_EXT_COHERENT);
+       bool ext_coherent = flags & AMDGPU_SVM_ATTR_BIT_EXT_COHERENT;
+       bool snoop = true;
+       unsigned int mtype_local;
+
+       switch (gc_ip_version) {
+       case IP_VERSION(9, 4, 1):
+       case IP_VERSION(9, 4, 2):
+               mapping_flags |= coherent ?
+                       AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               break;
+       case IP_VERSION(9, 4, 3):
+       case IP_VERSION(9, 4, 4):
+       case IP_VERSION(9, 5, 0):
+               if (ext_coherent)
+                       mtype_local = AMDGPU_VM_MTYPE_CC;
+               else
+                       mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+                               amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC :
+                               AMDGPU_VM_MTYPE_RW;
+               if (svm->adev->flags & AMD_IS_APU) {
+                       if (num_possible_nodes() <= 1)
+                               mapping_flags |= mtype_local;
+                       else
+                               mapping_flags |= ext_coherent ?
+                                       AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               } else {
+                       if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
+                               mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                       else
+                               mapping_flags |= AMDGPU_VM_MTYPE_NC;
+               }
+               break;
+       case IP_VERSION(11, 0, 0):
+       case IP_VERSION(11, 0, 1):
+       case IP_VERSION(11, 0, 2):
+       case IP_VERSION(11, 0, 3):
+       case IP_VERSION(11, 0, 4):
+       case IP_VERSION(11, 5, 0):
+       case IP_VERSION(11, 5, 1):
+       case IP_VERSION(11, 5, 2):
+       case IP_VERSION(11, 5, 3):
+               mapping_flags |= coherent ?
+                       AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               break;
+       case IP_VERSION(12, 0, 0):
+       case IP_VERSION(12, 0, 1):
+               mapping_flags |= AMDGPU_VM_MTYPE_NC;
+               break;
+       default:
+               mapping_flags |= coherent ?
+                       AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               break;
+       }
+
+       if (flags & AMDGPU_SVM_ATTR_BIT_GPU_EXEC)
+               mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+       pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM;
+       pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+       if (gc_ip_version >= IP_VERSION(12, 0, 0))
+               pte_flags |= AMDGPU_PTE_IS_PTE;
+
+       amdgpu_gmc_get_vm_pte(svm->adev, svm->vm, NULL, mapping_flags, &pte_flags);
+       pte_flags |= AMDGPU_PTE_READABLE;
+       if (!(flags & AMDGPU_SVM_ATTR_BIT_GPU_RO) && !read_only)
+               pte_flags |= AMDGPU_PTE_WRITEABLE;
+
+       return pte_flags;
+}
+
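+/*
+ * Reserve the VM page directory via drm_exec, retrying on contention.
+ * On success the caller owns @exec and must call drm_exec_fini(); on
+ * failure @exec has already been finalized.
+ */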
+int amdgpu_svm_range_lock_vm_pd(struct amdgpu_svm *svm, struct drm_exec *exec,
+                               bool intr)
+{
+       unsigned int exec_flags = DRM_EXEC_IGNORE_DUPLICATES;
+       int ret;
+
+       if (intr)
+               exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT;
+
+       drm_exec_init(exec, exec_flags, 0);
+       drm_exec_until_all_locked(exec) {
+               ret = amdgpu_vm_lock_pd(svm->vm, exec, 1);
+               drm_exec_retry_on_contention(exec);
+               if (ret) {
+                       drm_exec_fini(exec);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
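+/*
+ * Program the GPU PTEs for @range from its DMA address array. Each array
+ * entry covers 1 << order pages, so each coalesced DMA segment is mapped
+ * with a single amdgpu_vm_update_range() call; the TLB flush and the
+ * optional fence are deferred to the last segment. Caller must hold the
+ * gpusvm notifier lock.
+ */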
+int
+amdgpu_svm_range_update_gpu_range(struct amdgpu_svm *svm,
+                                 struct amdgpu_svm_range *range,
+                                 uint64_t pte_flags,
+                                 bool flush_tlb,
+                                 bool wait_fence,
+                                 struct dma_fence **fence)
+{
+       struct drm_gpusvm_range *base = &range->base;
+       const unsigned long range_start_page = drm_gpusvm_range_start(base) >> PAGE_SHIFT;
+       const unsigned long range_end_page = drm_gpusvm_range_end(base) >> PAGE_SHIFT;
+       const unsigned long npages = range_end_page - range_start_page;
+       unsigned long mapped_pages = 0;
+       unsigned long dma_idx = 0;
+       int ret;
+
+       lockdep_assert_held(&svm->gpusvm.notifier_lock);
+
+       if (!base->pages.dma_addr || !npages)
+               return -EINVAL;
+
+       while (mapped_pages < npages) {
+               const struct drm_pagemap_addr *entry = &base->pages.dma_addr[dma_idx++];
+               unsigned long seg_pages = min_t(unsigned long, 1UL << entry->order,
+                                               npages - mapped_pages);
+               unsigned long start_page, last_page;
+               bool is_last_seg;
+
+               if (entry->proto != DRM_INTERCONNECT_SYSTEM)
+                       return -EOPNOTSUPP;
+
+               start_page = range_start_page + mapped_pages;
+               last_page = start_page + seg_pages - 1;
+               mapped_pages += seg_pages;
+               is_last_seg = mapped_pages == npages;
+
+               ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, false,
+                                            flush_tlb && is_last_seg, true, NULL,
+                                            start_page, last_page, pte_flags,
+                                            0, entry->addr, NULL, NULL,
+                                            wait_fence && is_last_seg ? fence : NULL);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
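+/*
+ * Wrap drm_gpusvm_range_find_or_insert(), retrying in read-only mode when
+ * the first lookup fails with -EPERM.
+ */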
+struct amdgpu_svm_range *
+amdgpu_svm_range_find_or_insert(struct amdgpu_svm *svm, unsigned long addr,
+                               unsigned long gpuva_start, unsigned long gpuva_end,
+                               struct drm_gpusvm_ctx *ctx)
+{
+       struct drm_gpusvm_range *r;
+
+retry:
+       r = drm_gpusvm_range_find_or_insert(&svm->gpusvm, addr,
+                                           gpuva_start, gpuva_end, ctx);
+
+       if (PTR_ERR_OR_ZERO(r) == -EPERM && !ctx->read_only) {
+               ctx->read_only = true;
+               goto retry;
+       }
+
+       if (IS_ERR(r))
+               return ERR_CAST(r);
+
+       return to_amdgpu_svm_range(r);
+}
+
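+/*
+ * Get the pages backing @range. -EOPNOTSUPP signals a mix of page types
+ * that cannot be mapped together, so evict the range and let the caller
+ * retry with a fresh one.
+ */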
+int amdgpu_svm_range_get_pages(struct amdgpu_svm *svm,
+                              struct drm_gpusvm_range *range,
+                              struct drm_gpusvm_ctx *ctx)
+{
+       int ret;
+
+       ret = drm_gpusvm_range_get_pages(&svm->gpusvm, range, ctx);
+       if (ret == -EOPNOTSUPP) {
+               AMDGPU_SVM_ERR("range get pages failed, evicting range: gpuva=[0x%lx-0x%lx) ret=%d\n",
+                               drm_gpusvm_range_start(range),
+                               drm_gpusvm_range_end(range), ret);
+               drm_gpusvm_range_evict(&svm->gpusvm, range);
+       }
+
+       return ret;
+}
+
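+/*
+ * Full mapping pipeline: reserve the page directory, revalidate the pages
+ * under the notifier lock, program the PTEs, update the PDEs and flush the
+ * TLB. Returns -EAGAIN when the pages were invalidated in the meantime and
+ * the caller has to start over.
+ */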
+int amdgpu_svm_range_update_mapping(struct amdgpu_svm *svm,
+                                   struct amdgpu_svm_range *range,
+                                   uint64_t pte_flags,
+                                   uint32_t attrs_flags,
+                                   bool intr, bool wait,
+                                   bool flush_tlb)
+{
+       struct drm_exec exec;
+       struct dma_fence *fence = NULL;
+       unsigned int flags;
+       int ret;
+
+       ret = amdgpu_svm_range_lock_vm_pd(svm, &exec, intr);
+       if (ret)
+               return ret;
+
+       flags = memalloc_noreclaim_save();
+       drm_gpusvm_notifier_lock(&svm->gpusvm);
+
+       if (!amdgpu_svm_range_pages_valid(svm, range)) {
+               range_invalidate_gpu_mapping(range);
+               ret = -EAGAIN;
+       } else {
+               ret = amdgpu_svm_range_update_gpu_range(svm, range, pte_flags,
+                                                       flush_tlb, wait,
+                                                       wait ? &fence : NULL);
+       }
+
+       drm_gpusvm_notifier_unlock(&svm->gpusvm);
+       memalloc_noreclaim_restore(flags);
+
+       if (!ret && fence)
+               ret = dma_fence_wait(fence, intr);
+       dma_fence_put(fence);
+
+       if (!ret)
+               ret = amdgpu_vm_update_pdes(svm->adev, svm->vm, false);
+
+       if (!ret) {
+               if (flush_tlb)
+                       svm->flush_tlb(svm);
+               WRITE_ONCE(range->pte_flags, pte_flags);
+               WRITE_ONCE(range->attr_flags, attrs_flags);
+               WRITE_ONCE(range->gpu_mapped, true);
+               range->validate_timestamp = ktime_get_boottime();
+       }
+
+       drm_exec_fini(&exec);
+       return ret;
+}
+
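+/*
+ * Map [@start, @end) using the PTE flags derived from @attrs, creating
+ * ranges as needed and skipping ranges that are already validly mapped.
+ */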
+int
+amdgpu_svm_range_map_attrs(struct amdgpu_svm *svm,
+                          const struct amdgpu_svm_attrs *attrs,
+                          unsigned long start, unsigned long end)
+{
+       unsigned long addr = start;
+       int ret;
+       /* TODO: use amdgpu_svm_attr_devmem_possible() once migration lands */
+       bool devmem_possible = false;
+       bool need_vram_migration = amdgpu_svm_attr_prefer_vram(svm, attrs);
+       struct drm_gpusvm_ctx map_ctx = {
+               .read_only = !!(attrs->flags & AMDGPU_SVM_ATTR_BIT_GPU_RO),
+               .devmem_possible = devmem_possible,
+               .devmem_only = need_vram_migration && devmem_possible,
+               .check_pages_threshold = devmem_possible ? SZ_64K : 0,
+       };
+
+       while (addr < end) {
+               struct amdgpu_svm_range *range;
+               unsigned long next_addr;
+               uint64_t range_pte_flags;
+
+               range = amdgpu_svm_range_find_or_insert(svm, addr,
+                                                       addr, end,
+                                                       &map_ctx);
+               if (IS_ERR(range)) {
+                       AMDGPU_SVM_ERR("failed to find or insert range for gpuva 0x%lx [0x%lx-0x%lx), ret=%ld\n",
+                                       addr, start, end, PTR_ERR(range));
+                       return PTR_ERR(range);
+               }
+
+               next_addr = drm_gpusvm_range_end(&range->base);
+               if (next_addr <= addr)
+                       return -EINVAL;
+
+               range_pte_flags = amdgpu_svm_range_attr_pte_flags(svm, attrs,
+                                                                 map_ctx.read_only);
+
+               if (amdgpu_svm_range_is_valid(svm, range, attrs, range_pte_flags)) {
+                       addr = next_addr;
+                       continue;
+               }
+
+               /* TODO: add migration */
+
+               AMDGPU_SVM_RANGE_DEBUG(range, "PREFETCH - GET PAGES");
+
+               ret = amdgpu_svm_range_get_pages(svm, &range->base,
+                                                &map_ctx);
+               if (ret) {
+                       AMDGPU_SVM_ERR("failed to get pages for range [0x%lx-0x%lx), ret=%d\n",
+                                       drm_gpusvm_range_start(&range->base),
+                                       drm_gpusvm_range_end(&range->base), ret);
+                       return ret;
+               }
+
+               AMDGPU_SVM_RANGE_DEBUG(range, "PREFETCH - UPDATE MAPPING");
+
+               ret = amdgpu_svm_range_update_mapping(svm, range,
+                                                     range_pte_flags,
+                                                     attrs->flags,
+                                                     true, true,
+                                                     true);
+               if (ret) {
+                       AMDGPU_SVM_ERR("failed to update gpu mapping for range [0x%lx-0x%lx), ret=%d\n",
+                                       drm_gpusvm_range_start(&range->base),
+                                       drm_gpusvm_range_end(&range->base), ret);
+                       return ret;
+               }
+
+               addr = next_addr;
+       }
+
+       return 0;
+}
+
+static int
+amdgpu_svm_range_map_attr_range(struct amdgpu_svm *svm,
+                               struct amdgpu_svm_attr_range *attr_range)
+{
+       return amdgpu_svm_range_map_attrs(svm, &attr_range->attrs,
+                                         amdgpu_svm_attr_start(attr_range),
+                                         amdgpu_svm_attr_end(attr_range));
+}
+
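+/*
+ * Walk the attribute tree and map every accessible attribute range that
+ * intersects [@start_page, @last_page].
+ */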
+int
+amdgpu_svm_range_map_interval(struct amdgpu_svm *svm,
+                             unsigned long start_page,
+                             unsigned long last_page)
+{
+       struct amdgpu_svm_attr_tree *attr_tree = svm->attr_tree;
+       unsigned long cursor = start_page;
+
+       amdgpu_svm_assert_locked(svm);
+
+       while (cursor <= last_page) {
+               struct amdgpu_svm_attrs attrs;
+               struct amdgpu_svm_attr_range *attr_range;
+               unsigned long seg_last;
+               unsigned long seg_start;
+               unsigned long next;
+               int ret;
+
+               mutex_lock(&attr_tree->lock);
+               attr_range = amdgpu_svm_attr_get_bounds_locked(attr_tree, cursor,
+                                                              &seg_start, &seg_last);
+               if (attr_range)
+                       attrs = attr_range->attrs;
+               mutex_unlock(&attr_tree->lock);
+
+               seg_last = min(seg_last, last_page);
+               if (attr_range && amdgpu_svm_attr_has_access(attrs.access)) {
+                       /* mapping may fail here when there is no VMA or access is denied */
+                       ret = amdgpu_svm_range_map_attr_range(svm, attr_range);
+                       if (ret)
+                               return ret;
+               }
+
+               if (seg_last == ULONG_MAX || seg_last == last_page)
+                       break;
+
+               next = seg_last + 1;
+               if (next <= cursor)
+                       break;
+               cursor = next;
+       }
+
+       return 0;
+}
+
-- 
2.34.1
