From: Honglei Huang <[email protected]>

Implement GPU page table mapping in amdgpu_svm_range.c:
- amdgpu_svm_range_pages_valid: check notifier sequence validity
- amdgpu_svm_range_is_valid: validate gpu_mapped and attr_flags
  against the requested attribute set; PTE flags are no longer
  cached on the range (recomputed per DMA segment), so they are
  not part of the validity check
- amdgpu_svm_range_zap_ptes: clear GPU PTEs via amdgpu_vm_update_range
  for an explicit [start_page, last_page] window (exposed for use by
  notifier and attribute-invalidation paths in later commits)
- amdgpu_svm_range_attr_pte_flags: compute PTE flags per GC IP version
  from the attribute set plus the per-segment drm_pagemap protocol.
  Branches model kfd_svm semantics:
    * 9.4.1/9.4.2: local VRAM uses CC/RW (XGMI-to-CPU keeps snoop on
      9.4.2); remote/sysmem keeps UC/NC
    * 9.4.3/9.4.4/9.5.0: snoop on; is_vram tree picks mtype_local,
      NC or UC based on ext_coherent and same-hive; sysmem splits
      APU (NUMA-aware) vs dGPU
    * 12.0.0/12.0.1: NC
    * 12.1.0: AID A1 aware mtype_local/mtype_remote; ext_coherent UC;
      sets AMDGPU_PTE_BUS_ATOMICS when the device supports it
  AMDGPU_PTE_SYSTEM is set only when the segment is not local VRAM
  (is_local derived from proto == AMDGPU_INTERCONNECT_VRAM).
- amdgpu_svm_range_lock_vm_pd: acquire VM page directory via drm_exec
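- amdgpu_svm_range_update_gpu_range: DMA segment coalescing; recomputes
  PTE flags per segment from entry->proto via attr_pte_flags, then
  programs PTEs under notifier lock; until migration support lands, any
  segment whose proto is not DRM_INTERCONNECT_SYSTEM is rejected with
  -EOPNOTSUPP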
- amdgpu_svm_range_find_or_insert: wrapper with read-only fallback
- amdgpu_svm_range_get_pages: pages acquisition; on -EOPNOTSUPP from
  the mixed-residency path evicts via amdgpu_svm_range_evict
- amdgpu_svm_range_evict: thin wrapper around drm_gpusvm_range_evict
  that short-circuits when the range has no devmem-backed pages
- amdgpu_svm_range_update_mapping: full pipeline (lock PD, validate
  pages, program PTEs, update PDEs, flush TLB, record attr_flags
  and mapped state); takes attrs + read_only and forwards them to
  update_gpu_range
- amdgpu_svm_range_map_attrs: cursor-based iteration across attr tree,
  creating/mapping ranges; no longer pre-computes pte_flags (computed
  per segment inside update_gpu_range)

Also add to amdgpu_svm.h driver-private interconnect tags used in
drm_pagemap_addr.proto (mirrors xe's XE_INTERCONNECT_VRAM/_P2P):
  AMDGPU_INTERCONNECT_VRAM = DRM_INTERCONNECT_DRIVER
  AMDGPU_INTERCONNECT_P2P  = AMDGPU_INTERCONNECT_VRAM + 1
and include <drm/drm_pagemap.h>.

Signed-off-by: Honglei Huang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h       |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 469 ++++++++++++++++++
 2 files changed, 473 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h
index 49f8b8f06..b04ef1617 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm.h
@@ -27,6 +27,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_gpusvm.h>
+#include <drm/drm_pagemap.h>
 #include <linux/atomic.h>
 #include <linux/kref.h>
 #include <linux/list.h>
@@ -44,6 +45,9 @@ struct amdgpu_svm_attrs;
 struct drm_device;
 struct drm_file;
 
+/*
+ * Driver-private interconnect tags stored in drm_pagemap_addr.proto.
+ * They start at DRM_INTERCONNECT_DRIVER; _P2P is the value right after _VRAM.
+ */
+#define AMDGPU_INTERCONNECT_VRAM       DRM_INTERCONNECT_DRIVER
+#define AMDGPU_INTERCONNECT_P2P                (AMDGPU_INTERCONNECT_VRAM + 1)
+
 enum amdgpu_svm_xnack_mode {
        AMDGPU_SVM_XNACK_OFF,
        AMDGPU_SVM_XNACK_ON,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
new file mode 100644
index 000000000..eda3ebaf5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_svm.h"
+#include "amdgpu_svm_attr.h"
+#include "amdgpu_svm_range.h"
+#include "amdgpu_svm_fault.h"
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+
+#include <drm/drm_exec.h>
+#include <drm/drm_pagemap.h>
+
+#include <linux/mmu_notifier.h>
+#include <uapi/linux/kfd_ioctl.h>
+
+/**
+ * amdgpu_svm_range_pages_valid - check whether a range's pages are usable
+ * @svm: SVM context that owns the range
+ * @range: range to inspect
+ *
+ * Must be called with the gpusvm notifier lock held. A range whose pages
+ * are flagged unmapped or partially unmapped is never valid; otherwise the
+ * decision is delegated to drm_gpusvm_range_pages_valid().
+ *
+ * Return: true if the pages are still valid, false otherwise.
+ */
+bool
+amdgpu_svm_range_pages_valid(struct amdgpu_svm *svm,
+                 struct amdgpu_svm_range *range)
+{
+       struct drm_gpusvm_range *gpusvm_range = &range->base;
+       bool torn_down;
+
+       lockdep_assert_held(&svm->gpusvm.notifier_lock);
+
+       torn_down = gpusvm_range->pages.flags.unmapped ||
+                   gpusvm_range->pages.flags.partial_unmap;
+
+       return !torn_down &&
+              drm_gpusvm_range_pages_valid(&svm->gpusvm, gpusvm_range);
+}
+
+/**
+ * amdgpu_svm_range_is_valid - test whether a range already matches @attrs
+ * @svm: SVM context that owns the range
+ * @range: range to test
+ * @attrs: attribute set the mapping is expected to reflect
+ *
+ * Takes the gpusvm notifier lock inside a memalloc_noreclaim section and
+ * reports whether the range is marked GPU-mapped, was mapped with the same
+ * attribute flags as @attrs, and still has valid pages.
+ *
+ * Return: true if all three conditions hold, false otherwise.
+ */
+bool amdgpu_svm_range_is_valid(struct amdgpu_svm *svm,
+                              struct amdgpu_svm_range *range,
+                              const struct amdgpu_svm_attrs *attrs)
+{
+       unsigned int noreclaim_flags = memalloc_noreclaim_save();
+       bool up_to_date = false;
+
+       drm_gpusvm_notifier_lock(&svm->gpusvm);
+       if (range->gpu_mapped && range->attr_flags == attrs->flags)
+               up_to_date = amdgpu_svm_range_pages_valid(svm, range);
+       drm_gpusvm_notifier_unlock(&svm->gpusvm);
+
+       memalloc_noreclaim_restore(noreclaim_flags);
+
+       return up_to_date;
+}
+
+
+/**
+ * amdgpu_svm_range_zap_ptes - clear the GPU PTEs of a page window
+ * @svm: SVM context providing the device and VM
+ * @range: range the window belongs to (not consulted by this function)
+ * @start_page: first page of the window
+ * @last_page: last page of the window, inclusive
+ *
+ * Calls amdgpu_vm_update_range() with zero flags and addresses over
+ * [@start_page, @last_page] inside a memalloc_noreclaim section, then waits
+ * uninterruptibly on the returned fence, if there is one. An empty window
+ * (@last_page < @start_page) is a no-op.
+ *
+ * Return: 0 on success or for an empty window, otherwise the error returned
+ * by amdgpu_vm_update_range() or dma_fence_wait().
+ */
+int
+amdgpu_svm_range_zap_ptes(struct amdgpu_svm *svm,
+                         struct amdgpu_svm_range *range,
+                         unsigned long start_page,
+                         unsigned long last_page)
+{
+       struct dma_fence *unmap_fence = NULL;
+       unsigned int noreclaim_flags;
+       int ret;
+
+       if (start_page > last_page)
+               return 0;
+
+       noreclaim_flags = memalloc_noreclaim_save();
+       ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, true, true, false,
+                                    NULL, start_page, last_page, 0, 0, 0, NULL,
+                                    NULL, &unmap_fence);
+       memalloc_noreclaim_restore(noreclaim_flags);
+
+       /* Nothing to wait for on failure or when no fence was handed back. */
+       if (ret || !unmap_fence)
+               goto out_put;
+
+       ret = dma_fence_wait(unmap_fence, false);
+       if (ret < 0)
+               AMDGPU_SVM_TRACE("notifier unmap fence wait failed: ret=%d [0x%lx-0x%lx]-0x%lx\n",
+                                ret, start_page, last_page,
+                                last_page - start_page + 1);
+
+out_put:
+       dma_fence_put(unmap_fence);
+       return ret;
+}
+
+/**
+ * amdgpu_svm_range_attr_pte_flags - build the PTE flags for one DMA segment
+ * @svm: SVM context; supplies the device (adev) and the VM
+ * @attrs: attribute set whose flags select coherency, exec and read-only
+ * @read_only: when true, AMDGPU_PTE_WRITEABLE is never set
+ * @proto: interconnect protocol of the segment being mapped
+ *
+ * The memory type (MTYPE) and the snoop setting are picked per GC IP
+ * version. The resulting mapping flags are passed through
+ * amdgpu_gmc_get_vm_pte() and the access bits are added afterwards.
+ *
+ * Return: the 64-bit PTE flag word for the segment.
+ */
+uint64_t
+amdgpu_svm_range_attr_pte_flags(struct amdgpu_svm *svm,
+                           const struct amdgpu_svm_attrs *attrs,
+                           bool read_only,
+                           enum drm_interconnect_protocol proto)
+{
+       uint32_t flags = attrs->flags;
+       uint32_t mapping_flags = 0;
+       uint32_t gc_ip_version = amdgpu_ip_version(svm->adev, GC_HWIP, 0);
+       uint64_t pte_flags;
+       /* Default: snoop everything but local VRAM; branches below may override */
+       bool snoop = proto != AMDGPU_INTERCONNECT_VRAM;
+       /* "coherent" is satisfied by either the COHERENT or the EXT_COHERENT bit */
+       bool coherent = flags & (AMDGPU_SVM_ATTR_BIT_COHERENT |
+                                AMDGPU_SVM_ATTR_BIT_EXT_COHERENT);
+       bool ext_coherent = flags & AMDGPU_SVM_ATTR_BIT_EXT_COHERENT;
+       unsigned int mtype_local, mtype_remote;
+       bool is_aid_a1;
+       /* is_local: this device's VRAM; is_vram: local VRAM or P2P-tagged memory */
+       bool is_local = (proto == AMDGPU_INTERCONNECT_VRAM);
+       bool is_vram = is_local || (proto == AMDGPU_INTERCONNECT_P2P);
+
+       switch (gc_ip_version) {
+       case IP_VERSION(9, 4, 1):
+       case IP_VERSION(9, 4, 2):
+               if (is_local) {
+                       mapping_flags |= coherent ?
+                               AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+                       /* 9.4.2 local VRAM with XGMI keeps snoop */
+                       if (gc_ip_version == IP_VERSION(9, 4, 2) &&
+                           svm->adev->gmc.xgmi.connected_to_cpu)
+                               snoop = true;
+               } else {
+                       mapping_flags |= coherent ?
+                               AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+                       /* TODO: migration: re enable snoop for same hive */
+               }
+               break;
+       case IP_VERSION(9, 4, 3):
+       case IP_VERSION(9, 4, 4):
+       case IP_VERSION(9, 5, 0):
+               /* mtype_local follows the amdgpu_mtype_local setting unless ext_coherent forces CC */
+               if (ext_coherent)
+                       mtype_local = AMDGPU_VM_MTYPE_CC;
+               else
+                       mtype_local = amdgpu_mtype_local == 1 ? 
AMDGPU_VM_MTYPE_NC :
+                               amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC :
+                               AMDGPU_VM_MTYPE_RW;
+               snoop = true;
+               if (is_vram) {
+                       if (is_local) {
+                               /* local HBM region close to partition */
+                               mapping_flags |= mtype_local;
+                       } else if (!ext_coherent) {
+                               /* TODO: add same hive check */
+                               mapping_flags |= AMDGPU_VM_MTYPE_NC;
+                       } else if (gc_ip_version < IP_VERSION(9, 5, 0)) {
+                               /* TODO: add same hive check */
+                               mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                       } else {
+                               /*
+                                * ext_coherent is always true here (the
+                                * !ext_coherent case is handled above), so
+                                * this currently always selects UC.
+                                */
+                               mapping_flags |= ext_coherent ? 
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+                       }
+               } else if (svm->adev->flags & AMD_IS_APU) {
+                       /* On NUMA systems, locality is determined per-page
+                        * in amdgpu_gmc_override_vm_pte_flags.
+                        */
+                       if (num_possible_nodes() <= 1)
+                               mapping_flags |= mtype_local;
+                       else
+                               mapping_flags |= ext_coherent ? 
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               } else {
+                       /* non-VRAM segment on a device without AMD_IS_APU set */
+                       if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
+                               mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                       else
+                               mapping_flags |= AMDGPU_VM_MTYPE_NC;
+               }
+               break;
+       case IP_VERSION(12, 0, 0):
+       case IP_VERSION(12, 0, 1):
+               mapping_flags |= AMDGPU_VM_MTYPE_NC;
+               break;
+       case IP_VERSION(12, 1, 0):
+               /* NOTE(review): rev_id bit 0x10 presumably identifies AID A1 - confirm */
+               is_aid_a1 = (svm->adev->rev_id & 0x10);
+               mtype_local = amdgpu_mtype_local == 0 ? AMDGPU_VM_MTYPE_RW :
+                               amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+                               is_aid_a1 ? AMDGPU_VM_MTYPE_RW : 
AMDGPU_VM_MTYPE_NC;
+               mtype_remote = is_aid_a1 ? AMDGPU_VM_MTYPE_NC : 
AMDGPU_VM_MTYPE_UC;
+               snoop = true;
+
+               if (is_local) {
+                       mapping_flags |= mtype_local;
+               } else if (ext_coherent) {
+                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
+               } else {
+                       /* system memory or remote VRAM */
+                       mapping_flags |= mtype_remote;
+               }
+               break;
+       default:
+               /* Any other GC version: UC when coherent, NC otherwise */
+               mapping_flags |= coherent ?
+                       AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+               break;
+       }
+
+       if (flags & AMDGPU_SVM_ATTR_BIT_GPU_EXEC)
+               mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+       /* Base bits assembled before amdgpu_gmc_get_vm_pte() is consulted */
+       pte_flags = AMDGPU_PTE_VALID;
+       pte_flags |= is_local ? 0 : AMDGPU_PTE_SYSTEM;
+       pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+       if (gc_ip_version >= IP_VERSION(12, 0, 0))
+               pte_flags |= AMDGPU_PTE_IS_PTE;
+
+       amdgpu_gmc_get_vm_pte(svm->adev, svm->vm, NULL, mapping_flags, 
&pte_flags);
+       /* Always readable; writable only if neither attrs nor caller ask for RO */
+       pte_flags |= AMDGPU_PTE_READABLE;
+       if (!(flags & AMDGPU_SVM_ATTR_BIT_GPU_RO) && !read_only)
+               pte_flags |= AMDGPU_PTE_WRITEABLE;
+
+       /* Bus atomics are only advertised on GC 12.1.0 when the device supports them */
+       if (gc_ip_version == IP_VERSION(12, 1, 0) &&
+           svm->adev->have_atomics_support)
+               pte_flags |= AMDGPU_PTE_BUS_ATOMICS;
+
+       return pte_flags;
+}
+
+
+
+/**
+ * amdgpu_svm_range_lock_vm_pd - lock the VM page directory through drm_exec
+ * @svm: SVM context whose VM page directory is locked
+ * @exec: caller-provided drm_exec context; initialised here
+ * @intr: when true, the lock wait is interruptible
+ *
+ * On success the caller owns @exec and must call drm_exec_fini() on it.
+ * On failure @exec has already been finalised.
+ *
+ * Return: 0 on success, otherwise the error from amdgpu_vm_lock_pd().
+ */
+int amdgpu_svm_range_lock_vm_pd(struct amdgpu_svm *svm, struct drm_exec *exec,
+                               bool intr)
+{
+       unsigned int exec_flags;
+       int ret = 0;
+
+       exec_flags = intr ?
+               DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT :
+               DRM_EXEC_IGNORE_DUPLICATES;
+
+       drm_exec_init(exec, exec_flags, 0);
+       drm_exec_until_all_locked(exec) {
+               ret = amdgpu_vm_lock_pd(svm->vm, exec, 1);
+               /* Contention restarts the locking loop before ret is examined. */
+               drm_exec_retry_on_contention(exec);
+               if (ret)
+                       break;
+       }
+
+       if (ret)
+               drm_exec_fini(exec);
+
+       return ret;
+}
+
+/**
+ * amdgpu_svm_range_update_gpu_range - program GPU PTEs for a range
+ * @svm: SVM context providing the device and VM
+ * @range: range whose DMA segments are mapped
+ * @attrs: attribute set used to derive the PTE flags
+ * @read_only: forwarded to amdgpu_svm_range_attr_pte_flags()
+ * @flush_tlb: request a TLB flush on the final segment's update
+ * @wait_fence: when true, @fence is passed to the final segment's update
+ * @fence: fence output slot, used only together with @wait_fence
+ *
+ * Must be called with the gpusvm notifier lock held. Walks the range's
+ * dma_addr array one entry per segment; each entry covers 1 << order pages,
+ * clamped to the pages left in the range. PTE flags are recomputed for
+ * every segment.
+ *
+ * Return: 0 on success, -EINVAL if the range has no DMA addresses or no
+ * pages, -EOPNOTSUPP when a segment's proto is not DRM_INTERCONNECT_SYSTEM
+ * (segments before it have already been programmed), or the error from
+ * amdgpu_vm_update_range().
+ */
+int
+amdgpu_svm_range_update_gpu_range(struct amdgpu_svm *svm,
+                                 struct amdgpu_svm_range *range,
+                                 const struct amdgpu_svm_attrs *attrs,
+                                 bool read_only,
+                                 bool flush_tlb,
+                                 bool wait_fence,
+                                 struct dma_fence **fence)
+{
+       struct drm_gpusvm_range *base = &range->base;
+       const unsigned long first_page = drm_gpusvm_range_start(base) >> PAGE_SHIFT;
+       const unsigned long end_page = drm_gpusvm_range_end(base) >> PAGE_SHIFT;
+       const unsigned long total_pages = end_page - first_page;
+       const struct drm_pagemap_addr *seg;
+       unsigned long done;
+
+       lockdep_assert_held(&svm->gpusvm.notifier_lock);
+
+       if (!total_pages || !base->pages.dma_addr)
+               return -EINVAL;
+
+       seg = base->pages.dma_addr;
+       for (done = 0; done < total_pages; seg++) {
+               unsigned long count = min_t(unsigned long, 1UL << seg->order,
+                                           total_pages - done);
+               unsigned long seg_first = first_page + done;
+               uint64_t pte_flags;
+               bool final_seg;
+               int ret;
+
+               if (seg->proto != DRM_INTERCONNECT_SYSTEM)
+                       return -EOPNOTSUPP;
+
+               pte_flags = amdgpu_svm_range_attr_pte_flags(svm, attrs,
+                                                           read_only,
+                                                           seg->proto);
+
+               done += count;
+               final_seg = (done == total_pages);
+
+               /* TLB flush and fence are only requested on the last segment. */
+               ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, false,
+                                            flush_tlb && final_seg, true, NULL,
+                                            seg_first, seg_first + count - 1,
+                                            pte_flags, 0, seg->addr, NULL, NULL,
+                                            (wait_fence && final_seg) ? fence : NULL);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/**
+ * amdgpu_svm_range_find_or_insert - look up or create the range at @addr
+ * @svm: SVM context
+ * @addr: address the range must cover
+ * @gpuva_start: start of the window handed to drm_gpusvm
+ * @gpuva_end: end of the window handed to drm_gpusvm
+ * @ctx: gpusvm context; ctx->read_only may be switched to true here
+ *
+ * Wraps drm_gpusvm_range_find_or_insert(). If it fails with -EPERM while
+ * @ctx is not read-only, @ctx is changed to read-only and the call is
+ * repeated once. The change to @ctx stays visible to the caller.
+ *
+ * Return: the amdgpu range on success, otherwise an ERR_PTR.
+ */
+struct amdgpu_svm_range *
+amdgpu_svm_range_find_or_insert(struct amdgpu_svm *svm, unsigned long addr,
+                               unsigned long gpuva_start, unsigned long gpuva_end,
+                               struct drm_gpusvm_ctx *ctx)
+{
+       struct drm_gpusvm_range *found;
+
+       for (;;) {
+               found = drm_gpusvm_range_find_or_insert(&svm->gpusvm, addr,
+                                                       gpuva_start, gpuva_end,
+                                                       ctx);
+               if (!IS_ERR(found))
+                       return to_amdgpu_svm_range(found);
+
+               /* Only -EPERM on a writable request earns a read-only retry. */
+               if (PTR_ERR(found) != -EPERM || ctx->read_only)
+                       return ERR_CAST(found);
+
+               ctx->read_only = true;
+       }
+}
+
+/**
+ * amdgpu_svm_range_get_pages - acquire the pages backing a range
+ * @svm: SVM context
+ * @range: gpusvm range to populate
+ * @ctx: gpusvm context forwarded to drm_gpusvm_range_get_pages()
+ *
+ * When drm_gpusvm_range_get_pages() fails with -EOPNOTSUPP the range is
+ * evicted through amdgpu_svm_range_evict(). This function does not retry
+ * by itself; the error is still returned to the caller.
+ *
+ * Return: the result of drm_gpusvm_range_get_pages().
+ */
+int amdgpu_svm_range_get_pages(struct amdgpu_svm *svm,
+                              struct drm_gpusvm_range *range,
+                              struct drm_gpusvm_ctx *ctx)
+{
+       int ret = drm_gpusvm_range_get_pages(&svm->gpusvm, range, ctx);
+
+       if (ret != -EOPNOTSUPP)
+               return ret;
+
+       AMDGPU_SVM_ERR("range get pages failed with -EOPNOTSUPP, evicting range and retrying: gpuva=[0x%lx-0x%lx) ret=%d\n",
+                       drm_gpusvm_range_start(range),
+                       drm_gpusvm_range_end(range), ret);
+       amdgpu_svm_range_evict(svm, range);
+
+       return ret;
+}
+
+/**
+ * amdgpu_svm_range_evict - evict a range that has device-memory pages
+ * @svm: SVM context
+ * @range: gpusvm range to evict
+ *
+ * Does nothing unless the range's has_devmem_pages flag is set; otherwise
+ * calls drm_gpusvm_range_evict(), whose result is not reported.
+ */
+void amdgpu_svm_range_evict(struct amdgpu_svm *svm,
+                           struct drm_gpusvm_range *range)
+{
+       if (range->pages.flags.has_devmem_pages)
+               drm_gpusvm_range_evict(&svm->gpusvm, range);
+}
+
+/**
+ * amdgpu_svm_range_update_mapping - (re)program the GPU mapping of a range
+ * @svm: SVM context providing the device, VM and flush_tlb callback
+ * @range: range to map
+ * @attrs: attribute set to map with; its flags are recorded on success
+ * @read_only: forwarded to amdgpu_svm_range_update_gpu_range()
+ * @intr: lock the page directory and wait for the fence interruptibly
+ * @wait: request the fence of the PTE update and wait for it
+ * @flush_tlb: flush the TLB once the mapping has been written
+ *
+ * Locks the VM page directory, then under the gpusvm notifier lock checks
+ * that the pages are still valid and programs the PTEs. After dropping the
+ * notifier lock it waits for the update fence (if requested), updates the
+ * PDEs, flushes the TLB and marks the range as mapped with @attrs->flags.
+ *
+ * Return: 0 on success; -EAGAIN if the pages were invalidated meanwhile
+ * (the range's GPU mapping is invalidated in that case); otherwise the
+ * error from locking, PTE programming, the fence wait or the PDE update.
+ */
+int amdgpu_svm_range_update_mapping(struct amdgpu_svm *svm,
+                                   struct amdgpu_svm_range *range,
+                                   const struct amdgpu_svm_attrs *attrs,
+                                   bool read_only,
+                                   bool intr, bool wait,
+                                   bool flush_tlb)
+{
+       struct drm_exec exec;
+       struct dma_fence *fence = NULL;
+       unsigned int flags;
+       int ret;
+
+       ret = amdgpu_svm_range_lock_vm_pd(svm, &exec, intr);
+       if (ret)
+               return ret;
+
+       flags = memalloc_noreclaim_save();
+       drm_gpusvm_notifier_lock(&svm->gpusvm);
+
+       if (!amdgpu_svm_range_pages_valid(svm, range)) {
+               amdgpu_svm_range_invalidate_gpu_mapping(range);
+               ret = -EAGAIN;
+       } else {
+               ret = amdgpu_svm_range_update_gpu_range(svm, range, attrs,
+                                                       read_only, flush_tlb,
+                                                       wait, wait ? &fence : NULL);
+       }
+
+       drm_gpusvm_notifier_unlock(&svm->gpusvm);
+       memalloc_noreclaim_restore(flags);
+
+       /*
+        * Propagate the wait result: an interrupted wait (@intr) means the
+        * PTE update may not have completed, so the range must not be marked
+        * as mapped and the TLB flush must not be issued on its behalf.
+        */
+       if (!ret && fence)
+               ret = dma_fence_wait(fence, intr);
+       dma_fence_put(fence);
+
+       if (!ret)
+               ret = amdgpu_vm_update_pdes(svm->adev, svm->vm, false);
+
+       if (!ret) {
+               if (flush_tlb)
+                       svm->flush_tlb(svm);
+               WRITE_ONCE(range->attr_flags, attrs->flags);
+               WRITE_ONCE(range->gpu_mapped, true);
+               range->validate_timestamp = ktime_get_boottime();
+       }
+
+       drm_exec_fini(&exec);
+       return ret;
+}
+
+/**
+ * amdgpu_svm_range_map_attrs - map every range in [@start, @end) with @attrs
+ * @svm: SVM context
+ * @attrs: attribute set to apply
+ * @start: first address of the interval
+ * @end: end of the interval (exclusive)
+ *
+ * Steps through the interval one gpusvm range at a time. Each range is
+ * looked up or created; ranges that amdgpu_svm_range_is_valid() already
+ * accepts are skipped, the others get their pages acquired and their GPU
+ * mapping updated (interruptible, waiting, with TLB flush). Device memory
+ * is not used yet (devmem_possible is hard-wired to false).
+ *
+ * Return: 0 on success, -EINVAL if a range does not advance the cursor,
+ * otherwise the first error from lookup, page acquisition or mapping.
+ */
+int
+amdgpu_svm_range_map_attrs(struct amdgpu_svm *svm,
+                      const struct amdgpu_svm_attrs *attrs,
+                      unsigned long start, unsigned long end)
+{
+       bool devmem_possible = false; /* TODO: add migration */
+       bool need_vram_migration = amdgpu_svm_attr_prefer_vram(svm, attrs);
+       struct drm_gpusvm_ctx map_ctx = {
+               .read_only = !!(attrs->flags & AMDGPU_SVM_ATTR_BIT_GPU_RO),
+               .devmem_possible = devmem_possible,
+               .devmem_only = need_vram_migration && devmem_possible,
+               .check_pages_threshold = devmem_possible ? SZ_64K : 0,
+       };
+       unsigned long cursor;
+       unsigned long next_addr;
+
+       for (cursor = start; cursor < end; cursor = next_addr) {
+               struct amdgpu_svm_range *range;
+               int ret;
+
+               range = amdgpu_svm_range_find_or_insert(svm, cursor,
+                                                       cursor, end,
+                                                       &map_ctx);
+               if (IS_ERR(range)) {
+                       AMDGPU_SVM_ERR("failed to find or insert range for gpuva 0x%lx [0x%lx-0x%lx), ret=%ld\n",
+                                       cursor, start, end, PTR_ERR(range));
+                       return PTR_ERR(range);
+               }
+
+               /* The range must end past the cursor or the walk cannot advance. */
+               next_addr = drm_gpusvm_range_end(&range->base);
+               if (next_addr <= cursor)
+                       return -EINVAL;
+
+               if (amdgpu_svm_range_is_valid(svm, range, attrs))
+                       continue;
+
+               /* TODO: add migration */
+
+               AMDGPU_SVM_RANGE_DEBUG(range, "PREFETCH - GET PAGES");
+
+               ret = amdgpu_svm_range_get_pages(svm, &range->base,
+                                                &map_ctx);
+               if (ret) {
+                       AMDGPU_SVM_ERR("failed to get pages for range [0x%lx-0x%lx), ret=%d\n",
+                                       drm_gpusvm_range_start(&range->base),
+                                       drm_gpusvm_range_end(&range->base), ret);
+                       return ret;
+               }
+
+               AMDGPU_SVM_RANGE_DEBUG(range, "PREFETCH - UPDATE MAPPING");
+
+               /* map_ctx.read_only may have been set by the lookup fallback. */
+               ret = amdgpu_svm_range_update_mapping(svm, range, attrs,
+                                                     map_ctx.read_only,
+                                                     true, true,
+                                                     true);
+               if (ret) {
+                       AMDGPU_SVM_ERR("failed to update gpu mapping for range [0x%lx-0x%lx), ret=%d\n",
+                                       drm_gpusvm_range_start(&range->base),
+                                       drm_gpusvm_range_end(&range->base), ret);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
-- 
2.34.1

Reply via email to