From: Honglei Huang <[email protected]>

Implement GPU page table mapping in amdgpu_svm_range.c:

- range_invalidate_gpu_mapping: mark range as unmapped
- amdgpu_svm_range_pages_valid: check notifier sequence validity
- amdgpu_svm_range_is_valid: full validation (mapped, PTE flags, attrs)
- amdgpu_svm_range_zap_ptes: clear GPU PTEs via amdgpu_vm_update_range
- amdgpu_svm_range_attr_pte_flags: compute PTE flags per GC IP version
  with MTYPE selection for coherency modes
- amdgpu_svm_range_lock_vm_pd: acquire VM page directory via drm_exec
- amdgpu_svm_range_update_gpu_range: DMA segment coalescing and PTE
  programming under notifier lock
- amdgpu_svm_range_find_or_insert: wrapper with read-only fallback
- amdgpu_svm_range_get_pages: pages acquisition with evict on mixed type
- amdgpu_svm_range_update_mapping: full pipeline (lock PD, validate pages,
  program PTEs, update PDEs, flush TLB, update state)
- amdgpu_svm_range_map_attrs: iterate address range creating/mapping
  ranges with attribute-driven PTE flags
- amdgpu_svm_range_map_interval: cursor-based mapping across attr tree
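The intended per-range call flow is: find or insert the range, compute the
PTE flags from the attributes, and, if the existing mapping is not already
valid, acquire the pages and program the GPU page tables. A simplified
sketch of that flow follows (error handling, migration TODOs and the attr
tree walk are omitted; amdgpu_svm_range_map_attrs in this patch is the
authoritative version, and the addr/start/end/attrs variables here are
placeholders for the caller's state):

	struct drm_gpusvm_ctx ctx = { .read_only = read_only };
	struct amdgpu_svm_range *range;
	uint64_t pte_flags;

	/* may flip ctx.read_only when writable access is refused */
	range = amdgpu_svm_range_find_or_insert(svm, addr, start, end, &ctx);

	pte_flags = amdgpu_svm_range_attr_pte_flags(svm, attrs, ctx.read_only);

	if (!amdgpu_svm_range_is_valid(svm, range, attrs, pte_flags)) {
		/* acquire pages, then program PTEs/PDEs and flush the TLB */
		amdgpu_svm_range_get_pages(svm, &range->base, &ctx);
		amdgpu_svm_range_update_mapping(svm, range, pte_flags,
						attrs->flags, true, true,
						true);
	}
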
Signed-off-by: Honglei Huang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 488 ++++++++++++++++++ 1 file changed, 488 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c new file mode 100644 index 000000000..c733d611d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2026 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_svm.h" +#include "amdgpu_svm_attr.h" +#include "amdgpu_svm_range.h" +#include "amdgpu_svm_fault.h" +#include "amdgpu.h" +#include "amdgpu_vm.h" + +#include <drm/drm_exec.h> +#include <drm/drm_pagemap.h> + +#include <linux/mmu_notifier.h> +#include <uapi/linux/kfd_ioctl.h> + +static void +range_invalidate_gpu_mapping(struct amdgpu_svm_range *range) +{ + WRITE_ONCE(range->gpu_mapped, false); +} + +bool +amdgpu_svm_range_pages_valid(struct amdgpu_svm *svm, + struct amdgpu_svm_range *range) +{ + struct drm_gpusvm_range *base = &range->base; + + lockdep_assert_held(&svm->gpusvm.notifier_lock); + + if (base->pages.flags.unmapped || base->pages.flags.partial_unmap) + return false; + + return drm_gpusvm_range_pages_valid(&svm->gpusvm, base); +} + +bool amdgpu_svm_range_is_valid(struct amdgpu_svm *svm, + struct amdgpu_svm_range *range, + const struct amdgpu_svm_attrs *attrs, + uint64_t pte_flags) +{ + unsigned int flags; + bool valid; + + flags = memalloc_noreclaim_save(); + drm_gpusvm_notifier_lock(&svm->gpusvm); + valid = range->gpu_mapped && + range->pte_flags == pte_flags && + range->attr_flags == attrs->flags && + amdgpu_svm_range_pages_valid(svm, range); + drm_gpusvm_notifier_unlock(&svm->gpusvm); + memalloc_noreclaim_restore(flags); + + return valid; +} + + +static int +amdgpu_svm_range_zap_ptes(struct amdgpu_svm *svm, + struct amdgpu_svm_range *range, + const struct mmu_notifier_range *mmu_range) +{ + struct drm_gpusvm_range *base = &range->base; + struct dma_fence *fence = NULL; + unsigned long start_page = max(drm_gpusvm_range_start(base), + mmu_range->start) >> PAGE_SHIFT; + unsigned long last_page = (min(drm_gpusvm_range_end(base), + mmu_range->end) >> PAGE_SHIFT) - 1; + unsigned int flags; + int ret; + + if (last_page < start_page) + return 0; + + flags = memalloc_noreclaim_save(); + ret = amdgpu_vm_update_range(svm->adev, svm->vm, 
false, true, true, false, + NULL, start_page, last_page, 0, 0, 0, NULL, + NULL, &fence); + memalloc_noreclaim_restore(flags); + + if (!ret && fence) { + ret = dma_fence_wait(fence, false); + if (ret < 0) + AMDGPU_SVM_TRACE("notifier unmap fence wait failed: ret=%d [0x%lx-0x%lx]-0x%lx\n", + ret, start_page, last_page, + last_page - start_page + 1); + } + + dma_fence_put(fence); + return ret; +} + +uint64_t +amdgpu_svm_range_attr_pte_flags(struct amdgpu_svm *svm, + const struct amdgpu_svm_attrs *attrs, + bool read_only) +{ + /* a simple pte flags func */ + uint32_t gc_ip_version = amdgpu_ip_version(svm->adev, GC_HWIP, 0); + uint32_t flags = attrs->flags; + uint32_t mapping_flags = 0; + uint64_t pte_flags; + bool coherent = flags & (AMDGPU_SVM_ATTR_BIT_COHERENT | + AMDGPU_SVM_ATTR_BIT_EXT_COHERENT); + bool ext_coherent = flags & AMDGPU_SVM_ATTR_BIT_EXT_COHERENT; + bool snoop = true; + unsigned int mtype_local; + + switch (gc_ip_version) { + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + break; + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): + if (ext_coherent) + mtype_local = AMDGPU_VM_MTYPE_CC; + else + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : + AMDGPU_VM_MTYPE_RW; + if (svm->adev->flags & AMD_IS_APU) { + if (num_possible_nodes() <= 1) + mapping_flags |= mtype_local; + else + mapping_flags |= ext_coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } else { + if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + } + break; + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + break; + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + mapping_flags |= AMDGPU_VM_MTYPE_NC; + break; + default: + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + break; + } + + if (flags & AMDGPU_SVM_ATTR_BIT_GPU_EXEC) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + + pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM; + pte_flags |= snoop ? 
AMDGPU_PTE_SNOOPED : 0; + if (gc_ip_version >= IP_VERSION(12, 0, 0)) + pte_flags |= AMDGPU_PTE_IS_PTE; + + amdgpu_gmc_get_vm_pte(svm->adev, svm->vm, NULL, mapping_flags, &pte_flags); + pte_flags |= AMDGPU_PTE_READABLE; + if (!(flags & AMDGPU_SVM_ATTR_BIT_GPU_RO) && !read_only) + pte_flags |= AMDGPU_PTE_WRITEABLE; + + return pte_flags; +} + + + +int amdgpu_svm_range_lock_vm_pd(struct amdgpu_svm *svm, struct drm_exec *exec, + bool intr) +{ + unsigned int exec_flags = DRM_EXEC_IGNORE_DUPLICATES; + int ret; + + if (intr) + exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT; + + drm_exec_init(exec, exec_flags, 0); + drm_exec_until_all_locked(exec) { + ret = amdgpu_vm_lock_pd(svm->vm, exec, 1); + drm_exec_retry_on_contention(exec); + if (ret) { + drm_exec_fini(exec); + return ret; + } + } + + return 0; +} + +int +amdgpu_svm_range_update_gpu_range(struct amdgpu_svm *svm, + struct amdgpu_svm_range *range, + uint64_t pte_flags, + bool flush_tlb, + bool wait_fence, + struct dma_fence **fence) +{ + struct drm_gpusvm_range *base = &range->base; + + lockdep_assert_held(&svm->gpusvm.notifier_lock); + + const unsigned long range_start_page = drm_gpusvm_range_start(base) >> PAGE_SHIFT; + const unsigned long range_end_page = drm_gpusvm_range_end(base) >> PAGE_SHIFT; + const unsigned long npages = range_end_page - range_start_page; + unsigned long mapped_pages = 0; + unsigned long dma_idx = 0; + int ret; + + if (!base->pages.dma_addr || !npages) + return -EINVAL; + + while (mapped_pages < npages) { + const struct drm_pagemap_addr *entry = &base->pages.dma_addr[dma_idx++]; + unsigned long seg_pages = min_t(unsigned long, 1UL << entry->order, + npages - mapped_pages); + unsigned long start_page, last_page; + bool is_last_seg; + + if (entry->proto != DRM_INTERCONNECT_SYSTEM) + return -EOPNOTSUPP; + + start_page = range_start_page + mapped_pages; + last_page = start_page + seg_pages - 1; + mapped_pages += seg_pages; + is_last_seg = mapped_pages == npages; + + ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, false, + flush_tlb && is_last_seg, true, NULL, + start_page, last_page, pte_flags, + 0, entry->addr, NULL, NULL, + wait_fence && is_last_seg ? 
fence : NULL); + if (ret) + return ret; + } + + return 0; +} + +struct amdgpu_svm_range * +amdgpu_svm_range_find_or_insert(struct amdgpu_svm *svm, unsigned long addr, + unsigned long gpuva_start, unsigned long gpuva_end, + struct drm_gpusvm_ctx *ctx) +{ + struct drm_gpusvm_range *r; + +retry: + r = drm_gpusvm_range_find_or_insert(&svm->gpusvm, addr, + gpuva_start, gpuva_end, ctx); + + if (PTR_ERR_OR_ZERO(r) == -EPERM && !ctx->read_only) { + ctx->read_only = true; + goto retry; + } + + if (IS_ERR(r)) + return ERR_CAST(r); + + return to_amdgpu_svm_range(r); +} + +int amdgpu_svm_range_get_pages(struct amdgpu_svm *svm, + struct drm_gpusvm_range *range, + struct drm_gpusvm_ctx *ctx) +{ + int ret; + + ret = drm_gpusvm_range_get_pages(&svm->gpusvm, range, ctx); + if (ret == -EOPNOTSUPP) { + AMDGPU_SVM_ERR("range get pages failed with -EOPNOTSUPP, evicting range and retrying: gpuva=[0x%lx-0x%lx) ret=%d\n", + drm_gpusvm_range_start(range), + drm_gpusvm_range_end(range), ret); + drm_gpusvm_range_evict(&svm->gpusvm, range); + } + + return ret; +} + +int amdgpu_svm_range_update_mapping(struct amdgpu_svm *svm, + struct amdgpu_svm_range *range, + uint64_t pte_flags, + uint32_t attrs_flags, + bool intr, bool wait, + bool flush_tlb) +{ + struct drm_exec exec; + struct dma_fence *fence = NULL; + unsigned int flags; + int ret; + + ret = amdgpu_svm_range_lock_vm_pd(svm, &exec, intr); + if (ret) + return ret; + + flags = memalloc_noreclaim_save(); + drm_gpusvm_notifier_lock(&svm->gpusvm); + + if (!amdgpu_svm_range_pages_valid(svm, range)) { + range_invalidate_gpu_mapping(range); + ret = -EAGAIN; + } else { + ret = amdgpu_svm_range_update_gpu_range(svm, range, pte_flags, + flush_tlb, wait, + wait ? &fence : NULL); + } + + drm_gpusvm_notifier_unlock(&svm->gpusvm); + memalloc_noreclaim_restore(flags); + + if (!ret && fence) + dma_fence_wait(fence, intr); + dma_fence_put(fence); + + if (!ret) + ret = amdgpu_vm_update_pdes(svm->adev, svm->vm, false); + + if (!ret) { + if (flush_tlb) + svm->flush_tlb(svm); + WRITE_ONCE(range->pte_flags, pte_flags); + WRITE_ONCE(range->attr_flags, attrs_flags); + WRITE_ONCE(range->gpu_mapped, true); + range->validate_timestamp = ktime_get_boottime(); + } + + drm_exec_fini(&exec); + return ret; +} + +int +amdgpu_svm_range_map_attrs(struct amdgpu_svm *svm, + const struct amdgpu_svm_attrs *attrs, + unsigned long start, unsigned long end) +{ + unsigned long addr = start; + int ret; + bool devmem_possible = amdgpu_svm_attr_devmem_possible(svm, attrs); + bool need_vram_migration = amdgpu_svm_attr_prefer_vram(svm, attrs); + devmem_possible = false; /* TODO: add migration */ + struct drm_gpusvm_ctx map_ctx = { + .read_only = !!(attrs->flags & AMDGPU_SVM_ATTR_BIT_GPU_RO), + .devmem_possible = devmem_possible, + .devmem_only = need_vram_migration && devmem_possible, + .check_pages_threshold = devmem_possible ? 
SZ_64K : 0, + }; + + while (addr < end) { + struct amdgpu_svm_range *range; + unsigned long next_addr; + uint64_t range_pte_flags; + range = amdgpu_svm_range_find_or_insert(svm, addr, + addr, end, + &map_ctx); + if (IS_ERR(range)) { + AMDGPU_SVM_ERR("failed to find or insert range for gpuva 0x%lx [0x%lx-0x%lx), ret=%ld\n", + addr, start, end, PTR_ERR(range)); + return PTR_ERR(range); + } + + next_addr = drm_gpusvm_range_end(&range->base); + if (next_addr <= addr) + return -EINVAL; + + range_pte_flags = amdgpu_svm_range_attr_pte_flags( + svm, attrs, map_ctx.read_only); + + if (amdgpu_svm_range_is_valid(svm, range, + attrs, range_pte_flags)) { + addr = next_addr; + continue; + } + + /* TODO: add migration */ + + AMDGPU_SVM_RANGE_DEBUG(range, "PREFETCH - GET PAGES"); + + ret = amdgpu_svm_range_get_pages(svm, &range->base, + &map_ctx); + if (ret) { + AMDGPU_SVM_ERR("failed to get pages for range [0x%lx-0x%lx), ret=%d\n", + drm_gpusvm_range_start(&range->base), + drm_gpusvm_range_end(&range->base), ret); + return ret; + } + + AMDGPU_SVM_RANGE_DEBUG(range, "PREFETCH - UPDATE MAPPING"); + + ret = amdgpu_svm_range_update_mapping(svm, range, + range_pte_flags, + attrs->flags, + true, true, + true); + if (ret) { + AMDGPU_SVM_ERR("failed to update gpu mapping for range [0x%lx-0x%lx), ret=%d\n", + drm_gpusvm_range_start(&range->base), + drm_gpusvm_range_end(&range->base), ret); + return ret; + } + + addr = next_addr; + } + + return 0; +} + +static int +amdgpu_svm_range_map_attr_range(struct amdgpu_svm *svm, + struct amdgpu_svm_attr_range *attr_range) +{ + return amdgpu_svm_range_map_attrs(svm, &attr_range->attrs, + amdgpu_svm_attr_start(attr_range), + amdgpu_svm_attr_end(attr_range)); +} + +int +amdgpu_svm_range_map_interval(struct amdgpu_svm *svm, + unsigned long start_page, + unsigned long last_page) +{ + amdgpu_svm_assert_locked(svm); + + struct amdgpu_svm_attr_tree *attr_tree = svm->attr_tree; + unsigned long cursor = start_page; + + while (cursor <= last_page) { + struct amdgpu_svm_attrs attrs; + struct amdgpu_svm_attr_range *attr_range; + unsigned long seg_last; + unsigned long seg_start; + unsigned long next; + int ret; + + mutex_lock(&attr_tree->lock); + attr_range = amdgpu_svm_attr_get_bounds_locked(attr_tree, cursor, + &seg_start, &seg_last); + if (attr_range) + attrs = attr_range->attrs; + mutex_unlock(&attr_tree->lock); + + seg_last = min(seg_last, last_page); + if (attr_range && amdgpu_svm_attr_has_access(attrs.access)) { + /* map may fail here cause no vma or access deny */ + ret = amdgpu_svm_range_map_attr_range(svm, attr_range); + if (ret) + return ret; + } + + if (seg_last == ULONG_MAX || seg_last == last_page) + break; + + next = seg_last + 1; + if (next <= cursor) + break; + cursor = next; + } + + return 0; +} + -- 2.34.1
