This gives the mm subsystem the ability to propose the insertion of a PMD or PUD mapping for the faulting address.
If the virtual address hint passed to mmap() by userspace is aligned to a
huge page size, if the GEM object is backed by a tmpfs mount point using
Transparent Hugepage, and if the shmem backing store manages to allocate
enough contiguous physical pages to fit within a huge page, the CPU
mapping then benefits from significantly increased memcpy() performance.
For instance, when these conditions are met on a system with 4 KiB base
pages and 2 MiB huge pages, a (fresh) aligned copy of 2 MiB raises a
single page fault instead of 512.

Signed-off-by: Loïc Molinari <[email protected]>
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 51 ++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 50594cf8e17c..30aa0d72093b 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -573,7 +573,8 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
 }
 EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create);
 
-static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
+static vm_fault_t drm_gem_shmem_huge_fault(struct vm_fault *vmf,
+					   unsigned int order)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
@@ -582,6 +583,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 	vm_fault_t ret;
 	struct page *page;
 	pgoff_t page_offset;
+	unsigned long pfn, paddr;
 
 	/* We don't use vmf->pgoff since that has the fake offset */
 	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
@@ -592,17 +594,55 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
 	    drm_WARN_ON_ONCE(obj->dev, !shmem->pages) ||
 	    shmem->madv < 0) {
 		ret = VM_FAULT_SIGBUS;
-	} else {
-		page = shmem->pages[page_offset];
+		goto out;
+	}
+
+	page = shmem->pages[page_offset];
+	pfn = page_to_pfn(page);
+
+	switch (order) {
+	case 0:
+		ret = vmf_insert_pfn(vma, vmf->address, pfn);
+		break;
+
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+	case PMD_ORDER:
+		paddr = pfn << PAGE_SHIFT;
+		if (((vmf->address & ~PMD_MASK) == (paddr & ~PMD_MASK)) &&
+		    (folio_order(page_folio(page)) == PMD_ORDER))
+			ret = vmf_insert_pfn_pmd(
+				vmf, pfn & (PMD_MASK >> PAGE_SHIFT), false);
+		else
+			ret = VM_FAULT_FALLBACK;
+		break;
+#endif
+
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+	case PUD_ORDER:
+		paddr = pfn << PAGE_SHIFT;
+		if (((vmf->address & ~PUD_MASK) == (paddr & ~PUD_MASK)) &&
+		    (folio_order(page_folio(page)) == PUD_ORDER))
+			ret = vmf_insert_pfn_pud(
+				vmf, pfn & (PUD_MASK >> PAGE_SHIFT), false);
+		else
+			ret = VM_FAULT_FALLBACK;
+		break;
+#endif
 
-		ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
+	default:
 	}
 
+out:
 	dma_resv_unlock(shmem->base.resv);
 
 	return ret;
 }
 
+static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
+{
+	return drm_gem_shmem_huge_fault(vmf, 0);
+}
+
 static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
@@ -639,6 +679,9 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma)
 
 const struct vm_operations_struct drm_gem_shmem_vm_ops = {
 	.fault = drm_gem_shmem_fault,
+#if defined(CONFIG_ARCH_SUPPORTS_PMD_PFNMAP) || defined(CONFIG_ARCH_SUPPORTS_PUD_PFNMAP)
+	.huge_fault = drm_gem_shmem_huge_fault,
+#endif
 	.open = drm_gem_shmem_vm_open,
 	.close = drm_gem_shmem_vm_close,
 };
-- 
2.47.3
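
For illustration, a minimal userspace sketch of the mapping pattern the
commit message relies on: the GEM object is mapped at a 2 MiB aligned
address so the huge fault handler gets a chance to install a PMD entry.
This is not part of the patch; the device path, buffer dimensions and
the manual alignment of the address hint are assumptions made for the
example, error handling is omitted, and it builds against libdrm
(pkg-config --cflags --libs libdrm).

/*
 * Illustrative sketch only: create a dumb buffer and CPU-map it at a
 * 2 MiB aligned address hint so the access pattern described in the
 * commit message can be served by PMD mappings when the backing shmem
 * pages happen to be huge.
 */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <xf86drm.h>

#define HUGE_2M (2UL << 20)

int main(void)
{
	struct drm_mode_create_dumb create = {
		.width = 1024, .height = 1024, .bpp = 32,
	};
	struct drm_mode_map_dumb map = { 0 };
	int fd = open("/dev/dri/card0", O_RDWR | O_CLOEXEC);
	uint8_t *ptr;
	void *hint;

	drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
	map.handle = create.handle;
	drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);

	/* Reserve an over-sized range and round up to 2 MiB alignment. */
	hint = mmap(NULL, create.size + HUGE_2M, PROT_NONE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	hint = (void *)(((uintptr_t)hint + HUGE_2M - 1) & ~(HUGE_2M - 1));

	/* Aligned mapping of the GEM object through its fake offset. */
	ptr = mmap(hint, create.size, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_FIXED, fd, map.offset);

	/* A fresh aligned copy may now fault once per huge page. */
	memset(ptr, 0xff, create.size);

	munmap(ptr, create.size);
	close(fd);
	return 0;
}

Even with an aligned mapping, whether a PMD entry is actually installed
still depends on the shmem backing store: the tmpfs mount has to allow
Transparent Hugepage (e.g. huge=always or huge=within_size) and the
allocator has to produce a folio of PMD_ORDER; otherwise the handler
returns VM_FAULT_FALLBACK and the access is served with small pages.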
