From: Peter Xu <[email protected]> When a VMA is registered with userfaultfd, its ->fault() method should check if a folio exists in the page cache and call handle_userfault() with the appropriate mode:
- VM_UFFD_MINOR if VMA is registered in minor mode and the folio exists - VM_UFFD_MISSING if VMA is registered in missing mode and the folio does not exist Instead of calling handle_userfault() directly from a specific ->fault() handler, call __do_userfault() helper from the generic __do_fault(). For VMAs registered with userfaultfd the new __do_userfault() helper will check if the folio is found in the page cache using vm_uffd_ops->get_folio_noalloc() and call handle_userfault() with the appropriate mode. Make vm_uffd_ops->get_folio_noalloc() required method for non-anonymous VMAs mapped at PTE level. Signed-off-by: Peter Xu <[email protected]> Co-developed-by: Mike Rapoport (Microsoft) <[email protected]> Signed-off-by: Mike Rapoport (Microsoft) <[email protected]> --- mm/memory.c | 43 +++++++++++++++++++++++++++++++++++++++++++ mm/shmem.c | 12 ------------ mm/userfaultfd.c | 8 ++++++++ 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 07778814b4a8..e2183c44d70b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5328,6 +5328,41 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) return VM_FAULT_OOM; } +#ifdef CONFIG_USERFAULTFD +static vm_fault_t __do_userfault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct inode *inode; + struct folio *folio; + + if (!(userfaultfd_missing(vma) || userfaultfd_minor(vma))) + return 0; + + inode = file_inode(vma->vm_file); + folio = vma->vm_ops->uffd_ops->get_folio_noalloc(inode, vmf->pgoff); + if (!IS_ERR_OR_NULL(folio)) { + /* + * TODO: provide a flag for get_folio_noalloc() to avoid + * locking (or even the extra reference?) 
+ */ + folio_unlock(folio); + folio_put(folio); + if (userfaultfd_minor(vma)) + return handle_userfault(vmf, VM_UFFD_MINOR); + } else { + if (userfaultfd_missing(vma)) + return handle_userfault(vmf, VM_UFFD_MISSING); + } + + return 0; +} +#else +static inline vm_fault_t __do_userfault(struct vm_fault *vmf) +{ + return 0; +} +#endif + /* * The mmap_lock must have been held on entry, and may have been * released depending on flags and vma->vm_ops->fault() return value. @@ -5360,6 +5395,14 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) return VM_FAULT_OOM; } + /* + * If this is a userfaultfd trap, process it in advance before + * triggering the genuine fault handler. + */ + ret = __do_userfault(vmf); + if (ret) + return ret; + ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) diff --git a/mm/shmem.c b/mm/shmem.c index 68620caaf75f..239545352cd2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2489,13 +2489,6 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, fault_mm = vma ? vma->vm_mm : NULL; folio = filemap_get_entry(inode->i_mapping, index); - if (folio && vma && userfaultfd_minor(vma)) { - if (!xa_is_value(folio)) - folio_put(folio); - *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); - return 0; - } - if (xa_is_value(folio)) { error = shmem_swapin_folio(inode, index, &folio, sgp, gfp, vma, fault_type); @@ -2540,11 +2533,6 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, * Fast cache lookup and swap lookup did not find it: allocate. */ - if (vma && userfaultfd_missing(vma)) { - *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); - return 0; - } - /* Find hugepage orders that are allowed for anonymous shmem and tmpfs. 
*/ orders = shmem_allowable_huge_orders(inode, vma, index, write_end, false); if (orders > 0) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 7cd7c5d1ce84..2ac5fad0ed6c 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -2045,6 +2045,14 @@ bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags, !vma_is_anonymous(vma)) return false; + /* + * File backed memory with PTE level mappings must implement + * ops->get_folio_noalloc() + */ + if (!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma) && + !ops->get_folio_noalloc) + return false; + return ops->can_userfault(vma, vm_flags); } -- 2.51.0

