From: Peter Xu <[email protected]>

When a VMA is registered with userfaultfd, its ->fault()
method should check if a folio exists in the page cache and call
handle_userfault() with appropriate mode:

- VM_UFFD_MINOR if VMA is registered in minor mode and the folio exists
- VM_UFFD_MISSING if VMA is registered in missing mode and the folio
  does not exist

Instead of calling handle_userfault() directly from a specific ->fault()
handler, call __do_userfault() helper from the generic __do_fault().

For VMAs registered with userfaultfd the new __do_userfault() helper
will check if the folio is found in the page cache using
vm_uffd_ops->get_folio_noalloc() and call handle_userfault() with the
appropriate mode.

Make vm_uffd_ops->get_folio_noalloc() a required method for non-anonymous
VMAs mapped at PTE level.

Signed-off-by: Peter Xu <[email protected]>
Co-developed-by: Mike Rapoport (Microsoft) <[email protected]>
Signed-off-by: Mike Rapoport (Microsoft) <[email protected]>
---
 mm/memory.c      | 43 +++++++++++++++++++++++++++++++++++++++++++
 mm/shmem.c       | 12 ------------
 mm/userfaultfd.c |  8 ++++++++
 3 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 07778814b4a8..e2183c44d70b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5328,6 +5328,41 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
        return VM_FAULT_OOM;
 }
 
+#ifdef CONFIG_USERFAULTFD
+static vm_fault_t __do_userfault(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       struct inode *inode;
+       struct folio *folio;
+
+       if (!(userfaultfd_missing(vma) || userfaultfd_minor(vma)))
+               return 0;
+
+       inode = file_inode(vma->vm_file);
+       folio = vma->vm_ops->uffd_ops->get_folio_noalloc(inode, vmf->pgoff);
+       if (!IS_ERR_OR_NULL(folio)) {
+               /*
+                * TODO: provide a flag for get_folio_noalloc() to avoid
+                * locking (or even the extra reference?)
+                */
+               folio_unlock(folio);
+               folio_put(folio);
+               if (userfaultfd_minor(vma))
+                       return handle_userfault(vmf, VM_UFFD_MINOR);
+       } else {
+               if (userfaultfd_missing(vma))
+                       return handle_userfault(vmf, VM_UFFD_MISSING);
+       }
+
+       return 0;
+}
+#else
+static inline vm_fault_t __do_userfault(struct vm_fault *vmf)
+{
+       return 0;
+}
+#endif
+
 /*
  * The mmap_lock must have been held on entry, and may have been
  * released depending on flags and vma->vm_ops->fault() return value.
@@ -5360,6 +5395,14 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
                        return VM_FAULT_OOM;
        }
 
+       /*
+        * If this is a userfaultfd trap, process it in advance before
+        * triggering the genuine fault handler.
+        */
+       ret = __do_userfault(vmf);
+       if (ret)
+               return ret;
+
        ret = vma->vm_ops->fault(vmf);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
                            VM_FAULT_DONE_COW)))
diff --git a/mm/shmem.c b/mm/shmem.c
index 68620caaf75f..239545352cd2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2489,13 +2489,6 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
        fault_mm = vma ? vma->vm_mm : NULL;
 
        folio = filemap_get_entry(inode->i_mapping, index);
-       if (folio && vma && userfaultfd_minor(vma)) {
-               if (!xa_is_value(folio))
-                       folio_put(folio);
-               *fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
-               return 0;
-       }
-
        if (xa_is_value(folio)) {
                error = shmem_swapin_folio(inode, index, &folio,
                                           sgp, gfp, vma, fault_type);
@@ -2540,11 +2533,6 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
         * Fast cache lookup and swap lookup did not find it: allocate.
         */
 
-       if (vma && userfaultfd_missing(vma)) {
-               *fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
-               return 0;
-       }
-
        /* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */
        orders = shmem_allowable_huge_orders(inode, vma, index, write_end, false);
        if (orders > 0) {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7cd7c5d1ce84..2ac5fad0ed6c 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -2045,6 +2045,14 @@ bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags,
            !vma_is_anonymous(vma))
                return false;
 
+       /*
+        * File-backed memory with PTE-level mappings must implement
+        * ops->get_folio_noalloc()
+        */
+       if (!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma) &&
+           !ops->get_folio_noalloc)
+               return false;
+
        return ops->can_userfault(vma, vm_flags);
 }
 
-- 
2.51.0


Reply via email to