From: "Mike Rapoport (Microsoft)" <[email protected]>

When userspace resolves a page fault in a shmem VMA with UFFDIO_CONTINUE,
the kernel needs to get a folio that already exists in the pagecache
backing that VMA.

Instead of using shmem_get_folio() for that, add a get_folio_noalloc()
method to 'struct vm_uffd_ops' that will return a folio if it exists in
the VMA's pagecache at the given pgoff.

Implement the get_folio_noalloc() method for shmem and slightly refactor
userfaultfd's mfill_get_vma() and mfill_atomic_pte_continue() to support
this new API.

Signed-off-by: Mike Rapoport (Microsoft) <[email protected]>
---
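For context, resolving a minor fault from userspace boils down to a
single UFFDIO_CONTINUE ioctl. The sketch below is illustrative only and
not part of this patch; it assumes the userfaultfd was already
registered with UFFDIO_REGISTER_MODE_MINOR:

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>

/* Resolve a minor fault at addr by mapping the existing pagecache folio. */
static int resolve_minor_fault(int uffd, unsigned long addr,
                               unsigned long page_size)
{
        struct uffdio_continue uc = {
                .range  = { .start = addr, .len = page_size },
                .mode   = 0,
        };

        /* Fails with errno == EFAULT if no folio exists at that offset. */
        return ioctl(uffd, UFFDIO_CONTINUE, &uc);
}
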
 include/linux/userfaultfd_k.h |  7 +++++++
 mm/shmem.c                    | 15 ++++++++++++++-
 mm/userfaultfd.c              | 32 ++++++++++++++++----------------
 3 files changed, 37 insertions(+), 17 deletions(-)
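
To illustrate the contract the new hook is expected to follow, a
hypothetical implementation for another filesystem could be as simple
as the sketch below ("myfs" is a made-up name; whether a bare pagecache
lookup is sufficient is of course filesystem-specific):

static struct folio *myfs_get_folio_noalloc(struct inode *inode, pgoff_t pgoff)
{
        /*
         * filemap_lock_folio() returns the folio locked and with a
         * reference held when it is present in the pagecache, and
         * ERR_PTR(-ENOENT) when it is not; it never allocates, which
         * matches the ->get_folio_noalloc() contract.
         */
        return filemap_lock_folio(inode->i_mapping, pgoff);
}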

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 56e85ab166c7..66dfc3c164e6 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -84,6 +84,13 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 struct vm_uffd_ops {
        /* Checks if a VMA can support userfaultfd */
        bool (*can_userfault)(struct vm_area_struct *vma, vm_flags_t vm_flags);
+       /*
+        * Called to resolve a UFFDIO_CONTINUE request.
+        * Should return the folio found at pgoff in the VMA's pagecache if it
+        * exists, or an ERR_PTR otherwise.
+        * The returned folio is locked and has a reference held.
+        */
+       struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t pgoff);
 };
 
 /* A combined operation mode + behavior flags. */
diff --git a/mm/shmem.c b/mm/shmem.c
index 9b82cda271c4..87cd8d2fdb97 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -5291,6 +5291,18 @@ static const struct super_operations shmem_ops = {
 };
 
 #ifdef CONFIG_USERFAULTFD
+static struct folio *shmem_get_folio_noalloc(struct inode *inode, pgoff_t pgoff)
+{
+       struct folio *folio;
+       int err;
+
+       err = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
+       if (err)
+               return ERR_PTR(err);
+
+       return folio;
+}
+
 static bool shmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
 {
        /*
@@ -5303,7 +5315,8 @@ static bool shmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
 }
 
 static const struct vm_uffd_ops shmem_uffd_ops = {
-       .can_userfault  = shmem_can_userfault,
+       .can_userfault          = shmem_can_userfault,
+       .get_folio_noalloc      = shmem_get_folio_noalloc,
 };
 #endif
 
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index d035f5e17f07..f0e6336015f1 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -188,6 +188,7 @@ static int mfill_get_vma(struct mfill_state *state)
        struct userfaultfd_ctx *ctx = state->ctx;
        uffd_flags_t flags = state->flags;
        struct vm_area_struct *dst_vma;
+       const struct vm_uffd_ops *ops;
        int err;
 
        /*
@@ -228,10 +229,12 @@ static int mfill_get_vma(struct mfill_state *state)
        if (is_vm_hugetlb_page(dst_vma))
                goto out;
 
-       if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
+       ops = vma_uffd_ops(dst_vma);
+       if (!ops)
                goto out_unlock;
-       if (!vma_is_shmem(dst_vma) &&
-           uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
+
+       if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) &&
+           !ops->get_folio_noalloc)
                goto out_unlock;
 
 out:
@@ -568,6 +571,7 @@ static int mfill_atomic_pte_zeropage(struct mfill_state *state)
 static int mfill_atomic_pte_continue(struct mfill_state *state)
 {
        struct vm_area_struct *dst_vma = state->vma;
+       const struct vm_uffd_ops *ops = vma_uffd_ops(dst_vma);
        unsigned long dst_addr = state->dst_addr;
        pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
        struct inode *inode = file_inode(dst_vma->vm_file);
@@ -577,16 +581,13 @@ static int mfill_atomic_pte_continue(struct mfill_state *state)
        struct page *page;
        int ret;
 
-       ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
+       if (!ops)
+               return -EOPNOTSUPP;
+
+       folio = ops->get_folio_noalloc(inode, pgoff);
        /* Our caller expects us to return -EFAULT if we failed to find folio */
-       if (ret == -ENOENT)
-               ret = -EFAULT;
-       if (ret)
-               goto out;
-       if (!folio) {
-               ret = -EFAULT;
-               goto out;
-       }
+       if (IS_ERR_OR_NULL(folio))
+               return -EFAULT;
 
        page = folio_file_page(folio, pgoff);
        if (PageHWPoison(page)) {
@@ -600,13 +601,12 @@ static int mfill_atomic_pte_continue(struct mfill_state *state)
                goto out_release;
 
        folio_unlock(folio);
-       ret = 0;
-out:
-       return ret;
+       return 0;
+
 out_release:
        folio_unlock(folio);
        folio_put(folio);
-       goto out;
+       return ret;
 }
 
 /* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
-- 
2.51.0

