On Fri, Mar 6, 2026 at 9:19 AM Mike Rapoport <[email protected]> wrote: > > From: "Mike Rapoport (Microsoft)" <[email protected]> > > and use it to refactor mfill_atomic_pte_zeroed_folio() and > mfill_atomic_pte_copy(). > > mfill_atomic_pte_zeroed_folio() and mfill_atomic_pte_copy() perform > almost identical actions: > * allocate a folio > * update folio contents (either copy from userspace of fill with zeros) > * update page tables with the new folio > > Split a __mfill_atomic_pte() helper that handles both cases and uses > newly introduced vm_uffd_ops->alloc_folio() to allocate the folio. > > Pass the ops structure from the callers to __mfill_atomic_pte() to later > allow using anon_uffd_ops for MAP_PRIVATE mappings of file-backed VMAs. > > Note, that the new ops method is called alloc_folio() rather than > folio_alloc() to avoid clash with alloc_tag macro folio_alloc(). > > Signed-off-by: Mike Rapoport (Microsoft) <[email protected]>
Feel free to add: Reviewed-by: James Houghton <[email protected]> > --- > include/linux/userfaultfd_k.h | 6 +++ > mm/userfaultfd.c | 92 ++++++++++++++++++----------------- > 2 files changed, 54 insertions(+), 44 deletions(-) > > diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h > index 66dfc3c164e6..4d8b879eed91 100644 > --- a/include/linux/userfaultfd_k.h > +++ b/include/linux/userfaultfd_k.h > @@ -91,6 +91,12 @@ struct vm_uffd_ops { > * The returned folio is locked and with reference held. > */ > struct folio *(*get_folio_noalloc)(struct inode *inode, pgoff_t > pgoff); > + /* > + * Called during resolution of UFFDIO_COPY request. > + * Should return allocate a and return folio or NULL if allocation > fails. "Should allocate and return a folio or NULL if allocation fails." I see this mistake is fixed in the next patch. :) > + */ > + struct folio *(*alloc_folio)(struct vm_area_struct *vma, > + unsigned long addr); > }; > > /* A combined operation mode + behavior flags. */ > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c > index 98ade14eaa5b..31f3ab6a73e2 100644 > --- a/mm/userfaultfd.c > +++ b/mm/userfaultfd.c > @@ -42,8 +42,26 @@ static bool anon_can_userfault(struct vm_area_struct *vma, > vm_flags_t vm_flags) > return true; > } > > +static struct folio *anon_alloc_folio(struct vm_area_struct *vma, > + unsigned long addr) > +{ > + struct folio *folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, > + addr); > + > + if (!folio) > + return NULL; > + > + if (mem_cgroup_charge(folio, vma->vm_mm, GFP_KERNEL)) { > + folio_put(folio); > + return NULL; > + } > + > + return folio; > +} > + > static const struct vm_uffd_ops anon_uffd_ops = { > .can_userfault = anon_can_userfault, > + .alloc_folio = anon_alloc_folio, > }; > > static const struct vm_uffd_ops *vma_uffd_ops(struct vm_area_struct *vma) > @@ -458,7 +476,8 @@ static int mfill_copy_folio_retry(struct mfill_state > *state, struct folio *folio > return 0; > } > > -static int mfill_atomic_pte_copy(struct mfill_state *state) > +static int __mfill_atomic_pte(struct mfill_state *state, > + const struct vm_uffd_ops *ops) > { > unsigned long dst_addr = state->dst_addr; > unsigned long src_addr = state->src_addr; > @@ -466,16 +485,12 @@ static int mfill_atomic_pte_copy(struct mfill_state > *state) > struct folio *folio; > int ret; > > - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, state->vma, > dst_addr); > + folio = ops->alloc_folio(state->vma, state->dst_addr); > if (!folio) > return -ENOMEM; > > - ret = -ENOMEM; > - if (mem_cgroup_charge(folio, state->vma->vm_mm, GFP_KERNEL)) > - goto out_release; > - > - ret = mfill_copy_folio_locked(folio, src_addr); > - if (unlikely(ret)) { > + if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) { > + ret = mfill_copy_folio_locked(folio, src_addr); > /* > * Fallback to copy_from_user outside mmap_lock. > * If retry is successful, mfill_copy_folio_locked() returns > @@ -483,9 +498,15 @@ static int mfill_atomic_pte_copy(struct mfill_state > *state) > * If there was an error, we must mfill_put_vma() anyway and > it > * will take care of unlocking if needed. > */ > - ret = mfill_copy_folio_retry(state, folio); > - if (ret) > - goto out_release; > + if (unlikely(ret)) { > + ret = mfill_copy_folio_retry(state, folio); > + if (ret) > + goto err_folio_put; > + } > + } else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) { > + clear_user_highpage(&folio->page, state->dst_addr); > + } else { > + VM_WARN_ONCE(1, "unknown UFFDIO operation"); "Unknown UFFDIO operation. flags=%x", flags seems a little better to me. > } > > /* > @@ -498,47 +519,30 @@ static int mfill_atomic_pte_copy(struct mfill_state > *state) > ret = mfill_atomic_install_pte(state->pmd, state->vma, dst_addr, > &folio->page, true, flags); > if (ret) > - goto out_release; > -out: > - return ret; > -out_release: > + goto err_folio_put; > + > + return 0; > + > +err_folio_put: > + folio_put(folio); > /* Don't return -ENOENT so that our caller won't retry */ > if (ret == -ENOENT) > ret = -EFAULT; > - folio_put(folio); > - goto out; > + return ret; > } > > -static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd, > - struct vm_area_struct *dst_vma, > - unsigned long dst_addr) > +static int mfill_atomic_pte_copy(struct mfill_state *state) > { > - struct folio *folio; > - int ret = -ENOMEM; > - > - folio = vma_alloc_zeroed_movable_folio(dst_vma, dst_addr); > - if (!folio) > - return ret; > - > - if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL)) > - goto out_put; > + const struct vm_uffd_ops *ops = vma_uffd_ops(state->vma); > > - /* > - * The memory barrier inside __folio_mark_uptodate makes sure that > - * zeroing out the folio become visible before mapping the page > - * using set_pte_at(). See do_anonymous_page(). > - */ > - __folio_mark_uptodate(folio); > + return __mfill_atomic_pte(state, ops); > +} > > - ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, > - &folio->page, true, 0); > - if (ret) > - goto out_put; > +static int mfill_atomic_pte_zeroed_folio(struct mfill_state *state) > +{ > + const struct vm_uffd_ops *ops = vma_uffd_ops(state->vma); > > - return 0; > -out_put: > - folio_put(folio); > - return ret; > + return __mfill_atomic_pte(state, ops); > } > > static int mfill_atomic_pte_zeropage(struct mfill_state *state) > @@ -551,7 +555,7 @@ static int mfill_atomic_pte_zeropage(struct mfill_state > *state) > int ret; > > if (mm_forbids_zeropage(dst_vma->vm_mm)) > - return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, > dst_addr); > + return mfill_atomic_pte_zeroed_folio(state); > > _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr), > dst_vma->vm_page_prot)); > -- > 2.51.0 >

