From: "Mike Rapoport (Microsoft)" <[email protected]> Implementation of UFFDIO_COPY for anonymous memory might fail to copy data data from userspace buffer when the destination VMA is locked (either with mm_lock or with per-VMA lock).
In that case, mfill_atomic() releases the locks, retries copying the
data with the locks dropped, and then re-locks the destination VMA and
re-establishes the PMD.

Since this retry-reget dance is only relevant for UFFDIO_COPY and it
never happens for other UFFDIO_* operations, make it a part of
mfill_atomic_pte_copy(), which actually implements UFFDIO_COPY for
anonymous memory. The shmem implementation will be updated later and
the loop in mfill_atomic() will be adjusted afterwards.

Signed-off-by: Mike Rapoport (Microsoft) <[email protected]>
---
 mm/userfaultfd.c | 70 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 24 deletions(-)

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 45d8f04aaf4f..01a2b898fa40 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -404,35 +404,57 @@ static int mfill_copy_folio_locked(struct folio *folio, unsigned long src_addr)
 	return ret;
 }
 
+static int mfill_copy_folio_retry(struct mfill_state *state, struct folio *folio)
+{
+	unsigned long src_addr = state->src_addr;
+	void *kaddr;
+	int err;
+
+	/* retry copying with mm_lock dropped */
+	mfill_put_vma(state);
+
+	kaddr = kmap_local_folio(folio, 0);
+	err = copy_from_user(kaddr, (const void __user *) src_addr, PAGE_SIZE);
+	kunmap_local(kaddr);
+	if (unlikely(err))
+		return -EFAULT;
+
+	flush_dcache_folio(folio);
+
+	/* reget VMA and PMD, they could change underneath us */
+	err = mfill_get_vma(state);
+	if (err)
+		return err;
+
+	err = mfill_get_pmd(state);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int mfill_atomic_pte_copy(struct mfill_state *state)
 {
-	struct vm_area_struct *dst_vma = state->vma;
 	unsigned long dst_addr = state->dst_addr;
 	unsigned long src_addr = state->src_addr;
 	uffd_flags_t flags = state->flags;
-	pmd_t *dst_pmd = state->pmd;
 	struct folio *folio;
 	int ret;
 
-	if (!state->folio) {
-		ret = -ENOMEM;
-		folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma,
-					dst_addr);
-		if (!folio)
-			goto out;
+	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, state->vma, dst_addr);
+	if (!folio)
+		return -ENOMEM;
 
-		ret = mfill_copy_folio_locked(folio, src_addr);
+	ret = -ENOMEM;
+	if (mem_cgroup_charge(folio, state->vma->vm_mm, GFP_KERNEL))
+		goto out_release;
 
+	ret = mfill_copy_folio_locked(folio, src_addr);
+	if (unlikely(ret)) {
 		/* fallback to copy_from_user outside mmap_lock */
-		if (unlikely(ret)) {
-			ret = -ENOENT;
-			state->folio = folio;
-			/* don't free the page */
-			goto out;
-		}
-	} else {
-		folio = state->folio;
-		state->folio = NULL;
+		ret = mfill_copy_folio_retry(state, folio);
+		if (ret)
+			goto out_release;
 	}
 
 	/*
@@ -442,17 +464,16 @@ static int mfill_atomic_pte_copy(struct mfill_state *state)
 	 */
 	__folio_mark_uptodate(folio);
 
-	ret = -ENOMEM;
-	if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL))
-		goto out_release;
-
-	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
+	ret = mfill_atomic_install_pte(state->pmd, state->vma, dst_addr,
 				       &folio->page, true, flags);
 	if (ret)
 		goto out_release;
 out:
 	return ret;
 out_release:
+	/* Don't return -ENOENT so that our caller won't retry */
+	if (ret == -ENOENT)
+		ret = -EFAULT;
 	folio_put(folio);
 	goto out;
 }
@@ -907,7 +928,8 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 		break;
 	}
 
-	mfill_put_vma(&state);
+	if (state.vma)
+		mfill_put_vma(&state);
 out:
 	if (state.folio)
 		folio_put(state.folio);
-- 
2.51.0
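
[Editor's note, not part of the patch: for readers unfamiliar with the
userspace side, the path being restructured above backs the UFFDIO_COPY
ioctl of userfaultfd. Below is a minimal illustrative sketch of such a
call using the userfaultfd UAPI (struct uffdio_copy); the helper name
uffd_copy_page is hypothetical. The kernel-side drop/retry/re-lock dance
is invisible to userspace, which only sees the final result.]

/*
 * Illustrative only: issue UFFDIO_COPY to resolve a fault at 'dst' by
 * copying 'page_size' bytes from the buffer at 'src'.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

static int uffd_copy_page(int uffd, unsigned long dst, unsigned long src,
			  unsigned long page_size)
{
	struct uffdio_copy copy;

	memset(&copy, 0, sizeof(copy));
	copy.dst = dst;		/* faulting address to fill */
	copy.src = src;		/* source buffer in the monitoring process */
	copy.len = page_size;
	copy.mode = 0;

	/* returns 0 on success; -1 with errno (e.g. EFAULT) on failure */
	return ioctl(uffd, UFFDIO_COPY, &copy);
}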

