From: Peter Zijlstra <pet...@infradead.org>

When speculating faults (without holding mmap_sem) we need to validate
that the vma against which we loaded pages is still valid when we're
ready to install the new PTE.

Therefore, replace the pte_offset_map_lock() calls that (re)take the
PTL with pte_map_lock() which can fail in case we find the VMA changed
since we started the fault.

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>

[Port to 4.12 kernel]
[Remove the comment about the fault_env structure which has been
 implemented as the vm_fault structure in the kernel]
[move pte_map_lock()'s definition higher up in the file]
[move the define of FAULT_FLAG_SPECULATIVE later in the series]
[review error path in do_swap_page(), do_anonymous_page() and
 wp_page_copy()]
Signed-off-by: Laurent Dufour <lduf...@linux.ibm.com>
---
 mm/memory.c | 87 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 58 insertions(+), 29 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index c6ddadd9d2b7..fc3698d13cb5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2073,6 +2073,13 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(apply_to_page_range);
 
+static inline bool pte_map_lock(struct vm_fault *vmf)
+{
+       vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+                                      vmf->address, &vmf->ptl);
+       return true;    /* unconditional: this version has no failure path */
+}
+
 /*
  * handle_pte_fault chooses page fault handler according to an entry which was
  * read non-atomically.  Before making any commitment, on those architectures
@@ -2261,25 +2268,26 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
        int page_copied = 0;
        struct mem_cgroup *memcg;
        struct mmu_notifier_range range;
+       int ret = VM_FAULT_OOM;
 
        if (unlikely(anon_vma_prepare(vma)))
-               goto oom;
+               goto out;
 
        if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
                new_page = alloc_zeroed_user_highpage_movable(vma,
                                                              vmf->address);
                if (!new_page)
-                       goto oom;
+                       goto out;
        } else {
                new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
                                vmf->address);
                if (!new_page)
-                       goto oom;
+                       goto out;
                cow_user_page(new_page, old_page, vmf->address, vma);
        }
 
        if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
-               goto oom_free_new;
+               goto out_free_new;
 
        __SetPageUptodate(new_page);
 
@@ -2291,7 +2299,10 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
        /*
         * Re-check the pte - we dropped the lock
         */
-       vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
+       if (!pte_map_lock(vmf)) {
+               ret = VM_FAULT_RETRY;
+               goto out_uncharge;
+       }
        if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
                if (old_page) {
                        if (!PageAnon(old_page)) {
@@ -2378,12 +2389,14 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                put_page(old_page);
        }
        return page_copied ? VM_FAULT_WRITE : 0;
-oom_free_new:
+out_uncharge:
+       mem_cgroup_cancel_charge(new_page, memcg, false);
+out_free_new:
        put_page(new_page);
-oom:
+out:
        if (old_page)
                put_page(old_page);
-       return VM_FAULT_OOM;
+       return ret;
 }
 
 /**
@@ -2405,8 +2418,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
        WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
-       vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
-                                      &vmf->ptl);
+       if (!pte_map_lock(vmf))
+               return VM_FAULT_RETRY;
        /*
         * We might have raced with another page fault while we released the
         * pte_offset_map_lock.
@@ -2527,8 +2540,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
                        get_page(vmf->page);
                        pte_unmap_unlock(vmf->pte, vmf->ptl);
                        lock_page(vmf->page);
-                       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-                                       vmf->address, &vmf->ptl);
+                       if (!pte_map_lock(vmf)) {
+                               unlock_page(vmf->page);
+                               put_page(vmf->page);
+                               return VM_FAULT_RETRY;
+                       }
                        if (!pte_same(*vmf->pte, vmf->orig_pte)) {
                                unlock_page(vmf->page);
                                pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2744,11 +2760,15 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 
                if (!page) {
                        /*
-                        * Back out if somebody else faulted in this pte
-                        * while we released the pte lock.
+                        * Back out if the VMA has changed behind our back during
+                        * a speculative page fault or if somebody else
+                        * faulted in this pte while we released the pte lock.
                         */
-                       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-                                       vmf->address, &vmf->ptl);
+                       if (!pte_map_lock(vmf)) {
+                               delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+                               ret = VM_FAULT_RETRY;
+                               goto out;
+                       }
                        if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
                                ret = VM_FAULT_OOM;
                        delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2801,10 +2821,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        }
 
        /*
-        * Back out if somebody else already faulted in this pte.
+        * Back out if the VMA has changed behind our back during a speculative
+        * page fault or if somebody else already faulted in this pte.
         */
-       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-                       &vmf->ptl);
+       if (!pte_map_lock(vmf)) {
+               ret = VM_FAULT_RETRY;
+               goto out_cancel_cgroup;
+       }
        if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
                goto out_nomap;
 
@@ -2882,8 +2905,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 out:
        return ret;
 out_nomap:
-       mem_cgroup_cancel_charge(page, memcg, false);
        pte_unmap_unlock(vmf->pte, vmf->ptl);
+out_cancel_cgroup:
+       mem_cgroup_cancel_charge(page, memcg, false);
 out_page:
        unlock_page(page);
 out_release:
@@ -2934,8 +2958,8 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
                        !mm_forbids_zeropage(vma->vm_mm)) {
                entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
                                                vma->vm_page_prot));
-               vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-                               vmf->address, &vmf->ptl);
+               if (!pte_map_lock(vmf))
+                       return VM_FAULT_RETRY;
                if (!pte_none(*vmf->pte))
                        goto unlock;
                ret = check_stable_address_space(vma->vm_mm);
@@ -2971,14 +2995,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
        if (vma->vm_flags & VM_WRITE)
                entry = pte_mkwrite(pte_mkdirty(entry));
 
-       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-                       &vmf->ptl);
-       if (!pte_none(*vmf->pte))
+       if (!pte_map_lock(vmf)) {
+               ret = VM_FAULT_RETRY;
                goto release;
+       }
+       if (!pte_none(*vmf->pte))
+               goto unlock_and_release;
 
        ret = check_stable_address_space(vma->vm_mm);
        if (ret)
-               goto release;
+               goto unlock_and_release;
 
        /* Deliver the page fault to userland, check inside PT lock */
        if (userfaultfd_missing(vma)) {
@@ -3000,10 +3026,12 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 unlock:
        pte_unmap_unlock(vmf->pte, vmf->ptl);
        return ret;
+unlock_and_release:
+       pte_unmap_unlock(vmf->pte, vmf->ptl);
 release:
        mem_cgroup_cancel_charge(page, memcg, false);
        put_page(page);
-       goto unlock;
+       return ret;
 oom_free_page:
        put_page(page);
 oom:
@@ -3118,8 +3146,9 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
         * pte_none() under vmf->ptl protection when we return to
         * alloc_set_pte().
         */
-       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-                       &vmf->ptl);
+       if (!pte_map_lock(vmf))
+               return VM_FAULT_RETRY;
+
        return 0;
 }
 
-- 
2.21.0

Reply via email to