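Replace vma_start_write() with vma_start_write_killable() in syscall
paths, improving reaction time to a fatal signal. For the same reason,
set_mempolicy_home_node() now takes the mmap lock with
mmap_write_lock_killable().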

In a number of places we now lock the VMA earlier than before to avoid
doing work and undoing it later if a fatal signal is pending. This
is safe because the moves happen within sections where we already
hold mmap_lock for writing, so they do not change the locking order
relative to other kernel locks.

Suggested-by: Matthew Wilcox <[email protected]>
Signed-off-by: Suren Baghdasaryan <[email protected]>
---
 mm/madvise.c   | 13 ++++++++++---
 mm/memory.c    |  2 ++
 mm/mempolicy.c | 11 +++++++++--
 mm/mlock.c     | 30 ++++++++++++++++++++++++------
 mm/mprotect.c  | 25 +++++++++++++++++--------
 mm/mremap.c    |  8 +++++---
 mm/mseal.c     | 24 +++++++++++++++++++-----
 7 files changed, 86 insertions(+), 27 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 69708e953cf5..f2c7b0512cdf 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -172,10 +172,17 @@ static int madvise_update_vma(vm_flags_t new_flags,
        if (IS_ERR(vma))
                return PTR_ERR(vma);
 
-       madv_behavior->vma = vma;
+       /*
+        * If a new vma was created during vma_modify_XXX, the resulting
+        * vma is already locked. Skip re-locking new vma in this case.
+        */
+       if (vma == madv_behavior->vma) {
+               if (vma_start_write_killable(vma))
+                       return -EINTR;
+       } else {
+               madv_behavior->vma = vma;
+       }
 
-       /* vm_flags is protected by the mmap_lock held in write mode. */
-       vma_start_write(vma);
        vma->flags = new_vma_flags;
        if (set_new_anon_name)
                return replace_anon_vma_name(vma, anon_name);
diff --git a/mm/memory.c b/mm/memory.c
index e44469f9cf65..9f99ec634831 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -366,6 +366,8 @@ void free_pgd_range(struct mmu_gather *tlb,
  * page tables that should be removed.  This can differ from the vma mappings on
  * some archs that may have mappings that need to be removed outside the vmas.
  * Note that the prev->vm_end and next->vm_start are often used.
+ * We don't use vma_start_write_killable() because page tables should be freed
+ * even if the task is being killed.
  *
  * The vma_end differs from the pg_end when a dup_mmap() failed and the tree has
  * unrelated data to the mm_struct being torn down.
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index fd08771e2057..c38a90487531 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1784,7 +1784,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
                return -EINVAL;
        if (end == start)
                return 0;
-       mmap_write_lock(mm);
+       if (mmap_write_lock_killable(mm))
+               return -EINTR;
        prev = vma_prev(&vmi);
        for_each_vma_range(vmi, vma, end) {
                /*
@@ -1801,13 +1802,19 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
                        err = -EOPNOTSUPP;
                        break;
                }
+               /*
+                * Lock the VMA early to avoid extra work if a fatal signal
+                * is pending.
+                */
+               err = vma_start_write_killable(vma);
+               if (err)
+                       break;
                new = mpol_dup(old);
                if (IS_ERR(new)) {
                        err = PTR_ERR(new);
                        break;
                }
 
-               vma_start_write(vma);
                new->home_node = home_node;
                err = mbind_range(&vmi, vma, &prev, start, end, new);
                mpol_put(new);
diff --git a/mm/mlock.c b/mm/mlock.c
index 8c227fefa2df..2ed454db7cf7 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -419,8 +419,10 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
  *
  * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
  * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
+ *
+ * Return: 0 on success, -EINTR if a fatal signal is pending.
  */
-static void mlock_vma_pages_range(struct vm_area_struct *vma,
+static int mlock_vma_pages_range(struct vm_area_struct *vma,
        unsigned long start, unsigned long end,
        vma_flags_t *new_vma_flags)
 {
@@ -442,7 +444,9 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
         */
        if (vma_flags_test(new_vma_flags, VMA_LOCKED_BIT))
                vma_flags_set(new_vma_flags, VMA_IO_BIT);
-       vma_start_write(vma);
+       if (vma_start_write_killable(vma))
+               return -EINTR;
+
        vma_flags_reset_once(vma, new_vma_flags);
 
        lru_add_drain();
@@ -453,6 +457,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
                vma_flags_clear(new_vma_flags, VMA_IO_BIT);
                vma_flags_reset_once(vma, new_vma_flags);
        }
+       return 0;
 }
 
 /*
@@ -506,11 +511,15 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
         */
        if (vma_flags_test(&new_vma_flags, VMA_LOCKED_BIT) &&
            vma_flags_test(&old_vma_flags, VMA_LOCKED_BIT)) {
+               ret = vma_start_write_killable(vma);
+               if (ret)
+                       goto out; /* mm->locked_vm is fine as nr_pages == 0 */
                /* No work to do, and mlocking twice would be wrong */
-               vma_start_write(vma);
                vma->flags = new_vma_flags;
        } else {
-               mlock_vma_pages_range(vma, start, end, &new_vma_flags);
+               ret = mlock_vma_pages_range(vma, start, end, &new_vma_flags);
+               if (ret)
+                       mm->locked_vm -= nr_pages;
        }
 out:
        *prev = vma;
@@ -739,9 +748,18 @@ static int apply_mlockall_flags(int flags)
 
                error = mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end,
                                    newflags);
-               /* Ignore errors, but prev needs fixing up. */
-               if (error)
+               if (error) {
+                       /*
+                        * If we failed due to a pending fatal signal, return
+                        * now. If we locked the vma before signal arrived, it
+                        * will be unlocked when we drop mmap_write_lock.
+                        */
+                       if (fatal_signal_pending(current))
+                               return -EINTR;
+
+                       /* Ignore errors, but prev needs fixing up. */
                        prev = vma;
+               }
                cond_resched();
        }
 out:
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 110d47a36d4b..d6227877465f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -700,6 +700,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
        const vma_flags_t old_vma_flags = READ_ONCE(vma->flags);
        vma_flags_t new_vma_flags = legacy_to_vma_flags(newflags);
        long nrpages = (end - start) >> PAGE_SHIFT;
+       struct vm_area_struct *new_vma;
        unsigned int mm_cp_flags = 0;
        unsigned long charged = 0;
        int error;
@@ -756,19 +757,27 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
                vma_flags_clear(&new_vma_flags, VMA_ACCOUNT_BIT);
        }
 
-       vma = vma_modify_flags(vmi, *pprev, vma, start, end, &new_vma_flags);
-       if (IS_ERR(vma)) {
-               error = PTR_ERR(vma);
+       new_vma = vma_modify_flags(vmi, *pprev, vma, start, end,
+                                  &new_vma_flags);
+       if (IS_ERR(new_vma)) {
+               error = PTR_ERR(new_vma);
                goto fail;
        }
 
-       *pprev = vma;
-
        /*
-        * vm_flags and vm_page_prot are protected by the mmap_lock
-        * held in write mode.
+        * If a new vma was created during vma_modify_flags, the resulting
+        * vma is already locked. Skip re-locking new vma in this case.
         */
-       vma_start_write(vma);
+       if (new_vma == vma) {
+               error = vma_start_write_killable(vma);
+               if (error)
+                       goto fail;
+       } else {
+               vma = new_vma;
+       }
+
+       *pprev = vma;
+
        vma_flags_reset_once(vma, &new_vma_flags);
        if (vma_wants_manual_pte_write_upgrade(vma))
                mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
diff --git a/mm/mremap.c b/mm/mremap.c
index e9c8b1d05832..0860102bddab 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1348,6 +1348,11 @@ static unsigned long move_vma(struct vma_remap_struct *vrm)
        if (err)
                return err;
 
+       /* We don't want racing faults. */
+       err = vma_start_write_killable(vrm->vma);
+       if (err)
+               return err;
+
        /*
         * If accounted, determine the number of bytes the operation will
         * charge.
@@ -1355,9 +1360,6 @@ static unsigned long move_vma(struct vma_remap_struct *vrm)
        if (!vrm_calc_charge(vrm))
                return -ENOMEM;
 
-       /* We don't want racing faults. */
-       vma_start_write(vrm->vma);
-
        /* Perform copy step. */
        err = copy_vma_and_data(vrm, &new_vma);
        /*
diff --git a/mm/mseal.c b/mm/mseal.c
index 603df53ad267..1ea19fd3d384 100644
--- a/mm/mseal.c
+++ b/mm/mseal.c
@@ -70,14 +70,28 @@ static int mseal_apply(struct mm_struct *mm,
 
                if (!vma_test(vma, VMA_SEALED_BIT)) {
                        vma_flags_t vma_flags = vma->flags;
+                       struct vm_area_struct *new_vma;
 
                        vma_flags_set(&vma_flags, VMA_SEALED_BIT);
 
-                       vma = vma_modify_flags(&vmi, prev, vma, curr_start,
-                                              curr_end, &vma_flags);
-                       if (IS_ERR(vma))
-                               return PTR_ERR(vma);
-                       vma_start_write(vma);
+                       new_vma = vma_modify_flags(&vmi, prev, vma, curr_start,
+                                                  curr_end, &vma_flags);
+                       if (IS_ERR(new_vma))
+                               return PTR_ERR(new_vma);
+
+                       /*
+                        * If a new vma was created during vma_modify_flags,
+                        * the resulting vma is already locked.
+                        * Skip re-locking new vma in this case.
+                        */
+                       if (new_vma == vma) {
+                               int err = vma_start_write_killable(vma);
+                               if (err)
+                                       return err;
+                       } else {
+                               vma = new_vma;
+                       }
+
                        vma_set_flags(vma, VMA_SEALED_BIT);
                }
 
-- 
2.53.0.1018.g2bb0e51243-goog


Reply via email to