Normally free_pgtables() needs to write-lock the affected VMAs, except in
the case when the VMAs were already isolated under the VMA write-lock.
munmap() does just that: it isolates the VMAs while holding the appropriate
locks, then downgrades mmap_lock and drops the per-VMA locks before freeing
page tables.
Add a parameter to free_pgtables() and unmap_region() for such a scenario.

Signed-off-by: Suren Baghdasaryan <sur...@google.com>
---
 mm/internal.h |  2 +-
 mm/memory.c   |  6 +++++-
 mm/mmap.c     | 18 ++++++++++++------
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index bcf75a8b032d..5ea4ff1a70e7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -87,7 +87,7 @@ void folio_activate(struct folio *folio);
 
 void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
                   struct vm_area_struct *start_vma, unsigned long floor,
-                  unsigned long ceiling);
+                  unsigned long ceiling, bool lock_vma);
 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
 
 struct zap_details;
diff --git a/mm/memory.c b/mm/memory.c
index 2fabf89b2be9..9ece18548db1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -348,7 +348,7 @@ void free_pgd_range(struct mmu_gather *tlb,
 
 void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
                   struct vm_area_struct *vma, unsigned long floor,
-                  unsigned long ceiling)
+                  unsigned long ceiling, bool lock_vma)
 {
        MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
 
@@ -366,6 +366,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
                 * Hide vma from rmap and truncate_pagecache before freeing
                 * pgtables
                 */
+               if (lock_vma)
+                       vma_write_lock(vma);
                unlink_anon_vmas(vma);
                unlink_file_vma(vma);
 
@@ -380,6 +382,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
                               && !is_vm_hugetlb_page(next)) {
                                vma = next;
                                next = mas_find(&mas, ceiling - 1);
+                               if (lock_vma)
+                                       vma_write_lock(vma);
                                unlink_anon_vmas(vma);
                                unlink_file_vma(vma);
                        }
diff --git a/mm/mmap.c b/mm/mmap.c
index be289e0b693b..0d767ce043af 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -78,7 +78,7 @@ core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                struct vm_area_struct *next, unsigned long start,
-               unsigned long end);
+               unsigned long end, bool lock_vma);
 
 static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
 {
@@ -2202,7 +2202,7 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
 static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
                struct vm_area_struct *next,
-               unsigned long start, unsigned long end)
+               unsigned long start, unsigned long end, bool lock_vma)
 {
        struct mmu_gather tlb;
 
@@ -2211,7 +2211,8 @@ static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
        update_hiwater_rss(mm);
        unmap_vmas(&tlb, mt, vma, start, end);
        free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
-                                next ? next->vm_start : USER_PGTABLES_CEILING);
+                                next ? next->vm_start : USER_PGTABLES_CEILING,
+                                lock_vma);
        tlb_finish_mmu(&tlb);
 }
 
@@ -2468,7 +2469,11 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                        mmap_write_downgrade(mm);
        }
 
-       unmap_region(mm, &mt_detach, vma, prev, next, start, end);
+       /*
+        * We can free page tables without locking the vmas because they were
+        * isolated before we downgraded mmap_lock and dropped per-vma locks.
+        */
+       unmap_region(mm, &mt_detach, vma, prev, next, start, end, !downgrade);
        /* Statistics and freeing VMAs */
        mas_set(&mas_detach, start);
        remove_mt(mm, &mas_detach);
@@ -2785,7 +2790,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        vma->vm_file = NULL;
 
        /* Undo any partial mapping done by a device driver. */
-       unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
+       unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end,
+                    true);
        if (file && (vm_flags & VM_SHARED))
                mapping_unmap_writable(file->f_mapping);
 free_vma:
@@ -3130,7 +3136,7 @@ void exit_mmap(struct mm_struct *mm)
        mmap_write_lock(mm);
        mt_clear_in_rcu(&mm->mm_mt);
        free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
-                     USER_PGTABLES_CEILING);
+                     USER_PGTABLES_CEILING, true);
        tlb_finish_mmu(&tlb);
 
        /*
-- 
2.39.0

Reply via email to