From: Peter Zijlstra <pet...@infradead.org>

Wrap the VMA modifications (vma_adjust/unmap_page_range) with sequence
counts such that we can easily test if a VMA is changed.

The unmap_page_range() one allows us to make assumptions about
page-tables; when we find the seqcount hasn't changed we can assume
page-tables are still valid.

The flip side is that we cannot distinguish between a vma_adjust() and
the unmap_page_range() -- where with the former we could have
re-checked the vma bounds against the address.

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>

[Port to 4.12 kernel]
[Fix lock dependency between mapping->i_mmap_rwsem and vma->vm_sequence]
Signed-off-by: Laurent Dufour <lduf...@linux.vnet.ibm.com>
---
 include/linux/mm_types.h |  1 +
 mm/memory.c              |  2 ++
 mm/mmap.c                | 21 ++++++++++++++++++---
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 46f4ecf5479a..df9a530c8ca1 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -344,6 +344,7 @@ struct vm_area_struct {
        struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
 #endif
        struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+       seqcount_t vm_sequence;
 } __randomize_layout;
 
 struct core_thread {
diff --git a/mm/memory.c b/mm/memory.c
index 530d887ca885..f250e7c92948 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1499,6 +1499,7 @@ void unmap_page_range(struct mmu_gather *tlb,
        unsigned long next;
 
        BUG_ON(addr >= end);
+       write_seqcount_begin(&vma->vm_sequence);
        tlb_start_vma(tlb, vma);
        pgd = pgd_offset(vma->vm_mm, addr);
        do {
@@ -1508,6 +1509,7 @@ void unmap_page_range(struct mmu_gather *tlb,
                next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
        } while (pgd++, addr = next, addr != end);
        tlb_end_vma(tlb, vma);
+       write_seqcount_end(&vma->vm_sequence);
 }
 
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 680506faceae..0a0012c7e50c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -558,6 +558,8 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
        else
                mm->highest_vm_end = vm_end_gap(vma);
 
+       seqcount_init(&vma->vm_sequence);
+
        /*
         * vma->vm_prev wasn't known when we followed the rbtree to find the
         * correct insertion point for that vma. As a result, we could not
@@ -799,6 +801,11 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                }
        }
 
+       write_seqcount_begin(&vma->vm_sequence);
+       if (next && next != vma)
+               write_seqcount_begin_nested(&next->vm_sequence,
+                                           SINGLE_DEPTH_NESTING);
+
        anon_vma = vma->anon_vma;
        if (!anon_vma && adjust_next)
                anon_vma = next->anon_vma;
@@ -903,6 +910,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                mm->map_count--;
                mpol_put(vma_policy(next));
+               write_seqcount_end(&next->vm_sequence);
                kmem_cache_free(vm_area_cachep, next);
                /*
                 * In mprotect's case 6 (see comments on vma_merge),
                 * we must remove another next too. It would clutter
@@ -932,11 +940,14 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                if (remove_next == 2) {
                        remove_next = 1;
                        end = next->vm_end;
+                       write_seqcount_end(&vma->vm_sequence);
                        goto again;
-               }
-               else if (next)
+               } else if (next) {
+                       if (next != vma)
+                               write_seqcount_begin_nested(&next->vm_sequence,
+                                                           SINGLE_DEPTH_NESTING);
                        vma_gap_update(next);
-               else {
+               } else {
                        /*
                         * If remove_next == 2 we obviously can't
                         * reach this path.
@@ -962,6 +973,10 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        if (insert && file)
                uprobe_mmap(insert);
 
+       if (next && next != vma)
+               write_seqcount_end(&next->vm_sequence);
+       write_seqcount_end(&vma->vm_sequence);
+
        validate_mm(mm);
 
        return 0;
-- 
2.7.4

Reply via email to