[PATCH v7 16/24] mm: Introduce __page_add_new_anon_rmap()

2018-02-06 Thread Laurent Dufour
When dealing with speculative page fault handler, we may race with VMA
being split or merged. In this case the vma->vm_start and vm->vm_end
fields may not match the address the page fault is occurring.

This can only happens when the VMA is split but in that case, the
anon_vma pointer of the new VMA will be the same as the original one,
because in __split_vma the new->anon_vma is set to src->anon_vma when
*new = *vma.

So even if the VMA boundaries are not correct, the anon_vma pointer is
still valid.

If the VMA has been merged, then the VMA in which it has been merged
must have the same anon_vma pointer otherwise the merge can't be done.

So in all the case we know that the anon_vma is valid, since we have
checked before starting the speculative page fault that the anon_vma
pointer is valid for this VMA and since there is an anon_vma this
means that at one time a page has been backed and that before the VMA
is cleaned, the page table lock would have to be grab to clean the
PTE, and the anon_vma field is checked once the PTE is locked.

This patch introduce a new __page_add_new_anon_rmap() service which
doesn't check for the VMA boundaries, and create a new inline one
which do the check.

When called from a page fault handler, if this is not a speculative one,
there is a guarantee that vm_start and vm_end match the faulting address,
so this check is useless. In the context of the speculative page fault
handler, this check may be wrong but anon_vma is still valid as explained
above.

Signed-off-by: Laurent Dufour 
---
 include/linux/rmap.h | 12 ++--
 mm/memory.c  |  8 
 mm/rmap.c|  5 ++---
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 988d176472df..a5d282573093 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -174,8 +174,16 @@ void page_add_anon_rmap(struct page *, struct 
vm_area_struct *,
unsigned long, bool);
 void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
   unsigned long, int);
-void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
-   unsigned long, bool);
+void __page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+ unsigned long, bool);
+static inline void page_add_new_anon_rmap(struct page *page,
+ struct vm_area_struct *vma,
+ unsigned long address, bool compound)
+{
+   VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
+   __page_add_new_anon_rmap(page, vma, address, compound);
+}
+
 void page_add_file_rmap(struct page *, bool);
 void page_remove_rmap(struct page *, bool);
 
diff --git a/mm/memory.c b/mm/memory.c
index b1e6a1b7c77a..6d0d7a911cbe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2553,7 +2553,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 * thread doing COW.
 */
ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
-   page_add_new_anon_rmap(new_page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(new_page, vma, vmf->address, false);
mem_cgroup_commit_charge(new_page, memcg, false, false);
__lru_cache_add_active_or_unevictable(new_page, vmf->vma_flags);
/*
@@ -3095,7 +3095,7 @@ int do_swap_page(struct vm_fault *vmf)
 
/* ksm created a completely new copy */
if (unlikely(page != swapcache && swapcache)) {
-   page_add_new_anon_rmap(page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
__lru_cache_add_active_or_unevictable(page, vmf->vma_flags);
} else {
@@ -3246,7 +3246,7 @@ static int do_anonymous_page(struct vm_fault *vmf)
}
 
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-   page_add_new_anon_rmap(page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
__lru_cache_add_active_or_unevictable(page, vmf->vma_flags);
 setpte:
@@ -3498,7 +3498,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup 
*memcg,
/* copy-on-write page */
if (write && !(vmf->vma_flags & VM_SHARED)) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-   page_add_new_anon_rmap(page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
__lru_cache_add_active_or_unevictable(page, vmf->vma_flags);
} else {
diff --git a/mm/rmap.c b/mm/rmap.c
index 47db27f8049e..6ec168ba5f73 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1136,7 

[PATCH v7 16/24] mm: Introduce __page_add_new_anon_rmap()

2018-02-06 Thread Laurent Dufour
When dealing with speculative page fault handler, we may race with VMA
being split or merged. In this case the vma->vm_start and vm->vm_end
fields may not match the address the page fault is occurring.

This can only happens when the VMA is split but in that case, the
anon_vma pointer of the new VMA will be the same as the original one,
because in __split_vma the new->anon_vma is set to src->anon_vma when
*new = *vma.

So even if the VMA boundaries are not correct, the anon_vma pointer is
still valid.

If the VMA has been merged, then the VMA in which it has been merged
must have the same anon_vma pointer otherwise the merge can't be done.

So in all the case we know that the anon_vma is valid, since we have
checked before starting the speculative page fault that the anon_vma
pointer is valid for this VMA and since there is an anon_vma this
means that at one time a page has been backed and that before the VMA
is cleaned, the page table lock would have to be grab to clean the
PTE, and the anon_vma field is checked once the PTE is locked.

This patch introduce a new __page_add_new_anon_rmap() service which
doesn't check for the VMA boundaries, and create a new inline one
which do the check.

When called from a page fault handler, if this is not a speculative one,
there is a guarantee that vm_start and vm_end match the faulting address,
so this check is useless. In the context of the speculative page fault
handler, this check may be wrong but anon_vma is still valid as explained
above.

Signed-off-by: Laurent Dufour 
---
 include/linux/rmap.h | 12 ++--
 mm/memory.c  |  8 
 mm/rmap.c|  5 ++---
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 988d176472df..a5d282573093 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -174,8 +174,16 @@ void page_add_anon_rmap(struct page *, struct 
vm_area_struct *,
unsigned long, bool);
 void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
   unsigned long, int);
-void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
-   unsigned long, bool);
+void __page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+ unsigned long, bool);
+static inline void page_add_new_anon_rmap(struct page *page,
+ struct vm_area_struct *vma,
+ unsigned long address, bool compound)
+{
+   VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
+   __page_add_new_anon_rmap(page, vma, address, compound);
+}
+
 void page_add_file_rmap(struct page *, bool);
 void page_remove_rmap(struct page *, bool);
 
diff --git a/mm/memory.c b/mm/memory.c
index b1e6a1b7c77a..6d0d7a911cbe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2553,7 +2553,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 * thread doing COW.
 */
ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
-   page_add_new_anon_rmap(new_page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(new_page, vma, vmf->address, false);
mem_cgroup_commit_charge(new_page, memcg, false, false);
__lru_cache_add_active_or_unevictable(new_page, vmf->vma_flags);
/*
@@ -3095,7 +3095,7 @@ int do_swap_page(struct vm_fault *vmf)
 
/* ksm created a completely new copy */
if (unlikely(page != swapcache && swapcache)) {
-   page_add_new_anon_rmap(page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
__lru_cache_add_active_or_unevictable(page, vmf->vma_flags);
} else {
@@ -3246,7 +3246,7 @@ static int do_anonymous_page(struct vm_fault *vmf)
}
 
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-   page_add_new_anon_rmap(page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
__lru_cache_add_active_or_unevictable(page, vmf->vma_flags);
 setpte:
@@ -3498,7 +3498,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup 
*memcg,
/* copy-on-write page */
if (write && !(vmf->vma_flags & VM_SHARED)) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-   page_add_new_anon_rmap(page, vma, vmf->address, false);
+   __page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false);
__lru_cache_add_active_or_unevictable(page, vmf->vma_flags);
} else {
diff --git a/mm/rmap.c b/mm/rmap.c
index 47db27f8049e..6ec168ba5f73 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1136,7 +1136,7 @@ void