Re: [PATCH v9 11/24] mm: Cache some VMA fields in the vm_fault structure

2018-04-04 Thread Laurent Dufour
On 03/04/2018 00:24, David Rientjes wrote:
> On Tue, 13 Mar 2018, Laurent Dufour wrote:
> 
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index ef6ef0627090..dfa81a638b7c 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -359,6 +359,12 @@ struct vm_fault {
>>   * page table to avoid allocation from
>>   * atomic context.
>>   */
>> +/*
>> + * These entries are required when handling speculative page fault.
>> + * This way the page handling is done using consistent field values.
>> + */
>> +unsigned long vma_flags;
>> +pgprot_t vma_page_prot;
>>  };
>>  
>>  /* page entry size for vm->huge_fault() */
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index 446427cafa19..f71db2b42b30 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -3717,6 +3717,8 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
>>  .vma = vma,
>>  .address = address,
>>  .flags = flags,
>> +.vma_flags = vma->vm_flags,
>> +.vma_page_prot = vma->vm_page_prot,
>>  /*
>>   * Hard to debug if it ends up being
>>   * used by a callee that assumes
>> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
>> index 32314e9e48dd..a946d5306160 100644
>> --- a/mm/khugepaged.c
>> +++ b/mm/khugepaged.c
>> @@ -882,6 +882,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
>>  .flags = FAULT_FLAG_ALLOW_RETRY,
>>  .pmd = pmd,
>>  .pgoff = linear_page_index(vma, address),
>> +.vma_flags = vma->vm_flags,
>> +.vma_page_prot = vma->vm_page_prot,
>>  };
>>  
>>  /* we only decide to swapin, if there is enough young ptes */
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 0200340ef089..46fe92b93682 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -2615,7 +2615,7 @@ static int wp_page_copy(struct vm_fault *vmf)
>>   * Don't let another task, with possibly unlocked vma,
>>   * keep the mlocked page.
>>   */
>> -if (page_copied && (vma->vm_flags & VM_LOCKED)) {
>> +if (page_copied && (vmf->vma_flags & VM_LOCKED)) {
>>  lock_page(old_page);/* LRU manipulation */
>>  if (PageMlocked(old_page))
>>  munlock_vma_page(old_page);
> 
> Doesn't wp_page_copy() also need to pass this to anon_vma_prepare() so 
> that find_mergeable_anon_vma() works correctly?

In the case of the SPF handler, we check that vma->anon_vma is not NULL, so
__anon_vma_prepare(vma) is never called in the context of the SPF handler.
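
For clarity, a minimal sketch of that check (the helper name below is made up
for illustration; it is not the exact code from the series):

	/*
	 * The speculative path only proceeds when the anon_vma is already
	 * set up, so __anon_vma_prepare() -- and therefore
	 * find_mergeable_anon_vma() -- only ever runs on the regular,
	 * mmap_sem-protected fault path.
	 */
	static bool spf_anon_vma_ready(struct vm_area_struct *vma)
	{
		if (vma_is_anonymous(vma) && !vma->anon_vma)
			return false;	/* bail out to the classic fault path */
		return true;
	}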

> 
>> @@ -2649,7 +2649,7 @@ static int wp_page_copy(struct vm_fault *vmf)
>>   */
>>  int finish_mkwrite_fault(struct vm_fault *vmf)
>>  {
>> -WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
>> +WARN_ON_ONCE(!(vmf->vma_flags & VM_SHARED));
>>  if (!pte_map_lock(vmf))
>>  return VM_FAULT_RETRY;
>>  /*
>> @@ -2751,7 +2751,7 @@ static int do_wp_page(struct vm_fault *vmf)
>>   * We should not cow pages in a shared writeable mapping.
>>   * Just mark the pages writable and/or call ops->pfn_mkwrite.
>>   */
>> -if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
>> +if ((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
>>   (VM_WRITE|VM_SHARED))
>>  return wp_pfn_shared(vmf);
>>  
>> @@ -2798,7 +2798,7 @@ static int do_wp_page(struct vm_fault *vmf)
>>  return VM_FAULT_WRITE;
>>  }
>>  unlock_page(vmf->page);
>> -} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
>> +} else if (unlikely((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
>>  (VM_WRITE|VM_SHARED))) {
>>  return wp_page_shared(vmf);
>>  }
>> @@ -3067,7 +3067,7 @@ int do_swap_page(struct vm_fault *vmf)
>>  
>>  inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
>>  dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
>> -pte = mk_pte(page, vma->vm_page_prot);
>> +pte = mk_pte(page, vmf->vma_page_prot);
>>  if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
>>  pte = maybe_mkwrite(pte_mkdirty(pte), vma);
>>  vmf->flags &= ~FAULT_FLAG_WRITE;
>> @@ -3093,7 +3093,7 @@ int do_swap_page(struct vm_fault *vmf)
>>  
>>  swap_free(entry);
>>  if (mem_cgroup_swap_full(page) ||
>> -(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
>> +(vmf->vma_flags & VM_LOCKED) || PageMlocked(page))
>>  try_to_free_swap(page);
>>  unlock_page(page);
>> if (page != swapcache && swapcache) {

Re: [PATCH v9 11/24] mm: Cache some VMA fields in the vm_fault structure

2018-04-02 Thread David Rientjes
On Tue, 13 Mar 2018, Laurent Dufour wrote:

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index ef6ef0627090..dfa81a638b7c 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -359,6 +359,12 @@ struct vm_fault {
>* page table to avoid allocation from
>* atomic context.
>*/
> + /*
> +  * These entries are required when handling speculative page fault.
> +  * This way the page handling is done using consistent field values.
> +  */
> + unsigned long vma_flags;
> + pgprot_t vma_page_prot;
>  };
>  
>  /* page entry size for vm->huge_fault() */
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 446427cafa19..f71db2b42b30 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3717,6 +3717,8 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
>   .vma = vma,
>   .address = address,
>   .flags = flags,
> + .vma_flags = vma->vm_flags,
> + .vma_page_prot = vma->vm_page_prot,
>   /*
>* Hard to debug if it ends up being
>* used by a callee that assumes
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 32314e9e48dd..a946d5306160 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -882,6 +882,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
>   .flags = FAULT_FLAG_ALLOW_RETRY,
>   .pmd = pmd,
>   .pgoff = linear_page_index(vma, address),
> + .vma_flags = vma->vm_flags,
> + .vma_page_prot = vma->vm_page_prot,
>   };
>  
>   /* we only decide to swapin, if there is enough young ptes */
> diff --git a/mm/memory.c b/mm/memory.c
> index 0200340ef089..46fe92b93682 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2615,7 +2615,7 @@ static int wp_page_copy(struct vm_fault *vmf)
>* Don't let another task, with possibly unlocked vma,
>* keep the mlocked page.
>*/
> - if (page_copied && (vma->vm_flags & VM_LOCKED)) {
> + if (page_copied && (vmf->vma_flags & VM_LOCKED)) {
>   lock_page(old_page);/* LRU manipulation */
>   if (PageMlocked(old_page))
>   munlock_vma_page(old_page);

Doesn't wp_page_copy() also need to pass this to anon_vma_prepare() so 
that find_mergeable_anon_vma() works correctly?

> @@ -2649,7 +2649,7 @@ static int wp_page_copy(struct vm_fault *vmf)
>   */
>  int finish_mkwrite_fault(struct vm_fault *vmf)
>  {
> - WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
> + WARN_ON_ONCE(!(vmf->vma_flags & VM_SHARED));
>   if (!pte_map_lock(vmf))
>   return VM_FAULT_RETRY;
>   /*
> @@ -2751,7 +2751,7 @@ static int do_wp_page(struct vm_fault *vmf)
>* We should not cow pages in a shared writeable mapping.
>* Just mark the pages writable and/or call ops->pfn_mkwrite.
>*/
> - if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
> + if ((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
>(VM_WRITE|VM_SHARED))
>   return wp_pfn_shared(vmf);
>  
> @@ -2798,7 +2798,7 @@ static int do_wp_page(struct vm_fault *vmf)
>   return VM_FAULT_WRITE;
>   }
>   unlock_page(vmf->page);
> - } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
> + } else if (unlikely((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
>   (VM_WRITE|VM_SHARED))) {
>   return wp_page_shared(vmf);
>   }
> @@ -3067,7 +3067,7 @@ int do_swap_page(struct vm_fault *vmf)
>  
>   inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
>   dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
> - pte = mk_pte(page, vma->vm_page_prot);
> + pte = mk_pte(page, vmf->vma_page_prot);
>   if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
>   pte = maybe_mkwrite(pte_mkdirty(pte), vma);
>   vmf->flags &= ~FAULT_FLAG_WRITE;
> @@ -3093,7 +3093,7 @@ int do_swap_page(struct vm_fault *vmf)
>  
>   swap_free(entry);
>   if (mem_cgroup_swap_full(page) ||
> - (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
> + (vmf->vma_flags & VM_LOCKED) || PageMlocked(page))
>   try_to_free_swap(page);
>   unlock_page(page);
>   if (page != swapcache && swapcache) {
> @@ -3150,7 +3150,7 @@ static int do_anonymous_page(struct vm_fault *vmf)
>   pte_t entry;
>  
>   /* File mapping without ->vm_ops ? */
> - if (vma->vm_flags & VM_SHARED)
> + if (vmf->vma_flags & VM_SHARED)
>  

[PATCH v9 11/24] mm: Cache some VMA fields in the vm_fault structure

2018-03-13 Thread Laurent Dufour
When handling a speculative page fault, the vma->vm_flags and
vma->vm_page_prot fields are read once the page table lock is released, so
there is no longer any guarantee that these fields will not change behind
our back. They are therefore saved in the vm_fault structure before the VMA
is checked for changes.

This patch also sets these fields in hugetlb_no_page() and
__collapse_huge_page_swapin() even though the callee does not need them.
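
As an illustration of the intended use (a sketch only: the helper name, the
vm_sequence field and the retry logic below are assumptions about the rest of
the series, not part of this patch), the speculative handler would snapshot
the fields and then re-validate the VMA before relying on them:

	/*
	 * Sketch, not this patch's code: snapshot the VMA fields into the
	 * vm_fault structure, then re-check the VMA's sequence count.  If
	 * the VMA changed while the fields were read, discard the snapshot
	 * and fall back to the mmap_sem-protected fault path.
	 */
	static int spf_setup_vmf(struct vm_area_struct *vma, unsigned long address,
				 unsigned int flags, unsigned int seq,
				 struct vm_fault *vmf)
	{
		vmf->vma = vma;
		vmf->address = address & PAGE_MASK;
		vmf->flags = flags;
		/* consistent copies used for the rest of the fault handling */
		vmf->vma_flags = vma->vm_flags;
		vmf->vma_page_prot = vma->vm_page_prot;

		/* vm_sequence is introduced elsewhere in the series */
		if (read_seqcount_retry(&vma->vm_sequence, seq))
			return VM_FAULT_RETRY;
		return 0;
	}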

Signed-off-by: Laurent Dufour 
---
 include/linux/mm.h |  6 ++
 mm/hugetlb.c   |  2 ++
 mm/khugepaged.c|  2 ++
 mm/memory.c| 38 --
 4 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef6ef0627090..dfa81a638b7c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -359,6 +359,12 @@ struct vm_fault {
 * page table to avoid allocation from
 * atomic context.
 */
+   /*
+* These entries are required when handling speculative page fault.
+* This way the page handling is done using consistent field values.
+*/
+   unsigned long vma_flags;
+   pgprot_t vma_page_prot;
 };
 
 /* page entry size for vm->huge_fault() */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 446427cafa19..f71db2b42b30 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3717,6 +3717,8 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
.vma = vma,
.address = address,
.flags = flags,
+   .vma_flags = vma->vm_flags,
+   .vma_page_prot = vma->vm_page_prot,
/*
 * Hard to debug if it ends up being
 * used by a callee that assumes
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 32314e9e48dd..a946d5306160 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -882,6 +882,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
.flags = FAULT_FLAG_ALLOW_RETRY,
.pmd = pmd,
.pgoff = linear_page_index(vma, address),
+   .vma_flags = vma->vm_flags,
+   .vma_page_prot = vma->vm_page_prot,
};
 
/* we only decide to swapin, if there is enough young ptes */
diff --git a/mm/memory.c b/mm/memory.c
index 0200340ef089..46fe92b93682 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2615,7 +2615,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 * Don't let another task, with possibly unlocked vma,
 * keep the mlocked page.
 */
-   if (page_copied && (vma->vm_flags & VM_LOCKED)) {
+   if (page_copied && (vmf->vma_flags & VM_LOCKED)) {
lock_page(old_page);/* LRU manipulation */
if (PageMlocked(old_page))
munlock_vma_page(old_page);
@@ -2649,7 +2649,7 @@ static int wp_page_copy(struct vm_fault *vmf)
  */
 int finish_mkwrite_fault(struct vm_fault *vmf)
 {
-   WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
+   WARN_ON_ONCE(!(vmf->vma_flags & VM_SHARED));
if (!pte_map_lock(vmf))
return VM_FAULT_RETRY;
/*
@@ -2751,7 +2751,7 @@ static int do_wp_page(struct vm_fault *vmf)
 * We should not cow pages in a shared writeable mapping.
 * Just mark the pages writable and/or call ops->pfn_mkwrite.
 */
-   if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+   if ((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
 (VM_WRITE|VM_SHARED))
return wp_pfn_shared(vmf);
 
@@ -2798,7 +2798,7 @@ static int do_wp_page(struct vm_fault *vmf)
return VM_FAULT_WRITE;
}
unlock_page(vmf->page);
-   } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+   } else if (unlikely((vmf->vma_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))) {
return wp_page_shared(vmf);
}
@@ -3067,7 +3067,7 @@ int do_swap_page(struct vm_fault *vmf)
 
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
-   pte = mk_pte(page, vma->vm_page_prot);
+   pte = mk_pte(page, vmf->vma_page_prot);
if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;
@@ -3093,7 +3093,7 @@ int do_swap_page(struct vm_fault *vmf)
 
swap_free(entry);
if (mem_cgroup_swap_full(page) ||
-   (vma->vm_flags & VM_LOCKED) || PageMlocked(page))