Re: [PATCH v9 14/24] mm: Introduce __maybe_mkwrite()

2018-04-04 Thread Laurent Dufour
On 03/04/2018 01:12, David Rientjes wrote:
> On Tue, 13 Mar 2018, Laurent Dufour wrote:
> 
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index dfa81a638b7c..a84ddc218bbd 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -684,13 +684,18 @@ void free_compound_page(struct page *page);
>>   * pte_mkwrite.  But get_user_pages can cause write faults for mappings
>>   * that do not have writing enabled, when used by access_process_vm.
>>   */
>> -static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>> +static inline pte_t __maybe_mkwrite(pte_t pte, unsigned long vma_flags)
>>  {
>> -	if (likely(vma->vm_flags & VM_WRITE))
>> +	if (likely(vma_flags & VM_WRITE))
>>  		pte = pte_mkwrite(pte);
>>  	return pte;
>>  }
>>  
>> +static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
>> +{
>> +	return __maybe_mkwrite(pte, vma->vm_flags);
>> +}
>> +
>>  int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>>  		struct page *page);
>>  int finish_fault(struct vm_fault *vmf);
>> diff --git a/mm/memory.c b/mm/memory.c
>> index 0a0a483d9a65..af0338fbc34d 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -2472,7 +2472,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>>  
>>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>>  	entry = pte_mkyoung(vmf->orig_pte);
>> -	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>> +	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>>  	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
>>  		update_mmu_cache(vma, vmf->address, vmf->pte);
>>  	pte_unmap_unlock(vmf->pte, vmf->ptl);
>> @@ -2549,8 +2549,8 @@ static int wp_page_copy(struct vm_fault *vmf)
>>  		inc_mm_counter_fast(mm, MM_ANONPAGES);
>>  	}
>>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>> -	entry = mk_pte(new_page, vma->vm_page_prot);
>> -	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>> +	entry = mk_pte(new_page, vmf->vma_page_prot);
>> +	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>>  	/*
>>  	 * Clear the pte entry and flush it first, before updating the
>>  	 * pte with the new entry. This will avoid a race condition
> 
> Don't you also need to do this in do_swap_page()?

Indeed. I'll drop this patch, since all of these changes are now done in
patch 11 ("mm: Cache some VMA fields in the vm_fault structure") where, as
you suggested, maybe_mkwrite() is passed the vm_flags value directly.
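
A minimal sketch of that pattern, for reference; it uses the vm_fault
fields introduced earlier in this series (vma_flags, vma_page_prot) and is
illustrative only, not the exact patch 11 code:

	/* maybe_mkwrite() now takes the vm_flags value rather than the vma. */
	static inline pte_t maybe_mkwrite(pte_t pte, unsigned long vma_flags)
	{
		if (likely(vma_flags & VM_WRITE))
			pte = pte_mkwrite(pte);
		return pte;
	}

	/*
	 * Fault-path callers use the values snapshotted into struct vm_fault
	 * at fault entry; the speculative handler cannot assume the vma
	 * itself is stable while the fault is being handled.
	 */
	pte = mk_pte(page, vmf->vma_page_prot);
	pte = maybe_mkwrite(pte_mkdirty(pte), vmf->vma_flags);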

> diff --git a/mm/memory.c b/mm/memory.c
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3067,9 +3067,9 @@ int do_swap_page(struct vm_fault *vmf)
> 
>  	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
>  	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
> -	pte = mk_pte(page, vma->vm_page_prot);
> +	pte = mk_pte(page, vmf->vma_page_prot);
>  	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
> -		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
> +		pte = __maybe_mkwrite(pte_mkdirty(pte), vmf->vma_flags);
>  		vmf->flags &= ~FAULT_FLAG_WRITE;
>  		ret |= VM_FAULT_WRITE;
>  		exclusive = RMAP_EXCLUSIVE;
> 



Re: [PATCH v9 14/24] mm: Introduce __maybe_mkwrite()

2018-04-02 Thread David Rientjes
On Tue, 13 Mar 2018, Laurent Dufour wrote:

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index dfa81a638b7c..a84ddc218bbd 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -684,13 +684,18 @@ void free_compound_page(struct page *page);
>   * pte_mkwrite.  But get_user_pages can cause write faults for mappings
>   * that do not have writing enabled, when used by access_process_vm.
>   */
> -static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
> +static inline pte_t __maybe_mkwrite(pte_t pte, unsigned long vma_flags)
>  {
> -	if (likely(vma->vm_flags & VM_WRITE))
> +	if (likely(vma_flags & VM_WRITE))
>  		pte = pte_mkwrite(pte);
>  	return pte;
>  }
>  
> +static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
> +{
> +	return __maybe_mkwrite(pte, vma->vm_flags);
> +}
> +
>  int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>  		struct page *page);
>  int finish_fault(struct vm_fault *vmf);
> diff --git a/mm/memory.c b/mm/memory.c
> index 0a0a483d9a65..af0338fbc34d 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2472,7 +2472,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
>  
>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  	entry = pte_mkyoung(vmf->orig_pte);
> -	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>  	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
>  		update_mmu_cache(vma, vmf->address, vmf->pte);
>  	pte_unmap_unlock(vmf->pte, vmf->ptl);
> @@ -2549,8 +2549,8 @@ static int wp_page_copy(struct vm_fault *vmf)
>  		inc_mm_counter_fast(mm, MM_ANONPAGES);
>  	}
>  	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
> -	entry = mk_pte(new_page, vma->vm_page_prot);
> -	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> +	entry = mk_pte(new_page, vmf->vma_page_prot);
> +	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
>  	/*
>  	 * Clear the pte entry and flush it first, before updating the
>  	 * pte with the new entry. This will avoid a race condition

Don't you also need to do this in do_swap_page()?

diff --git a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3067,9 +3067,9 @@ int do_swap_page(struct vm_fault *vmf)
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
-	pte = mk_pte(page, vma->vm_page_prot);
+	pte = mk_pte(page, vmf->vma_page_prot);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
-		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+		pte = __maybe_mkwrite(pte_mkdirty(pte), vmf->vma_flags);
 		vmf->flags &= ~FAULT_FLAG_WRITE;
 		ret |= VM_FAULT_WRITE;
 		exclusive = RMAP_EXCLUSIVE;


[PATCH v9 14/24] mm: Introduce __maybe_mkwrite()

2018-03-13 Thread Laurent Dufour
The current maybe_mkwrite() is passed a pointer to the vma structure in
order to fetch the vm_flags field.

When dealing with the speculative page fault handler, it will be better to
rely on the cached vm_flags value stored in the vm_fault structure.

This patch introduces a __maybe_mkwrite() helper which can be called with
the vm_flags value directly.

No functional change is expected for the other callers of maybe_mkwrite().

Signed-off-by: Laurent Dufour 
---
 include/linux/mm.h | 9 +++++++--
 mm/memory.c        | 6 +++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dfa81a638b7c..a84ddc218bbd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -684,13 +684,18 @@ void free_compound_page(struct page *page);
  * pte_mkwrite.  But get_user_pages can cause write faults for mappings
  * that do not have writing enabled, when used by access_process_vm.
  */
-static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+static inline pte_t __maybe_mkwrite(pte_t pte, unsigned long vma_flags)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma_flags & VM_WRITE))
 		pte = pte_mkwrite(pte);
 	return pte;
 }
 
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	return __maybe_mkwrite(pte, vma->vm_flags);
+}
+
 int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
 int finish_fault(struct vm_fault *vmf);
diff --git a/mm/memory.c b/mm/memory.c
index 0a0a483d9a65..af0338fbc34d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2472,7 +2472,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
 
 	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 	entry = pte_mkyoung(vmf->orig_pte);
-	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
 	if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1))
 		update_mmu_cache(vma, vmf->address, vmf->pte);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2549,8 +2549,8 @@ static int wp_page_copy(struct vm_fault *vmf)
 		inc_mm_counter_fast(mm, MM_ANONPAGES);
 	}
 	flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
-	entry = mk_pte(new_page, vma->vm_page_prot);
-	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	entry = mk_pte(new_page, vmf->vma_page_prot);
+	entry = __maybe_mkwrite(pte_mkdirty(entry), vmf->vma_flags);
 	/*
 	 * Clear the pte entry and flush it first, before updating the
 	 * pte with the new entry. This will avoid a race condition
-- 
2.7.4


