Re: [PATCH 02/16] mm: Prepare for FAULT_FLAG_SPECULATIVE

2017-08-09 Thread Laurent Dufour
On 09/08/2017 12:08, Kirill A. Shutemov wrote:
> On Tue, Aug 08, 2017 at 04:35:35PM +0200, Laurent Dufour wrote:
>> @@ -2295,7 +2302,11 @@ static int wp_page_copy(struct vm_fault *vmf)
>>  /*
>>   * Re-check the pte - we dropped the lock
>>   */
>> -vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, >ptl);
>> +if (!pte_map_lock(vmf)) {
>> +mem_cgroup_cancel_charge(new_page, memcg, false);
>> +ret = VM_FAULT_RETRY;
>> +goto oom_free_new;
> 
> With the change, label is misleading.

That's right.
But I'm wondering renaming it out to 'out_free_new' and replacing all the
matching 'goto' where the label was making sense will help readability ?
Have you better idea ?



Re: [PATCH 02/16] mm: Prepare for FAULT_FLAG_SPECULATIVE

2017-08-09 Thread Laurent Dufour
On 09/08/2017 12:08, Kirill A. Shutemov wrote:
> On Tue, Aug 08, 2017 at 04:35:35PM +0200, Laurent Dufour wrote:
>> @@ -2295,7 +2302,11 @@ static int wp_page_copy(struct vm_fault *vmf)
>>  /*
>>   * Re-check the pte - we dropped the lock
>>   */
>> -vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, >ptl);
>> +if (!pte_map_lock(vmf)) {
>> +mem_cgroup_cancel_charge(new_page, memcg, false);
>> +ret = VM_FAULT_RETRY;
>> +goto oom_free_new;
> 
> With the change, label is misleading.

That's right.
But I'm wondering renaming it out to 'out_free_new' and replacing all the
matching 'goto' where the label was making sense will help readability ?
Have you better idea ?



Re: [PATCH 02/16] mm: Prepare for FAULT_FLAG_SPECULATIVE

2017-08-09 Thread Kirill A. Shutemov
On Tue, Aug 08, 2017 at 04:35:35PM +0200, Laurent Dufour wrote:
> @@ -2295,7 +2302,11 @@ static int wp_page_copy(struct vm_fault *vmf)
>   /*
>* Re-check the pte - we dropped the lock
>*/
> - vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, >ptl);
> + if (!pte_map_lock(vmf)) {
> + mem_cgroup_cancel_charge(new_page, memcg, false);
> + ret = VM_FAULT_RETRY;
> + goto oom_free_new;

With the change, label is misleading.

> + }
>   if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
>   if (old_page) {
>   if (!PageAnon(old_page)) {

-- 
 Kirill A. Shutemov


Re: [PATCH 02/16] mm: Prepare for FAULT_FLAG_SPECULATIVE

2017-08-09 Thread Kirill A. Shutemov
On Tue, Aug 08, 2017 at 04:35:35PM +0200, Laurent Dufour wrote:
> @@ -2295,7 +2302,11 @@ static int wp_page_copy(struct vm_fault *vmf)
>   /*
>* Re-check the pte - we dropped the lock
>*/
> - vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, >ptl);
> + if (!pte_map_lock(vmf)) {
> + mem_cgroup_cancel_charge(new_page, memcg, false);
> + ret = VM_FAULT_RETRY;
> + goto oom_free_new;

With the change, label is misleading.

> + }
>   if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
>   if (old_page) {
>   if (!PageAnon(old_page)) {

-- 
 Kirill A. Shutemov


[PATCH 02/16] mm: Prepare for FAULT_FLAG_SPECULATIVE

2017-08-08 Thread Laurent Dufour
From: Peter Zijlstra 

When speculating faults (without holding mmap_sem) we need to validate
that the vma against which we loaded pages is still valid when we're
ready to install the new PTE.

Therefore, replace the pte_offset_map_lock() calls that (re)take the
PTL with pte_map_lock() which can fail in case we find the VMA changed
since we started the fault.

Signed-off-by: Peter Zijlstra (Intel) 

[Port to 4.12 kernel]
[Remove the comment about the fault_env structure which has been
 implemented as the vm_fault structure in the kernel]
Signed-off-by: Laurent Dufour 
---
 include/linux/mm.h |  1 +
 mm/memory.c| 55 ++
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46b9ac5e8569..8763ec96dc78 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -286,6 +286,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_USER0x40/* The fault originated in 
userspace */
 #define FAULT_FLAG_REMOTE  0x80/* faulting for non current tsk/mm */
 #define FAULT_FLAG_INSTRUCTION  0x100  /* The fault was during an instruction 
fetch */
+#define FAULT_FLAG_SPECULATIVE 0x200   /* Speculative fault, not holding 
mmap_sem */
 
 #define FAULT_FLAG_TRACE \
{ FAULT_FLAG_WRITE, "WRITE" }, \
diff --git a/mm/memory.c b/mm/memory.c
index d08f494f1b37..b93916c0b086 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2241,6 +2241,12 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
 }
 
+static bool pte_map_lock(struct vm_fault *vmf)
+{
+   vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, 
>ptl);
+   return true;
+}
+
 /*
  * Handle the case of a page which we actually need to copy to a new page.
  *
@@ -2268,6 +2274,7 @@ static int wp_page_copy(struct vm_fault *vmf)
const unsigned long mmun_start = vmf->address & PAGE_MASK;
const unsigned long mmun_end = mmun_start + PAGE_SIZE;
struct mem_cgroup *memcg;
+   int ret = VM_FAULT_OOM;
 
if (unlikely(anon_vma_prepare(vma)))
goto oom;
@@ -2295,7 +2302,11 @@ static int wp_page_copy(struct vm_fault *vmf)
/*
 * Re-check the pte - we dropped the lock
 */
-   vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, >ptl);
+   if (!pte_map_lock(vmf)) {
+   mem_cgroup_cancel_charge(new_page, memcg, false);
+   ret = VM_FAULT_RETRY;
+   goto oom_free_new;
+   }
if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
if (old_page) {
if (!PageAnon(old_page)) {
@@ -2383,7 +2394,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 oom:
if (old_page)
put_page(old_page);
-   return VM_FAULT_OOM;
+   return ret;
 }
 
 /**
@@ -2404,8 +2415,8 @@ static int wp_page_copy(struct vm_fault *vmf)
 int finish_mkwrite_fault(struct vm_fault *vmf)
 {
WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
-   vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
-  >ptl);
+   if (!pte_map_lock(vmf))
+   return VM_FAULT_RETRY;
/*
 * We might have raced with another page fault while we released the
 * pte_offset_map_lock.
@@ -2523,8 +2534,11 @@ static int do_wp_page(struct vm_fault *vmf)
get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
lock_page(vmf->page);
-   vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-   vmf->address, >ptl);
+   if (!pte_map_lock(vmf)) {
+   unlock_page(vmf->page);
+   put_page(vmf->page);
+   return VM_FAULT_RETRY;
+   }
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
unlock_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2682,8 +2696,10 @@ int do_swap_page(struct vm_fault *vmf)
 * Back out if somebody else faulted in this pte
 * while we released the pte lock.
 */
-   vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-   vmf->address, >ptl);
+   if (!pte_map_lock(vmf)) {
+   delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+   return VM_FAULT_RETRY;
+   }
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;

[PATCH 02/16] mm: Prepare for FAULT_FLAG_SPECULATIVE

2017-08-08 Thread Laurent Dufour
From: Peter Zijlstra 

When speculating faults (without holding mmap_sem) we need to validate
that the vma against which we loaded pages is still valid when we're
ready to install the new PTE.

Therefore, replace the pte_offset_map_lock() calls that (re)take the
PTL with pte_map_lock() which can fail in case we find the VMA changed
since we started the fault.

Signed-off-by: Peter Zijlstra (Intel) 

[Port to 4.12 kernel]
[Remove the comment about the fault_env structure which has been
 implemented as the vm_fault structure in the kernel]
Signed-off-by: Laurent Dufour 
---
 include/linux/mm.h |  1 +
 mm/memory.c| 55 ++
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46b9ac5e8569..8763ec96dc78 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -286,6 +286,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_USER0x40/* The fault originated in 
userspace */
 #define FAULT_FLAG_REMOTE  0x80/* faulting for non current tsk/mm */
 #define FAULT_FLAG_INSTRUCTION  0x100  /* The fault was during an instruction 
fetch */
+#define FAULT_FLAG_SPECULATIVE 0x200   /* Speculative fault, not holding 
mmap_sem */
 
 #define FAULT_FLAG_TRACE \
{ FAULT_FLAG_WRITE, "WRITE" }, \
diff --git a/mm/memory.c b/mm/memory.c
index d08f494f1b37..b93916c0b086 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2241,6 +2241,12 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
 }
 
+static bool pte_map_lock(struct vm_fault *vmf)
+{
+   vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, 
>ptl);
+   return true;
+}
+
 /*
  * Handle the case of a page which we actually need to copy to a new page.
  *
@@ -2268,6 +2274,7 @@ static int wp_page_copy(struct vm_fault *vmf)
const unsigned long mmun_start = vmf->address & PAGE_MASK;
const unsigned long mmun_end = mmun_start + PAGE_SIZE;
struct mem_cgroup *memcg;
+   int ret = VM_FAULT_OOM;
 
if (unlikely(anon_vma_prepare(vma)))
goto oom;
@@ -2295,7 +2302,11 @@ static int wp_page_copy(struct vm_fault *vmf)
/*
 * Re-check the pte - we dropped the lock
 */
-   vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, >ptl);
+   if (!pte_map_lock(vmf)) {
+   mem_cgroup_cancel_charge(new_page, memcg, false);
+   ret = VM_FAULT_RETRY;
+   goto oom_free_new;
+   }
if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
if (old_page) {
if (!PageAnon(old_page)) {
@@ -2383,7 +2394,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 oom:
if (old_page)
put_page(old_page);
-   return VM_FAULT_OOM;
+   return ret;
 }
 
 /**
@@ -2404,8 +2415,8 @@ static int wp_page_copy(struct vm_fault *vmf)
 int finish_mkwrite_fault(struct vm_fault *vmf)
 {
WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
-   vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
-  >ptl);
+   if (!pte_map_lock(vmf))
+   return VM_FAULT_RETRY;
/*
 * We might have raced with another page fault while we released the
 * pte_offset_map_lock.
@@ -2523,8 +2534,11 @@ static int do_wp_page(struct vm_fault *vmf)
get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
lock_page(vmf->page);
-   vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-   vmf->address, >ptl);
+   if (!pte_map_lock(vmf)) {
+   unlock_page(vmf->page);
+   put_page(vmf->page);
+   return VM_FAULT_RETRY;
+   }
if (!pte_same(*vmf->pte, vmf->orig_pte)) {
unlock_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2682,8 +2696,10 @@ int do_swap_page(struct vm_fault *vmf)
 * Back out if somebody else faulted in this pte
 * while we released the pte lock.
 */
-   vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-   vmf->address, >ptl);
+   if (!pte_map_lock(vmf)) {
+   delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+   return VM_FAULT_RETRY;
+   }
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2739,8 +2755,11 @@ int