Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Zi Yan


Anshuman Khandual wrote:
> On 04/21/2017 02:17 AM, Zi Yan wrote:
>> From: Zi Yan 
>>
>> If one of callers of page migration starts to handle thp,
>> memory management code start to see pmd migration entry, so we need
>> to prepare for it before enabling. This patch changes various code
>> point which checks the status of given pmds in order to prevent race
>> between thp migration and the pmd-related works.
>>
>> ChangeLog v1 -> v2:
>> - introduce pmd_related() (I know the naming is not good, but can't
>>   think up a better name. Any suggestion is welcome.)
>>
>> Signed-off-by: Naoya Horiguchi 
>>
>> ChangeLog v2 -> v3:
>> - add is_swap_pmd()
>> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
>> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>>   true on pmd_migration_entry, so that migration entries are not
>>   treated as pmd page table entries.
>>
>> ChangeLog v4 -> v5:
>> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>>   the equivalence of !pmd_present() and is_pmd_migration_entry()
>> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
>> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
>> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>>   so it will not be confused with pmd_none()
>> - change author information
>>
>> Signed-off-by: Zi Yan 
>> ---
>>  arch/x86/mm/gup.c |  7 +++--
>>  fs/proc/task_mmu.c| 30 +
>>  include/asm-generic/pgtable.h | 17 +++-
>>  include/linux/huge_mm.h   | 14 --
>>  mm/gup.c  | 22 ++--
>>  mm/huge_memory.c  | 61 
>> ++-
>>  mm/memcontrol.c   |  5 
>>  mm/memory.c   | 12 +++--
>>  mm/mprotect.c |  4 +--
>>  mm/mremap.c   |  2 +-
>>  10 files changed, 145 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
>> index 456dfdfd2249..096bbcc801e6 100644
>> --- a/arch/x86/mm/gup.c
>> +++ b/arch/x86/mm/gup.c
>> @@ -9,6 +9,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  
>>  #include 
>> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
>> unsigned long end,
>>  pmd_t pmd = *pmdp;
>>  
>>  next = pmd_addr_end(addr, end);
>> -if (pmd_none(pmd))
>> +if (!pmd_present(pmd)) {
>> +VM_BUG_ON(is_swap_pmd(pmd) && 
>> IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(pmd));
>>  return 0;
>> -if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
>> +} else if (unlikely(pmd_large(pmd))) {
>>  /*
>>   * NUMA hinting faults need to be handled in the GUP
>>   * slowpath for accounting purposes and so that they
>> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
>> index 5c8359704601..57489dcd71c4 100644
>> --- a/fs/proc/task_mmu.c
>> +++ b/fs/proc/task_mmu.c
>> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
>> addr, unsigned long end,
>>  
>>  ptl = pmd_trans_huge_lock(pmd, vma);
>>  if (ptl) {
>> -smaps_pmd_entry(pmd, addr, walk);
>> +if (pmd_present(*pmd))
>> +smaps_pmd_entry(pmd, addr, walk);
>>  spin_unlock(ptl);
>>  return 0;
>>  }
>> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned 
>> long addr,
>>  goto out;
>>  }
>>  
>> +if (!pmd_present(*pmd))
>> +goto out;
>> +
> 
> These pmd_present() checks should have been done irrespective of the
> presence of new PMD migration entries. Please separate them out in a
> different clean up patch.

Not really. The introduction of PMD migration entries makes
pmd_trans_huge_lock() return a lock when the PMD is a swap entry (see
the changes to pmd_trans_huge_lock() in this patch). This was not the
case before, when pmd_trans_huge_lock() returned NULL if the PMD entry
was pmd_none(), so neither of the two chunks was reachable.

Maybe I should use is_swap_pmd() to clarify the confusion.



>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 7406d88445bf..3479e9caf2fa 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -912,6 +912,22 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct 
>> mm_struct *src_mm,
>>  
>>  ret = -EAGAIN;
>>  pmd = *src_pmd;
>> +
>> +if (unlikely(is_swap_pmd(pmd))) {
>> +swp_entry_t entry = pmd_to_swp_entry(pmd);
>> +
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  

Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Zi Yan


Anshuman Khandual wrote:
> On 04/21/2017 02:17 AM, Zi Yan wrote:
>> From: Zi Yan 
>>
>> If one of callers of page migration starts to handle thp,
>> memory management code start to see pmd migration entry, so we need
>> to prepare for it before enabling. This patch changes various code
>> point which checks the status of given pmds in order to prevent race
>> between thp migration and the pmd-related works.
>>
>> ChangeLog v1 -> v2:
>> - introduce pmd_related() (I know the naming is not good, but can't
>>   think up a better name. Any suggestion is welcome.)
>>
>> Signed-off-by: Naoya Horiguchi 
>>
>> ChangeLog v2 -> v3:
>> - add is_swap_pmd()
>> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
>> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>>   true on pmd_migration_entry, so that migration entries are not
>>   treated as pmd page table entries.
>>
>> ChangeLog v4 -> v5:
>> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>>   the equivalence of !pmd_present() and is_pmd_migration_entry()
>> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
>> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
>> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>>   so it will not be confused with pmd_none()
>> - change author information
>>
>> Signed-off-by: Zi Yan 
>> ---
>>  arch/x86/mm/gup.c |  7 +++--
>>  fs/proc/task_mmu.c| 30 +
>>  include/asm-generic/pgtable.h | 17 +++-
>>  include/linux/huge_mm.h   | 14 --
>>  mm/gup.c  | 22 ++--
>>  mm/huge_memory.c  | 61 
>> ++-
>>  mm/memcontrol.c   |  5 
>>  mm/memory.c   | 12 +++--
>>  mm/mprotect.c |  4 +--
>>  mm/mremap.c   |  2 +-
>>  10 files changed, 145 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
>> index 456dfdfd2249..096bbcc801e6 100644
>> --- a/arch/x86/mm/gup.c
>> +++ b/arch/x86/mm/gup.c
>> @@ -9,6 +9,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  
>>  #include 
>> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
>> unsigned long end,
>>  pmd_t pmd = *pmdp;
>>  
>>  next = pmd_addr_end(addr, end);
>> -if (pmd_none(pmd))
>> +if (!pmd_present(pmd)) {
>> +VM_BUG_ON(is_swap_pmd(pmd) && 
>> IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(pmd));
>>  return 0;
>> -if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
>> +} else if (unlikely(pmd_large(pmd))) {
>>  /*
>>   * NUMA hinting faults need to be handled in the GUP
>>   * slowpath for accounting purposes and so that they
>> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
>> index 5c8359704601..57489dcd71c4 100644
>> --- a/fs/proc/task_mmu.c
>> +++ b/fs/proc/task_mmu.c
>> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
>> addr, unsigned long end,
>>  
>>  ptl = pmd_trans_huge_lock(pmd, vma);
>>  if (ptl) {
>> -smaps_pmd_entry(pmd, addr, walk);
>> +if (pmd_present(*pmd))
>> +smaps_pmd_entry(pmd, addr, walk);
>>  spin_unlock(ptl);
>>  return 0;
>>  }
>> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned 
>> long addr,
>>  goto out;
>>  }
>>  
>> +if (!pmd_present(*pmd))
>> +goto out;
>> +
> 
> These pmd_present() checks should have been done irrespective of the
> presence of new PMD migration entries. Please separate them out in a
> different clean up patch.

Not really. The introduction of PMD migration entries makes
pmd_trans_huge_lock() return a lock when the PMD is a swap entry (see
the changes to pmd_trans_huge_lock() in this patch). This was not the
case before, when pmd_trans_huge_lock() returned NULL if the PMD entry
was pmd_none(), so neither of the two chunks was reachable.

Maybe I should use is_swap_pmd() to clarify the confusion.



>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 7406d88445bf..3479e9caf2fa 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -912,6 +912,22 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct 
>> mm_struct *src_mm,
>>  
>>  ret = -EAGAIN;
>>  pmd = *src_pmd;
>> +
>> +if (unlikely(is_swap_pmd(pmd))) {
>> +swp_entry_t entry = pmd_to_swp_entry(pmd);
>> +
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(pmd));
>> +if (is_write_migration_entry(entry)) {
>> +   

Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Anshuman Khandual
On 04/21/2017 02:17 AM, Zi Yan wrote:
> From: Zi Yan 
> 
> If one of callers of page migration starts to handle thp,
> memory management code start to see pmd migration entry, so we need
> to prepare for it before enabling. This patch changes various code
> point which checks the status of given pmds in order to prevent race
> between thp migration and the pmd-related works.
> 
> ChangeLog v1 -> v2:
> - introduce pmd_related() (I know the naming is not good, but can't
>   think up a better name. Any suggestion is welcome.)
> 
> Signed-off-by: Naoya Horiguchi 
> 
> ChangeLog v2 -> v3:
> - add is_swap_pmd()
> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>   true on pmd_migration_entry, so that migration entries are not
>   treated as pmd page table entries.
> 
> ChangeLog v4 -> v5:
> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>   the equivalence of !pmd_present() and is_pmd_migration_entry()
> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>   so it will not be confused with pmd_none()
> - change author information
> 
> Signed-off-by: Zi Yan 
> ---
>  arch/x86/mm/gup.c |  7 +++--
>  fs/proc/task_mmu.c| 30 +
>  include/asm-generic/pgtable.h | 17 +++-
>  include/linux/huge_mm.h   | 14 --
>  mm/gup.c  | 22 ++--
>  mm/huge_memory.c  | 61 
> ++-
>  mm/memcontrol.c   |  5 
>  mm/memory.c   | 12 +++--
>  mm/mprotect.c |  4 +--
>  mm/mremap.c   |  2 +-
>  10 files changed, 145 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
> index 456dfdfd2249..096bbcc801e6 100644
> --- a/arch/x86/mm/gup.c
> +++ b/arch/x86/mm/gup.c
> @@ -9,6 +9,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
> unsigned long end,
>   pmd_t pmd = *pmdp;
>  
>   next = pmd_addr_end(addr, end);
> - if (pmd_none(pmd))
> + if (!pmd_present(pmd)) {
> + VM_BUG_ON(is_swap_pmd(pmd) && 
> IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(pmd));
>   return 0;
> - if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
> + } else if (unlikely(pmd_large(pmd))) {
>   /*
>* NUMA hinting faults need to be handled in the GUP
>* slowpath for accounting purposes and so that they
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 5c8359704601..57489dcd71c4 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
> addr, unsigned long end,
>  
>   ptl = pmd_trans_huge_lock(pmd, vma);
>   if (ptl) {
> - smaps_pmd_entry(pmd, addr, walk);
> + if (pmd_present(*pmd))
> + smaps_pmd_entry(pmd, addr, walk);
>   spin_unlock(ptl);
>   return 0;
>   }
> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
> addr,
>   goto out;
>   }
>  
> + if (!pmd_present(*pmd))
> + goto out;
> +

These pmd_present() checks should have been done irrespective of the
presence of new PMD migration entries. Please separate them out in a
different clean up patch.
 
>   page = pmd_page(*pmd);
>  
>   /* Clear accessed and referenced bits. */
> @@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned 
> long addr, unsigned long end,
>   if (ptl) {
>   u64 flags = 0, frame = 0;
>   pmd_t pmd = *pmdp;
> + struct page *page = NULL;
>  
>   if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
>   flags |= PM_SOFT_DIRTY;
>  
> - /*
> -  * Currently pmd for thp is always present because thp
> -  * can not be swapped-out, migrated, or HWPOISONed
> -  * (split in such cases instead.)
> -  * This if-check is just to prepare for future implementation.
> -  */
>   if (pmd_present(pmd)) {
> - struct page *page = pmd_page(pmd);
> -
> - if (page_mapcount(page) == 1)
> - flags |= PM_MMAP_EXCLUSIVE;
> +  

Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path

2017-04-21 Thread Anshuman Khandual
On 04/21/2017 02:17 AM, Zi Yan wrote:
> From: Zi Yan 
> 
> If one of callers of page migration starts to handle thp,
> memory management code start to see pmd migration entry, so we need
> to prepare for it before enabling. This patch changes various code
> point which checks the status of given pmds in order to prevent race
> between thp migration and the pmd-related works.
> 
> ChangeLog v1 -> v2:
> - introduce pmd_related() (I know the naming is not good, but can't
>   think up a better name. Any suggestion is welcome.)
> 
> Signed-off-by: Naoya Horiguchi 
> 
> ChangeLog v2 -> v3:
> - add is_swap_pmd()
> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
>   pmd_trans_huge(), pmd_devmap(), or pmd_none()
> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
>   true on pmd_migration_entry, so that migration entries are not
>   treated as pmd page table entries.
> 
> ChangeLog v4 -> v5:
> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
>   the equivalence of !pmd_present() and is_pmd_migration_entry()
> - fix migration entry wait deadlock code (from v1) in follow_page_mask()
> - remove unnecessary code (from v1) in follow_trans_huge_pmd()
> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
>   so it will not be confused with pmd_none()
> - change author information
> 
> Signed-off-by: Zi Yan 
> ---
>  arch/x86/mm/gup.c |  7 +++--
>  fs/proc/task_mmu.c| 30 +
>  include/asm-generic/pgtable.h | 17 +++-
>  include/linux/huge_mm.h   | 14 --
>  mm/gup.c  | 22 ++--
>  mm/huge_memory.c  | 61 
> ++-
>  mm/memcontrol.c   |  5 
>  mm/memory.c   | 12 +++--
>  mm/mprotect.c |  4 +--
>  mm/mremap.c   |  2 +-
>  10 files changed, 145 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
> index 456dfdfd2249..096bbcc801e6 100644
> --- a/arch/x86/mm/gup.c
> +++ b/arch/x86/mm/gup.c
> @@ -9,6 +9,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
> unsigned long end,
>   pmd_t pmd = *pmdp;
>  
>   next = pmd_addr_end(addr, end);
> - if (pmd_none(pmd))
> + if (!pmd_present(pmd)) {
> + VM_BUG_ON(is_swap_pmd(pmd) && 
> IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(pmd));
>   return 0;
> - if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
> + } else if (unlikely(pmd_large(pmd))) {
>   /*
>* NUMA hinting faults need to be handled in the GUP
>* slowpath for accounting purposes and so that they
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 5c8359704601..57489dcd71c4 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long 
> addr, unsigned long end,
>  
>   ptl = pmd_trans_huge_lock(pmd, vma);
>   if (ptl) {
> - smaps_pmd_entry(pmd, addr, walk);
> + if (pmd_present(*pmd))
> + smaps_pmd_entry(pmd, addr, walk);
>   spin_unlock(ptl);
>   return 0;
>   }
> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
> addr,
>   goto out;
>   }
>  
> + if (!pmd_present(*pmd))
> + goto out;
> +

These pmd_present() checks should have been done irrespective of the
presence of new PMD migration entries. Please separate them out in a
different clean up patch.
 
>   page = pmd_page(*pmd);
>  
>   /* Clear accessed and referenced bits. */
> @@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned 
> long addr, unsigned long end,
>   if (ptl) {
>   u64 flags = 0, frame = 0;
>   pmd_t pmd = *pmdp;
> + struct page *page = NULL;
>  
>   if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
>   flags |= PM_SOFT_DIRTY;
>  
> - /*
> -  * Currently pmd for thp is always present because thp
> -  * can not be swapped-out, migrated, or HWPOISONed
> -  * (split in such cases instead.)
> -  * This if-check is just to prepare for future implementation.
> -  */
>   if (pmd_present(pmd)) {
> - struct page *page = pmd_page(pmd);
> -
> - if (page_mapcount(page) == 1)
> - flags |= PM_MMAP_EXCLUSIVE;
> + page = pmd_page(pmd);
>  
>