Re: [PATCH v7 06/10] mm: thp: check pmd migration entry in common path

2017-06-21 Thread Zi Yan
On 21 Jun 2017, at 7:49, Kirill A. Shutemov wrote:

> On Tue, Jun 20, 2017 at 07:07:11PM -0400, Zi Yan wrote:
>> @@ -1220,6 +1238,9 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t 
>> orig_pmd)
>>  if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
>>  goto out_unlock;
>>
>> +if (unlikely(!pmd_present(orig_pmd)))
>> +goto out_unlock;
>> +
>
> Hm. Shouldn't we wait for the page here?

Thanks for pointing this out.

This chunk is unnecessary, since do_huge_pmd_wp_page() is called by 
wp_huge_pmd(),
which is called only when orig_pmd is present. Thus, this check is redundant.
And pmd_same() will also preclude vmf->pmd from being non-present.


>>  page = pmd_page(orig_pmd);
>>  VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
>>  /*
>> @@ -1556,6 +1577,12 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, 
>> struct vm_area_struct *vma,
>>  if (is_huge_zero_pmd(orig_pmd))
>>  goto out;
>>
>> +if (unlikely(!pmd_present(orig_pmd))) {
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(orig_pmd));
>> +goto out;
>> +}
>> +
>>  page = pmd_page(orig_pmd);
>>  /*
>>   * If other processes are mapping this page, we couldn't discard
>> @@ -1770,6 +1797,23 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t 
>> *pmd,
>>  preserve_write = prot_numa && pmd_write(*pmd);
>>  ret = 1;
>>
>> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
>> +if (is_swap_pmd(*pmd)) {
>> +swp_entry_t entry = pmd_to_swp_entry(*pmd);
>> +
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(*pmd));
>> +if (is_write_migration_entry(entry)) {
>> +pmd_t newpmd;
>> +
>> +make_migration_entry_read(&entry);
>> +newpmd = swp_entry_to_pmd(entry);
>> +set_pmd_at(mm, addr, pmd, newpmd);
>
> I was confused by this. Could you copy comment from change_pte_range()
> here?

Sure. Will do.

>> +}
>> +goto unlock;
>> +}
>> +#endif
>> +
>>  /*
>>   * Avoid trapping faults against the zero page. The read-only
>>   * data is likely to be read-cached on the local CPU and

Thanks for your review.

--
Best Regards
Yan Zi


signature.asc
Description: OpenPGP digital signature


Re: [PATCH v7 06/10] mm: thp: check pmd migration entry in common path

2017-06-21 Thread Zi Yan
On 21 Jun 2017, at 7:49, Kirill A. Shutemov wrote:

> On Tue, Jun 20, 2017 at 07:07:11PM -0400, Zi Yan wrote:
>> @@ -1220,6 +1238,9 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t 
>> orig_pmd)
>>  if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
>>  goto out_unlock;
>>
>> +if (unlikely(!pmd_present(orig_pmd)))
>> +goto out_unlock;
>> +
>
> Hm. Shouldn't we wait for the page here?

Thanks for pointing this out.

This chunk is unnecessary, since do_huge_pmd_wp_page() is called by 
wp_huge_pmd(),
which is called only when orig_pmd is present. Thus, this check is redundant.
And pmd_same() will also preclude vmf->pmd from being non-present.


>>  page = pmd_page(orig_pmd);
>>  VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
>>  /*
>> @@ -1556,6 +1577,12 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, 
>> struct vm_area_struct *vma,
>>  if (is_huge_zero_pmd(orig_pmd))
>>  goto out;
>>
>> +if (unlikely(!pmd_present(orig_pmd))) {
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(orig_pmd));
>> +goto out;
>> +}
>> +
>>  page = pmd_page(orig_pmd);
>>  /*
>>   * If other processes are mapping this page, we couldn't discard
>> @@ -1770,6 +1797,23 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t 
>> *pmd,
>>  preserve_write = prot_numa && pmd_write(*pmd);
>>  ret = 1;
>>
>> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
>> +if (is_swap_pmd(*pmd)) {
>> +swp_entry_t entry = pmd_to_swp_entry(*pmd);
>> +
>> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
>> +  !is_pmd_migration_entry(*pmd));
>> +if (is_write_migration_entry(entry)) {
>> +pmd_t newpmd;
>> +
>> +make_migration_entry_read(&entry);
>> +newpmd = swp_entry_to_pmd(entry);
>> +set_pmd_at(mm, addr, pmd, newpmd);
>
> I was confused by this. Could you copy comment from change_pte_range()
> here?

Sure. Will do.

>> +}
>> +goto unlock;
>> +}
>> +#endif
>> +
>>  /*
>>   * Avoid trapping faults against the zero page. The read-only
>>   * data is likely to be read-cached on the local CPU and

Thanks for your review.

--
Best Regards
Yan Zi


signature.asc
Description: OpenPGP digital signature


Re: [PATCH v7 06/10] mm: thp: check pmd migration entry in common path

2017-06-21 Thread Kirill A. Shutemov
On Tue, Jun 20, 2017 at 07:07:11PM -0400, Zi Yan wrote:
> @@ -1220,6 +1238,9 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t 
> orig_pmd)
>   if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
>   goto out_unlock;
>  
> + if (unlikely(!pmd_present(orig_pmd)))
> + goto out_unlock;
> +

Hm. Shouldn't we wait for the page here?

>   page = pmd_page(orig_pmd);
>   VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
>   /*
> @@ -1556,6 +1577,12 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, 
> struct vm_area_struct *vma,
>   if (is_huge_zero_pmd(orig_pmd))
>   goto out;
>  
> + if (unlikely(!pmd_present(orig_pmd))) {
> + VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(orig_pmd));
> + goto out;
> + }
> +
>   page = pmd_page(orig_pmd);
>   /*
>* If other processes are mapping this page, we couldn't discard
> @@ -1770,6 +1797,23 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t 
> *pmd,
>   preserve_write = prot_numa && pmd_write(*pmd);
>   ret = 1;
>  
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> + if (is_swap_pmd(*pmd)) {
> + swp_entry_t entry = pmd_to_swp_entry(*pmd);
> +
> + VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(*pmd));
> + if (is_write_migration_entry(entry)) {
> + pmd_t newpmd;
> +
> + make_migration_entry_read(&entry);
> + newpmd = swp_entry_to_pmd(entry);
> + set_pmd_at(mm, addr, pmd, newpmd);

I was confused by this. Could you copy comment from change_pte_range()
here?

> + }
> + goto unlock;
> + }
> +#endif
> +
>   /*
>* Avoid trapping faults against the zero page. The read-only
>* data is likely to be read-cached on the local CPU and

-- 
 Kirill A. Shutemov


Re: [PATCH v7 06/10] mm: thp: check pmd migration entry in common path

2017-06-21 Thread Kirill A. Shutemov
On Tue, Jun 20, 2017 at 07:07:11PM -0400, Zi Yan wrote:
> @@ -1220,6 +1238,9 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t 
> orig_pmd)
>   if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
>   goto out_unlock;
>  
> + if (unlikely(!pmd_present(orig_pmd)))
> + goto out_unlock;
> +

Hm. Shouldn't we wait for the page here?

>   page = pmd_page(orig_pmd);
>   VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
>   /*
> @@ -1556,6 +1577,12 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, 
> struct vm_area_struct *vma,
>   if (is_huge_zero_pmd(orig_pmd))
>   goto out;
>  
> + if (unlikely(!pmd_present(orig_pmd))) {
> + VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(orig_pmd));
> + goto out;
> + }
> +
>   page = pmd_page(orig_pmd);
>   /*
>* If other processes are mapping this page, we couldn't discard
> @@ -1770,6 +1797,23 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t 
> *pmd,
>   preserve_write = prot_numa && pmd_write(*pmd);
>   ret = 1;
>  
> +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
> + if (is_swap_pmd(*pmd)) {
> + swp_entry_t entry = pmd_to_swp_entry(*pmd);
> +
> + VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) &&
> +   !is_pmd_migration_entry(*pmd));
> + if (is_write_migration_entry(entry)) {
> + pmd_t newpmd;
> +
> + make_migration_entry_read(&entry);
> + newpmd = swp_entry_to_pmd(entry);
> + set_pmd_at(mm, addr, pmd, newpmd);

I was confused by this. Could you copy comment from change_pte_range()
here?

> + }
> + goto unlock;
> + }
> +#endif
> +
>   /*
>* Avoid trapping faults against the zero page. The read-only
>* data is likely to be read-cached on the local CPU and

-- 
 Kirill A. Shutemov


[PATCH v7 06/10] mm: thp: check pmd migration entry in common path

2017-06-20 Thread Zi Yan
From: Zi Yan 

If one of the callers of page migration starts to handle thp,
memory management code starts to see pmd migration entries, so we need
to prepare for it before enabling. This patch changes various code
points which check the status of given pmds in order to prevent races
between thp migration and the pmd-related works.

ChangeLog v1 -> v2:
- introduce pmd_related() (I know the naming is not good, but I can't
  think of a better name. Any suggestion is welcome.)

Signed-off-by: Naoya Horiguchi 

ChangeLog v2 -> v3:
- add is_swap_pmd()
- a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
  pmd_trans_huge(), pmd_devmap(), or pmd_none()
- pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
  true on pmd_migration_entry, so that migration entries are not
  treated as pmd page table entries.

ChangeLog v4 -> v5:
- add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
  the equivalence of !pmd_present() and is_pmd_migration_entry()
- fix migration entry wait deadlock code (from v1) in follow_page_mask()
- remove unnecessary code (from v1) in follow_trans_huge_pmd()
- use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
  so it will not be confused with pmd_none()
- change author information

ChangeLog v5 -> v7
- use macro to disable the code when thp migration is not enabled

Signed-off-by: Zi Yan 
Cc: Kirill A. Shutemov 
---
 arch/x86/mm/gup.c |  7 +++--
 fs/proc/task_mmu.c| 33 ++---
 include/asm-generic/pgtable.h | 17 ++-
 include/linux/huge_mm.h   | 14 +++--
 mm/gup.c  | 22 --
 mm/huge_memory.c  | 67 +++
 mm/memcontrol.c   |  5 
 mm/memory.c   | 12 ++--
 mm/mprotect.c |  4 +--
 mm/mremap.c   |  2 +-
 10 files changed, 154 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 456dfdfd2249..096bbcc801e6 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
pmd_t pmd = *pmdp;
 
next = pmd_addr_end(addr, end);
-   if (pmd_none(pmd))
+   if (!pmd_present(pmd)) {
+   VM_BUG_ON(is_swap_pmd(pmd) && 
IS_ENABLED(CONFIG_MIGRATION) &&
+ !is_pmd_migration_entry(pmd));
return 0;
-   if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
+   } else if (unlikely(pmd_large(pmd))) {
/*
 * NUMA hinting faults need to be handled in the GUP
 * slowpath for accounting purposes and so that they
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..dd98c365a129 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -601,7 +601,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, 
unsigned long end,
 
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
-   smaps_pmd_entry(pmd, addr, walk);
+   if (pmd_present(*pmd))
+   smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
}
@@ -943,6 +944,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
addr,
goto out;
}
 
+   if (!pmd_present(*pmd))
+   goto out;
+
page = pmd_page(*pmd);
 
/* Clear accessed and referenced bits. */
@@ -1222,27 +1226,34 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+   struct page *page = NULL;
 
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
 
-   /*
-* Currently pmd for thp is always present because thp
-* can not be swapped-out, migrated, or HWPOISONed
-* (split in such cases instead.)
-* This if-check is just to prepare for future implementation.
-*/
if (pmd_present(pmd)) {
-   struct page *page = pmd_page(pmd);
-
-   if (page_mapcount(page) == 1)
-   flags |= PM_MMAP_EXCLUSIVE;
+   page = pmd_page(pmd);
 
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pmd_pfn(pmd) +

[PATCH v7 06/10] mm: thp: check pmd migration entry in common path

2017-06-20 Thread Zi Yan
From: Zi Yan 

If one of the callers of page migration starts to handle thp,
memory management code starts to see pmd migration entries, so we need
to prepare for it before enabling. This patch changes various code
points which check the status of given pmds in order to prevent races
between thp migration and the pmd-related works.

ChangeLog v1 -> v2:
- introduce pmd_related() (I know the naming is not good, but I can't
  think of a better name. Any suggestion is welcome.)

Signed-off-by: Naoya Horiguchi 

ChangeLog v2 -> v3:
- add is_swap_pmd()
- a pmd entry should be pmd pointing to pte pages, is_swap_pmd(),
  pmd_trans_huge(), pmd_devmap(), or pmd_none()
- pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return
  true on pmd_migration_entry, so that migration entries are not
  treated as pmd page table entries.

ChangeLog v4 -> v5:
- add explanation in pmd_none_or_trans_huge_or_clear_bad() to state
  the equivalence of !pmd_present() and is_pmd_migration_entry()
- fix migration entry wait deadlock code (from v1) in follow_page_mask()
- remove unnecessary code (from v1) in follow_trans_huge_pmd()
- use is_swap_pmd() instead of !pmd_present() for pmd migration entry,
  so it will not be confused with pmd_none()
- change author information

ChangeLog v5 -> v7
- use macro to disable the code when thp migration is not enabled

Signed-off-by: Zi Yan 
Cc: Kirill A. Shutemov 
---
 arch/x86/mm/gup.c |  7 +++--
 fs/proc/task_mmu.c| 33 ++---
 include/asm-generic/pgtable.h | 17 ++-
 include/linux/huge_mm.h   | 14 +++--
 mm/gup.c  | 22 --
 mm/huge_memory.c  | 67 +++
 mm/memcontrol.c   |  5 
 mm/memory.c   | 12 ++--
 mm/mprotect.c |  4 +--
 mm/mremap.c   |  2 +-
 10 files changed, 154 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 456dfdfd2249..096bbcc801e6 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
pmd_t pmd = *pmdp;
 
next = pmd_addr_end(addr, end);
-   if (pmd_none(pmd))
+   if (!pmd_present(pmd)) {
+   VM_BUG_ON(is_swap_pmd(pmd) && 
IS_ENABLED(CONFIG_MIGRATION) &&
+ !is_pmd_migration_entry(pmd));
return 0;
-   if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
+   } else if (unlikely(pmd_large(pmd))) {
/*
 * NUMA hinting faults need to be handled in the GUP
 * slowpath for accounting purposes and so that they
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..dd98c365a129 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -601,7 +601,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, 
unsigned long end,
 
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
-   smaps_pmd_entry(pmd, addr, walk);
+   if (pmd_present(*pmd))
+   smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
}
@@ -943,6 +944,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
addr,
goto out;
}
 
+   if (!pmd_present(*pmd))
+   goto out;
+
page = pmd_page(*pmd);
 
/* Clear accessed and referenced bits. */
@@ -1222,27 +1226,34 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+   struct page *page = NULL;
 
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
 
-   /*
-* Currently pmd for thp is always present because thp
-* can not be swapped-out, migrated, or HWPOISONed
-* (split in such cases instead.)
-* This if-check is just to prepare for future implementation.
-*/
if (pmd_present(pmd)) {
-   struct page *page = pmd_page(pmd);
-
-   if (page_mapcount(page) == 1)
-   flags |= PM_MMAP_EXCLUSIVE;
+   page = pmd_page(pmd);
 
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
}
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+