Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path
Anshuman Khandual wrote: > On 04/21/2017 02:17 AM, Zi Yan wrote: >> From: Zi Yan>> >> If one of callers of page migration starts to handle thp, >> memory management code start to see pmd migration entry, so we need >> to prepare for it before enabling. This patch changes various code >> point which checks the status of given pmds in order to prevent race >> between thp migration and the pmd-related works. >> >> ChangeLog v1 -> v2: >> - introduce pmd_related() (I know the naming is not good, but can't >> think up no better name. Any suggesntion is welcomed.) >> >> Signed-off-by: Naoya Horiguchi >> >> ChangeLog v2 -> v3: >> - add is_swap_pmd() >> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(), >> pmd_trans_huge(), pmd_devmap(), or pmd_none() >> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return >> true on pmd_migration_entry, so that migration entries are not >> treated as pmd page table entries. >> >> ChangeLog v4 -> v5: >> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state >> the equivalence of !pmd_present() and is_pmd_migration_entry() >> - fix migration entry wait deadlock code (from v1) in follow_page_mask() >> - remove unnecessary code (from v1) in follow_trans_huge_pmd() >> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry, >> so it will not be confused with pmd_none() >> - change author information >> >> Signed-off-by: Zi Yan >> --- >> arch/x86/mm/gup.c | 7 +++-- >> fs/proc/task_mmu.c| 30 + >> include/asm-generic/pgtable.h | 17 +++- >> include/linux/huge_mm.h | 14 -- >> mm/gup.c | 22 ++-- >> mm/huge_memory.c | 61 >> ++- >> mm/memcontrol.c | 5 >> mm/memory.c | 12 +++-- >> mm/mprotect.c | 4 +-- >> mm/mremap.c | 2 +- >> 10 files changed, 145 insertions(+), 29 deletions(-) >> >> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c >> index 456dfdfd2249..096bbcc801e6 100644 >> --- a/arch/x86/mm/gup.c >> +++ b/arch/x86/mm/gup.c >> @@ -9,6 +9,7 @@ >> #include >> #include >> #include >> 
+#include >> #include >> >> #include >> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, >> unsigned long end, >> pmd_t pmd = *pmdp; >> >> next = pmd_addr_end(addr, end); >> -if (pmd_none(pmd)) >> +if (!pmd_present(pmd)) { >> +VM_BUG_ON(is_swap_pmd(pmd) && >> IS_ENABLED(CONFIG_MIGRATION) && >> + !is_pmd_migration_entry(pmd)); >> return 0; >> -if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { >> +} else if (unlikely(pmd_large(pmd))) { >> /* >> * NUMA hinting faults need to be handled in the GUP >> * slowpath for accounting purposes and so that they >> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c >> index 5c8359704601..57489dcd71c4 100644 >> --- a/fs/proc/task_mmu.c >> +++ b/fs/proc/task_mmu.c >> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long >> addr, unsigned long end, >> >> ptl = pmd_trans_huge_lock(pmd, vma); >> if (ptl) { >> -smaps_pmd_entry(pmd, addr, walk); >> +if (pmd_present(*pmd)) >> +smaps_pmd_entry(pmd, addr, walk); >> spin_unlock(ptl); >> return 0; >> } >> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned >> long addr, >> goto out; >> } >> >> +if (!pmd_present(*pmd)) >> +goto out; >> + > > These pmd_present() checks should have been done irrespective of the > presence of new PMD migration entries. Please separate them out in a > different clean up patch. Not really. The introduction of PMD migration entries makes pmd_trans_huge_lock() return a lock when the PMD is a swap entry (see the changes to pmd_trans_huge_lock() in this patch). This was not the case before: pmd_trans_huge_lock() returned NULL if the PMD entry was pmd_none(), so neither of the two chunks was reachable. Maybe I should use is_swap_pmd() to clarify the confusion. 
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >> index 7406d88445bf..3479e9caf2fa 100644 >> --- a/mm/huge_memory.c >> +++ b/mm/huge_memory.c >> @@ -912,6 +912,22 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct >> mm_struct *src_mm, >> >> ret = -EAGAIN; >> pmd = *src_pmd; >> + >> +if (unlikely(is_swap_pmd(pmd))) { >> +swp_entry_t entry = pmd_to_swp_entry(pmd); >> + >> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) && >> +
Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path
Anshuman Khandual wrote: > On 04/21/2017 02:17 AM, Zi Yan wrote: >> From: Zi Yan >> >> If one of callers of page migration starts to handle thp, >> memory management code start to see pmd migration entry, so we need >> to prepare for it before enabling. This patch changes various code >> point which checks the status of given pmds in order to prevent race >> between thp migration and the pmd-related works. >> >> ChangeLog v1 -> v2: >> - introduce pmd_related() (I know the naming is not good, but can't >> think up no better name. Any suggesntion is welcomed.) >> >> Signed-off-by: Naoya Horiguchi >> >> ChangeLog v2 -> v3: >> - add is_swap_pmd() >> - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(), >> pmd_trans_huge(), pmd_devmap(), or pmd_none() >> - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return >> true on pmd_migration_entry, so that migration entries are not >> treated as pmd page table entries. >> >> ChangeLog v4 -> v5: >> - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state >> the equivalence of !pmd_present() and is_pmd_migration_entry() >> - fix migration entry wait deadlock code (from v1) in follow_page_mask() >> - remove unnecessary code (from v1) in follow_trans_huge_pmd() >> - use is_swap_pmd() instead of !pmd_present() for pmd migration entry, >> so it will not be confused with pmd_none() >> - change author information >> >> Signed-off-by: Zi Yan >> --- >> arch/x86/mm/gup.c | 7 +++-- >> fs/proc/task_mmu.c| 30 + >> include/asm-generic/pgtable.h | 17 +++- >> include/linux/huge_mm.h | 14 -- >> mm/gup.c | 22 ++-- >> mm/huge_memory.c | 61 >> ++- >> mm/memcontrol.c | 5 >> mm/memory.c | 12 +++-- >> mm/mprotect.c | 4 +-- >> mm/mremap.c | 2 +- >> 10 files changed, 145 insertions(+), 29 deletions(-) >> >> diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c >> index 456dfdfd2249..096bbcc801e6 100644 >> --- a/arch/x86/mm/gup.c >> +++ b/arch/x86/mm/gup.c >> @@ -9,6 +9,7 @@ >> #include >> #include >> #include >> 
+#include >> #include >> >> #include >> @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, >> unsigned long end, >> pmd_t pmd = *pmdp; >> >> next = pmd_addr_end(addr, end); >> -if (pmd_none(pmd)) >> +if (!pmd_present(pmd)) { >> +VM_BUG_ON(is_swap_pmd(pmd) && >> IS_ENABLED(CONFIG_MIGRATION) && >> + !is_pmd_migration_entry(pmd)); >> return 0; >> -if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { >> +} else if (unlikely(pmd_large(pmd))) { >> /* >> * NUMA hinting faults need to be handled in the GUP >> * slowpath for accounting purposes and so that they >> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c >> index 5c8359704601..57489dcd71c4 100644 >> --- a/fs/proc/task_mmu.c >> +++ b/fs/proc/task_mmu.c >> @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long >> addr, unsigned long end, >> >> ptl = pmd_trans_huge_lock(pmd, vma); >> if (ptl) { >> -smaps_pmd_entry(pmd, addr, walk); >> +if (pmd_present(*pmd)) >> +smaps_pmd_entry(pmd, addr, walk); >> spin_unlock(ptl); >> return 0; >> } >> @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned >> long addr, >> goto out; >> } >> >> +if (!pmd_present(*pmd)) >> +goto out; >> + > > These pmd_present() checks should have been done irrespective of the > presence of new PMD migration entries. Please separate them out in a > different clean up patch. Not really. The introduction of PMD migration entries makes pmd_trans_huge_lock() return a lock when the PMD is a swap entry (see the changes to pmd_trans_huge_lock() in this patch). This was not the case before: pmd_trans_huge_lock() returned NULL if the PMD entry was pmd_none(), so neither of the two chunks was reachable. Maybe I should use is_swap_pmd() to clarify the confusion. 
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >> index 7406d88445bf..3479e9caf2fa 100644 >> --- a/mm/huge_memory.c >> +++ b/mm/huge_memory.c >> @@ -912,6 +912,22 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct >> mm_struct *src_mm, >> >> ret = -EAGAIN; >> pmd = *src_pmd; >> + >> +if (unlikely(is_swap_pmd(pmd))) { >> +swp_entry_t entry = pmd_to_swp_entry(pmd); >> + >> +VM_BUG_ON(IS_ENABLED(CONFIG_MIGRATION) && >> + !is_pmd_migration_entry(pmd)); >> +if (is_write_migration_entry(entry)) { >> +
Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path
On 04/21/2017 02:17 AM, Zi Yan wrote: > From: Zi Yan> > If one of callers of page migration starts to handle thp, > memory management code start to see pmd migration entry, so we need > to prepare for it before enabling. This patch changes various code > point which checks the status of given pmds in order to prevent race > between thp migration and the pmd-related works. > > ChangeLog v1 -> v2: > - introduce pmd_related() (I know the naming is not good, but can't > think up no better name. Any suggesntion is welcomed.) > > Signed-off-by: Naoya Horiguchi > > ChangeLog v2 -> v3: > - add is_swap_pmd() > - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(), > pmd_trans_huge(), pmd_devmap(), or pmd_none() > - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return > true on pmd_migration_entry, so that migration entries are not > treated as pmd page table entries. > > ChangeLog v4 -> v5: > - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state > the equivalence of !pmd_present() and is_pmd_migration_entry() > - fix migration entry wait deadlock code (from v1) in follow_page_mask() > - remove unnecessary code (from v1) in follow_trans_huge_pmd() > - use is_swap_pmd() instead of !pmd_present() for pmd migration entry, > so it will not be confused with pmd_none() > - change author information > > Signed-off-by: Zi Yan > --- > arch/x86/mm/gup.c | 7 +++-- > fs/proc/task_mmu.c| 30 + > include/asm-generic/pgtable.h | 17 +++- > include/linux/huge_mm.h | 14 -- > mm/gup.c | 22 ++-- > mm/huge_memory.c | 61 > ++- > mm/memcontrol.c | 5 > mm/memory.c | 12 +++-- > mm/mprotect.c | 4 +-- > mm/mremap.c | 2 +- > 10 files changed, 145 insertions(+), 29 deletions(-) > > diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c > index 456dfdfd2249..096bbcc801e6 100644 > --- a/arch/x86/mm/gup.c > +++ b/arch/x86/mm/gup.c > @@ -9,6 +9,7 @@ > #include > #include > #include > +#include > #include > > #include > @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t 
pud, unsigned long addr, > unsigned long end, > pmd_t pmd = *pmdp; > > next = pmd_addr_end(addr, end); > - if (pmd_none(pmd)) > + if (!pmd_present(pmd)) { > + VM_BUG_ON(is_swap_pmd(pmd) && > IS_ENABLED(CONFIG_MIGRATION) && > + !is_pmd_migration_entry(pmd)); > return 0; > - if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { > + } else if (unlikely(pmd_large(pmd))) { > /* >* NUMA hinting faults need to be handled in the GUP >* slowpath for accounting purposes and so that they > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > index 5c8359704601..57489dcd71c4 100644 > --- a/fs/proc/task_mmu.c > +++ b/fs/proc/task_mmu.c > @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long > addr, unsigned long end, > > ptl = pmd_trans_huge_lock(pmd, vma); > if (ptl) { > - smaps_pmd_entry(pmd, addr, walk); > + if (pmd_present(*pmd)) > + smaps_pmd_entry(pmd, addr, walk); > spin_unlock(ptl); > return 0; > } > @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long > addr, > goto out; > } > > + if (!pmd_present(*pmd)) > + goto out; > + These pmd_present() checks should have been done irrespective of the presence of new PMD migration entries. Please separate them out in a different clean up patch. > page = pmd_page(*pmd); > > /* Clear accessed and referenced bits. */ > @@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned > long addr, unsigned long end, > if (ptl) { > u64 flags = 0, frame = 0; > pmd_t pmd = *pmdp; > + struct page *page = NULL; > > if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd)) > flags |= PM_SOFT_DIRTY; > > - /* > - * Currently pmd for thp is always present because thp > - * can not be swapped-out, migrated, or HWPOISONed > - * (split in such cases instead.) > - * This if-check is just to prepare for future implementation. > - */ > if (pmd_present(pmd)) { > - struct page *page = pmd_page(pmd); > - > - if (page_mapcount(page) == 1) > - flags |= PM_MMAP_EXCLUSIVE; > +
Re: [PATCH v5 06/11] mm: thp: check pmd migration entry in common path
On 04/21/2017 02:17 AM, Zi Yan wrote: > From: Zi Yan > > If one of callers of page migration starts to handle thp, > memory management code start to see pmd migration entry, so we need > to prepare for it before enabling. This patch changes various code > point which checks the status of given pmds in order to prevent race > between thp migration and the pmd-related works. > > ChangeLog v1 -> v2: > - introduce pmd_related() (I know the naming is not good, but can't > think up no better name. Any suggesntion is welcomed.) > > Signed-off-by: Naoya Horiguchi > > ChangeLog v2 -> v3: > - add is_swap_pmd() > - a pmd entry should be pmd pointing to pte pages, is_swap_pmd(), > pmd_trans_huge(), pmd_devmap(), or pmd_none() > - pmd_none_or_trans_huge_or_clear_bad() and pmd_trans_unstable() return > true on pmd_migration_entry, so that migration entries are not > treated as pmd page table entries. > > ChangeLog v4 -> v5: > - add explanation in pmd_none_or_trans_huge_or_clear_bad() to state > the equivalence of !pmd_present() and is_pmd_migration_entry() > - fix migration entry wait deadlock code (from v1) in follow_page_mask() > - remove unnecessary code (from v1) in follow_trans_huge_pmd() > - use is_swap_pmd() instead of !pmd_present() for pmd migration entry, > so it will not be confused with pmd_none() > - change author information > > Signed-off-by: Zi Yan > --- > arch/x86/mm/gup.c | 7 +++-- > fs/proc/task_mmu.c| 30 + > include/asm-generic/pgtable.h | 17 +++- > include/linux/huge_mm.h | 14 -- > mm/gup.c | 22 ++-- > mm/huge_memory.c | 61 > ++- > mm/memcontrol.c | 5 > mm/memory.c | 12 +++-- > mm/mprotect.c | 4 +-- > mm/mremap.c | 2 +- > 10 files changed, 145 insertions(+), 29 deletions(-) > > diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c > index 456dfdfd2249..096bbcc801e6 100644 > --- a/arch/x86/mm/gup.c > +++ b/arch/x86/mm/gup.c > @@ -9,6 +9,7 @@ > #include > #include > #include > +#include > #include > > #include > @@ -243,9 +244,11 @@ static int gup_pmd_range(pud_t 
pud, unsigned long addr, > unsigned long end, > pmd_t pmd = *pmdp; > > next = pmd_addr_end(addr, end); > - if (pmd_none(pmd)) > + if (!pmd_present(pmd)) { > + VM_BUG_ON(is_swap_pmd(pmd) && > IS_ENABLED(CONFIG_MIGRATION) && > + !is_pmd_migration_entry(pmd)); > return 0; > - if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { > + } else if (unlikely(pmd_large(pmd))) { > /* >* NUMA hinting faults need to be handled in the GUP >* slowpath for accounting purposes and so that they > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > index 5c8359704601..57489dcd71c4 100644 > --- a/fs/proc/task_mmu.c > +++ b/fs/proc/task_mmu.c > @@ -600,7 +600,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long > addr, unsigned long end, > > ptl = pmd_trans_huge_lock(pmd, vma); > if (ptl) { > - smaps_pmd_entry(pmd, addr, walk); > + if (pmd_present(*pmd)) > + smaps_pmd_entry(pmd, addr, walk); > spin_unlock(ptl); > return 0; > } > @@ -942,6 +943,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long > addr, > goto out; > } > > + if (!pmd_present(*pmd)) > + goto out; > + These pmd_present() checks should have been done irrespective of the presence of new PMD migration entries. Please separate them out in a different clean up patch. > page = pmd_page(*pmd); > > /* Clear accessed and referenced bits. */ > @@ -1221,28 +1225,32 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned > long addr, unsigned long end, > if (ptl) { > u64 flags = 0, frame = 0; > pmd_t pmd = *pmdp; > + struct page *page = NULL; > > if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd)) > flags |= PM_SOFT_DIRTY; > > - /* > - * Currently pmd for thp is always present because thp > - * can not be swapped-out, migrated, or HWPOISONed > - * (split in such cases instead.) > - * This if-check is just to prepare for future implementation. > - */ > if (pmd_present(pmd)) { > - struct page *page = pmd_page(pmd); > - > - if (page_mapcount(page) == 1) > - flags |= PM_MMAP_EXCLUSIVE; > + page = pmd_page(pmd); > >