[PATCH -V7 RESEND 11/21] swap: Add sysfs interface to configure THP swapin
Swapping in a THP as a whole isn't desirable in some situations. For example, for a completely random access pattern, swapping in a THP in one piece will greatly inflate the amount of data read. So a sysfs interface, /sys/kernel/mm/transparent_hugepage/swapin_enabled, is added to configure it. Three options are provided, as follows: - always: THP swapin will always be enabled. - madvise: THP swapin will be enabled only for VMAs with the VM_HUGEPAGE flag set. - never: THP swapin will always be disabled. The default configuration is: madvise. During a page fault, if a PMD swap mapping is found and THP swapin is disabled, the huge swap cluster and the PMD swap mapping will be split, and the fault will fall back to normal page swapin. Signed-off-by: "Huang, Ying" Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Michal Hocko Cc: Johannes Weiner Cc: Shaohua Li Cc: Hugh Dickins Cc: Minchan Kim Cc: Rik van Riel Cc: Dave Hansen Cc: Naoya Horiguchi Cc: Zi Yan Cc: Daniel Jordan --- Documentation/admin-guide/mm/transhuge.rst | 21 + include/linux/huge_mm.h| 31 +++ mm/huge_memory.c | 94 +- 3 files changed, 127 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index 85e33f785fd7..23aefb17101c 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -160,6 +160,27 @@ Some userspace (such as a test program, or an optimized memory allocation cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size +Transparent hugepage may be swapout and swapin in one piece without +splitting. This will improve the utility of transparent hugepage but +may inflate the read/write too. So whether to enable swapin +transparent hugepage in one piece can be configured as follow. 
+ + echo always >/sys/kernel/mm/transparent_hugepage/swapin_enabled + echo madvise >/sys/kernel/mm/transparent_hugepage/swapin_enabled + echo never >/sys/kernel/mm/transparent_hugepage/swapin_enabled + +always + Attempt to allocate a transparent huge page and read it from + swap space in one piece every time. + +never + Always split the swap space and PMD swap mapping and swapin + the fault normal page during swapin. + +madvise + Only swapin the transparent huge page in one piece for + MADV_HUGEPAGE madvise regions. + khugepaged will be automatically started when transparent_hugepage/enabled is set to "always" or "madvise, and it'll be automatically shutdown if it's set to "never". diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 909321c772b5..ea4999a4b6cd 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -63,6 +63,8 @@ enum transparent_hugepage_flag { #ifdef CONFIG_DEBUG_VM TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, #endif + TRANSPARENT_HUGEPAGE_SWAPIN_FLAG, + TRANSPARENT_HUGEPAGE_SWAPIN_REQ_MADV_FLAG, }; struct kobject; @@ -375,11 +377,40 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, #ifdef CONFIG_THP_SWAP extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd); + +static inline bool transparent_hugepage_swapin_enabled( + struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_NOHUGEPAGE) + return false; + + if (is_vma_temporary_stack(vma)) + return false; + + if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + return false; + + if (transparent_hugepage_flags & + (1 << TRANSPARENT_HUGEPAGE_SWAPIN_FLAG)) + return true; + + if (transparent_hugepage_flags & + (1 << TRANSPARENT_HUGEPAGE_SWAPIN_REQ_MADV_FLAG)) + return !!(vma->vm_flags & VM_HUGEPAGE); + + return false; +} #else /* CONFIG_THP_SWAP */ static inline int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd) { return 0; } + +static inline bool transparent_hugepage_swapin_enabled( + struct vm_area_struct *vma) +{ + return 
false; +} #endif /* CONFIG_THP_SWAP */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d3ee25ffeaaf..abaecf96ceeb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -57,7 +57,8 @@ unsigned long transparent_hugepage_flags __read_mostly = #endif (1
[PATCH -V7 RESEND 11/21] swap: Add sysfs interface to configure THP swapin
Swapping in a THP as a whole isn't desirable in some situations. For example, for a completely random access pattern, swapping in a THP in one piece will greatly inflate the amount of data read. So a sysfs interface, /sys/kernel/mm/transparent_hugepage/swapin_enabled, is added to configure it. Three options are provided, as follows: - always: THP swapin will always be enabled. - madvise: THP swapin will be enabled only for VMAs with the VM_HUGEPAGE flag set. - never: THP swapin will always be disabled. The default configuration is: madvise. During a page fault, if a PMD swap mapping is found and THP swapin is disabled, the huge swap cluster and the PMD swap mapping will be split, and the fault will fall back to normal page swapin. Signed-off-by: "Huang, Ying" Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Michal Hocko Cc: Johannes Weiner Cc: Shaohua Li Cc: Hugh Dickins Cc: Minchan Kim Cc: Rik van Riel Cc: Dave Hansen Cc: Naoya Horiguchi Cc: Zi Yan Cc: Daniel Jordan --- Documentation/admin-guide/mm/transhuge.rst | 21 + include/linux/huge_mm.h| 31 +++ mm/huge_memory.c | 94 +- 3 files changed, 127 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index 85e33f785fd7..23aefb17101c 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -160,6 +160,27 @@ Some userspace (such as a test program, or an optimized memory allocation cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size +Transparent hugepage may be swapout and swapin in one piece without +splitting. This will improve the utility of transparent hugepage but +may inflate the read/write too. So whether to enable swapin +transparent hugepage in one piece can be configured as follow. 
+ + echo always >/sys/kernel/mm/transparent_hugepage/swapin_enabled + echo madvise >/sys/kernel/mm/transparent_hugepage/swapin_enabled + echo never >/sys/kernel/mm/transparent_hugepage/swapin_enabled + +always + Attempt to allocate a transparent huge page and read it from + swap space in one piece every time. + +never + Always split the swap space and PMD swap mapping and swapin + the fault normal page during swapin. + +madvise + Only swapin the transparent huge page in one piece for + MADV_HUGEPAGE madvise regions. + khugepaged will be automatically started when transparent_hugepage/enabled is set to "always" or "madvise, and it'll be automatically shutdown if it's set to "never". diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 909321c772b5..ea4999a4b6cd 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -63,6 +63,8 @@ enum transparent_hugepage_flag { #ifdef CONFIG_DEBUG_VM TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, #endif + TRANSPARENT_HUGEPAGE_SWAPIN_FLAG, + TRANSPARENT_HUGEPAGE_SWAPIN_REQ_MADV_FLAG, }; struct kobject; @@ -375,11 +377,40 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, #ifdef CONFIG_THP_SWAP extern int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd); + +static inline bool transparent_hugepage_swapin_enabled( + struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_NOHUGEPAGE) + return false; + + if (is_vma_temporary_stack(vma)) + return false; + + if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + return false; + + if (transparent_hugepage_flags & + (1 << TRANSPARENT_HUGEPAGE_SWAPIN_FLAG)) + return true; + + if (transparent_hugepage_flags & + (1 << TRANSPARENT_HUGEPAGE_SWAPIN_REQ_MADV_FLAG)) + return !!(vma->vm_flags & VM_HUGEPAGE); + + return false; +} #else /* CONFIG_THP_SWAP */ static inline int do_huge_pmd_swap_page(struct vm_fault *vmf, pmd_t orig_pmd) { return 0; } + +static inline bool transparent_hugepage_swapin_enabled( + struct vm_area_struct *vma) +{ + return 
false; +} #endif /* CONFIG_THP_SWAP */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d3ee25ffeaaf..abaecf96ceeb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -57,7 +57,8 @@ unsigned long transparent_hugepage_flags __read_mostly = #endif (1