khugepaged_enter_vma_merge() uses its own ad-hoc test to decide
whether a vma is valid for khugepaged_enter():

    if (!vma->anon_vma)
            /*
             * Not yet faulted in so we will register later in the
             * page fault if needed.
             */
            return 0;
    if (vma->vm_ops || (vm_flags & VM_NO_KHUGEPAGED))
            /* khugepaged not yet working on file or special mappings */
            return 0;

This check has problems. The most obvious one is that it doesn't
test shmem_file(), so a vma backed by a shmem file never calls
khugepaged_enter(). Here is an example of a failed madvise():

   /* mount /dev/shm with huge=advise:
    *     mount -o remount,huge=advise /dev/shm
    * then create file /dev/shm/huge */
   #define HUGE_FILE "/dev/shm/huge"
   #define FILE_SIZE (2UL << 20)	/* illustrative; any PMD-sized length */

   fd = open(HUGE_FILE, O_RDONLY);
   ptr = mmap(NULL, FILE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
   ret = madvise(ptr, FILE_SIZE, MADV_HUGEPAGE);

madvise() returns 0, but this memory region is never backed by huge
pages (as the ShmemHugePages counter in /proc/meminfo shows).
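
To observe the failure, fault the mapping in after the madvise() and
compare ShmemHugePages before and after; without this fix it never
grows for this mapping. A minimal sketch (the read loop and the 4 KiB
step are illustrative, not part of the original report):

   /* Illustrative: touch every base page so khugepaged would have
    * something to collapse; ShmemHugePages still stays flat because
    * the mm was never registered with khugepaged. */
   volatile char sink;
   for (size_t off = 0; off < FILE_SIZE; off += 4096)
           sink = ((char *)ptr)[off];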

This patch fixes these problems by reusing hugepage_vma_check() in
khugepaged_enter_vma_merge().

vma->vm_flags is not yet updated when khugepaged_enter_vma_merge()
runs, so the new vm_flags has to be passed to hugepage_vma_check()
as a separate argument.
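
For context, a simplified sketch of the caller (vma_merge() in
mm/mmap.c): the prospective flags live in a local vm_flags while
vma->vm_flags still holds the pre-merge value, so the check must use
the argument rather than re-read the vma:

   /* Sketch of the call site in mm/mmap.c: vm_flags carries the
    * post-merge flags; vma->vm_flags is not updated yet here. */
   khugepaged_enter_vma_merge(vma, vm_flags);	/* not vma->vm_flags */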

Signed-off-by: Song Liu <[email protected]>
---
 mm/khugepaged.c | 53 ++++++++++++++++++++++++-----------------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index d7b2a4b..e0a3bdf 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -430,18 +430,32 @@ int __khugepaged_enter(struct mm_struct *mm)
        return 0;
 }
 
+static bool hugepage_vma_check(struct vm_area_struct *vma,
+                              unsigned long vm_flags)
+{
+       if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
+           (vm_flags & VM_NOHUGEPAGE) ||
+           test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+               return false;
+       if (shmem_file(vma->vm_file)) {
+               if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
+                       return false;
+               return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
+                               HPAGE_PMD_NR);
+       }
+       if (!vma->anon_vma || vma->vm_ops)
+               return false;
+       if (is_vma_temporary_stack(vma))
+               return false;
+       return !(vm_flags & VM_NO_KHUGEPAGED);
+}
+
 int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
                               unsigned long vm_flags)
 {
        unsigned long hstart, hend;
-       if (!vma->anon_vma)
-               /*
-                * Not yet faulted in so we will register later in the
-                * page fault if needed.
-                */
-               return 0;
-       if (vma->vm_ops || (vm_flags & VM_NO_KHUGEPAGED))
-               /* khugepaged not yet working on file or special mappings */
+
+       if (!hugepage_vma_check(vma, vm_flags))
                return 0;
        hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
        hend = vma->vm_end & HPAGE_PMD_MASK;
@@ -819,25 +833,6 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
 }
 #endif
 
-static bool hugepage_vma_check(struct vm_area_struct *vma)
-{
-       if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-           (vma->vm_flags & VM_NOHUGEPAGE) ||
-           test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
-               return false;
-       if (shmem_file(vma->vm_file)) {
-               if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
-                       return false;
-               return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
-                               HPAGE_PMD_NR);
-       }
-       if (!vma->anon_vma || vma->vm_ops)
-               return false;
-       if (is_vma_temporary_stack(vma))
-               return false;
-       return !(vma->vm_flags & VM_NO_KHUGEPAGED);
-}
-
 /*
  * If mmap_sem temporarily dropped, revalidate vma
  * before taking mmap_sem.
@@ -862,7 +857,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
        hend = vma->vm_end & HPAGE_PMD_MASK;
        if (address < hstart || address + HPAGE_PMD_SIZE > hend)
                return SCAN_ADDRESS_RANGE;
-       if (!hugepage_vma_check(vma))
+       if (!hugepage_vma_check(vma, vma->vm_flags))
                return SCAN_VMA_CHECK;
        return 0;
 }
@@ -1694,7 +1689,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
                        progress++;
                        break;
                }
-               if (!hugepage_vma_check(vma)) {
+               if (!hugepage_vma_check(vma, vma->vm_flags)) {
 skip:
                        progress++;
                        continue;
-- 
2.9.5
