Hi Andrew, could you please apply the following fixup to avoid potentially using a stale VMA in the new writeback-retry logic for madvise_collapse?
Thank you! -- Nico ----8<---- commit a9ac3b1bfa926dd707ac3a785583f8d7a0579578 Author: Nico Pache <[email protected]> Date: Fri Jan 23 16:32:42 2026 -0700 madvise writeback retry logic fix Signed-off-by: Nico Pache <[email protected]> diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 59e5a5588d85..2b054f7d9753 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2418,6 +2418,14 @@ static enum scan_result collapse_single_pmd(unsigned long addr, mmap_read_unlock(mm); *mmap_locked = false; result = collapse_scan_file(mm, addr, file, pgoff, cc); + + if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK && + mapping_can_writeback(file->f_mapping)) { + const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT; + const loff_t lend = lstart + HPAGE_PMD_SIZE - 1; + + filemap_write_and_wait_range(file->f_mapping, lstart, lend); + } fput(file); if (result != SCAN_PTE_MAPPED_HUGEPAGE) @@ -2840,19 +2848,8 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start, *lock_dropped = true; if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) { - struct file *file = get_file(vma->vm_file); - pgoff_t pgoff = linear_page_index(vma, addr); - - if (mapping_can_writeback(file->f_mapping)) { - loff_t lstart = (loff_t)pgoff << PAGE_SHIFT; - loff_t lend = lstart + HPAGE_PMD_SIZE - 1; - - filemap_write_and_wait_range(file->f_mapping, lstart, lend); - triggered_wb = true; - fput(file); - goto retry; - } - fput(file); + triggered_wb = true; + goto retry; } switch (result) { -- 2.52.0 On 1/22/26 12:28 PM, Nico Pache wrote: > The khugepaged daemon and madvise_collapse have two different > implementations that do almost the same thing. > > Create collapse_single_pmd to increase code reuse and create an entry > point to these two users. > > Refactor madvise_collapse and collapse_scan_mm_slot to use the new > collapse_single_pmd function. This introduces a minor behavioral change > that is most likely an undiscovered bug. 
The current implementation of > khugepaged tests collapse_test_exit_or_disable before calling > collapse_pte_mapped_thp, but we weren't doing it in the madvise_collapse > case. By unifying these two callers madvise_collapse now also performs > this check. We also modify the return value to be SCAN_ANY_PROCESS which > properly indicates that this process is no longer valid to operate on. > > We also guard the khugepaged_pages_collapsed variable to ensure its only > incremented for khugepaged. > > Reviewed-by: Wei Yang <[email protected]> > Reviewed-by: Lance Yang <[email protected]> > Reviewed-by: Lorenzo Stoakes <[email protected]> > Reviewed-by: Baolin Wang <[email protected]> > Reviewed-by: Zi Yan <[email protected]> > Acked-by: David Hildenbrand <[email protected]> > Signed-off-by: Nico Pache <[email protected]> > --- > mm/khugepaged.c | 106 +++++++++++++++++++++++++++--------------------- > 1 file changed, 60 insertions(+), 46 deletions(-) > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index fefcbdca4510..59e5a5588d85 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -2394,6 +2394,54 @@ static enum scan_result collapse_scan_file(struct > mm_struct *mm, unsigned long a > return result; > } > > +/* > + * Try to collapse a single PMD starting at a PMD aligned addr, and return > + * the results. 
> + */ > +static enum scan_result collapse_single_pmd(unsigned long addr, > + struct vm_area_struct *vma, bool *mmap_locked, > + struct collapse_control *cc) > +{ > + struct mm_struct *mm = vma->vm_mm; > + enum scan_result result; > + struct file *file; > + pgoff_t pgoff; > + > + if (vma_is_anonymous(vma)) { > + result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cc); > + goto end; > + } > + > + file = get_file(vma->vm_file); > + pgoff = linear_page_index(vma, addr); > + > + mmap_read_unlock(mm); > + *mmap_locked = false; > + result = collapse_scan_file(mm, addr, file, pgoff, cc); > + fput(file); > + > + if (result != SCAN_PTE_MAPPED_HUGEPAGE) > + goto end; > + > + mmap_read_lock(mm); > + *mmap_locked = true; > + if (collapse_test_exit_or_disable(mm)) { > + mmap_read_unlock(mm); > + *mmap_locked = false; > + return SCAN_ANY_PROCESS; > + } > + result = try_collapse_pte_mapped_thp(mm, addr, !cc->is_khugepaged); > + if (result == SCAN_PMD_MAPPED) > + result = SCAN_SUCCEED; > + mmap_read_unlock(mm); > + *mmap_locked = false; > + > +end: > + if (cc->is_khugepaged && result == SCAN_SUCCEED) > + ++khugepaged_pages_collapsed; > + return result; > +} > + > static unsigned int collapse_scan_mm_slot(unsigned int pages, enum > scan_result *result, > struct collapse_control *cc) > __releases(&khugepaged_mm_lock) > @@ -2466,34 +2514,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int > pages, enum scan_result * > VM_BUG_ON(khugepaged_scan.address < hstart || > khugepaged_scan.address + HPAGE_PMD_SIZE > > hend); > - if (!vma_is_anonymous(vma)) { > - struct file *file = get_file(vma->vm_file); > - pgoff_t pgoff = linear_page_index(vma, > - khugepaged_scan.address); > - > - mmap_read_unlock(mm); > - mmap_locked = false; > - *result = collapse_scan_file(mm, > - khugepaged_scan.address, file, pgoff, > cc); > - fput(file); > - if (*result == SCAN_PTE_MAPPED_HUGEPAGE) { > - mmap_read_lock(mm); > - if (collapse_test_exit_or_disable(mm)) > - goto breakouterloop; > - *result = 
> try_collapse_pte_mapped_thp(mm, > - khugepaged_scan.address, false); > - if (*result == SCAN_PMD_MAPPED) > - *result = SCAN_SUCCEED; > - mmap_read_unlock(mm); > - } > - } else { > - *result = collapse_scan_pmd(mm, vma, > - khugepaged_scan.address, &mmap_locked, > cc); > - } > - > - if (*result == SCAN_SUCCEED) > - ++khugepaged_pages_collapsed; > > + *result = collapse_single_pmd(khugepaged_scan.address, > + vma, &mmap_locked, cc); > /* move to next address */ > khugepaged_scan.address += HPAGE_PMD_SIZE; > progress += HPAGE_PMD_NR; > @@ -2799,6 +2822,7 @@ int madvise_collapse(struct vm_area_struct *vma, > unsigned long start, > cond_resched(); > mmap_read_lock(mm); > mmap_locked = true; > + *lock_dropped = true; > result = hugepage_vma_revalidate(mm, addr, false, &vma, > cc); > if (result != SCAN_SUCCEED) { > @@ -2809,17 +2833,17 @@ int madvise_collapse(struct vm_area_struct *vma, > unsigned long start, > hend = min(hend, vma->vm_end & HPAGE_PMD_MASK); > } > mmap_assert_locked(mm); > - if (!vma_is_anonymous(vma)) { > - struct file *file = get_file(vma->vm_file); > - pgoff_t pgoff = linear_page_index(vma, addr); > > - mmap_read_unlock(mm); > - mmap_locked = false; > + result = collapse_single_pmd(addr, vma, &mmap_locked, cc); > + > + if (!mmap_locked) > *lock_dropped = true; > - result = collapse_scan_file(mm, addr, file, pgoff, cc); > > - if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && > !triggered_wb && > - mapping_can_writeback(file->f_mapping)) { > + if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb) { > + struct file *file = get_file(vma->vm_file); > + pgoff_t pgoff = linear_page_index(vma, addr); > + > + if (mapping_can_writeback(file->f_mapping)) { > loff_t lstart = (loff_t)pgoff << PAGE_SHIFT; > loff_t lend = lstart + HPAGE_PMD_SIZE - 1; > > @@ -2829,26 +2853,16 @@ int madvise_collapse(struct vm_area_struct *vma, > unsigned long start, > goto retry; > } > fput(file); > - } else { > - result = collapse_scan_pmd(mm, vma, addr, &mmap_locked, > cc); > } 
> - if (!mmap_locked) > - *lock_dropped = true; > > -handle_result: > switch (result) { > case SCAN_SUCCEED: > case SCAN_PMD_MAPPED: > ++thps; > break; > - case SCAN_PTE_MAPPED_HUGEPAGE: > - BUG_ON(mmap_locked); > - mmap_read_lock(mm); > - result = try_collapse_pte_mapped_thp(mm, addr, true); > - mmap_read_unlock(mm); > - goto handle_result; > /* Whitelisted set of results where continuing OK */ > case SCAN_NO_PTE_TABLE: > + case SCAN_PTE_MAPPED_HUGEPAGE: > case SCAN_PTE_NON_PRESENT: > case SCAN_PTE_UFFD_WP: > case SCAN_LACK_REFERENCED_PAGE:
