On 6/10/26 4:29 AM, Baolin Wang wrote:
> Generalize the order of the collapse_file() function to support future
> shmem mTHP collapse.
>
> No functional changes in this patch.
>
> Signed-off-by: Baolin Wang <[email protected]>
> ---
> mm/khugepaged.c | 27 +++++++++++++++------------
> 1 file changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 631459172e19..4adc8c6de062 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -2214,6 +2214,7 @@ static void retract_page_tables(struct address_space
> *mapping, pgoff_t pgoff)
> * @file: file that collapse on
> * @start: collapse start address
> * @cc: collapse context and scratchpad
> + * @order: folio order being collapsed to
> *
> * Basic scheme is simple, details are more complex:
> * - allocate and lock a new huge page;
> @@ -2232,15 +2233,17 @@ static void retract_page_tables(struct address_space
> *mapping, pgoff_t pgoff)
> * + unlock and free huge page;
> */
> static enum scan_result collapse_file(struct mm_struct *mm, unsigned long
> addr,
> - struct file *file, pgoff_t start, struct collapse_control *cc)
> + struct file *file, pgoff_t start, struct collapse_control *cc,
> + int order)
> {
> - const unsigned int max_ptes_none = collapse_max_ptes_none(cc, NULL,
> HPAGE_PMD_ORDER);
> + const unsigned int max_ptes_none = collapse_max_ptes_none(cc, NULL,
> order);
> struct address_space *mapping = file->f_mapping;
> + const unsigned long nr_pages = 1UL << order;
> struct page *dst;
> struct folio *folio, *tmp, *new_folio;
> - pgoff_t index = 0, end = start + HPAGE_PMD_NR;
> + pgoff_t index = 0, end = start + nr_pages;
> LIST_HEAD(pagelist);
> - XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
> + XA_STATE_ORDER(xas, &mapping->i_pages, start, order);
> enum scan_result result = SCAN_SUCCEED;
> int nr_none = 0;
> bool is_shmem = shmem_file(file);
> @@ -2252,9 +2255,9 @@ static enum scan_result collapse_file(struct mm_struct
> *mm, unsigned long addr,
> * mapping, the shmem check can be removed.
> */
> VM_WARN_ON_ONCE(!is_shmem && !mapping_pmd_folio_support(mapping));
> - VM_WARN_ON_ONCE(start & (HPAGE_PMD_NR - 1));
> + VM_WARN_ON_ONCE(start & (nr_pages - 1));
>
> - result = alloc_charge_folio(&new_folio, mm, cc, HPAGE_PMD_ORDER);
> + result = alloc_charge_folio(&new_folio, mm, cc, order);
> if (result != SCAN_SUCCEED)
> goto out;
>
> @@ -2591,12 +2594,12 @@ static enum scan_result collapse_file(struct
> mm_struct *mm, unsigned long addr,
> }
>
> if (is_shmem) {
> - lruvec_stat_mod_folio(new_folio, NR_SHMEM, HPAGE_PMD_NR);
> + lruvec_stat_mod_folio(new_folio, NR_SHMEM, nr_pages);
> lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR);
Is this a accounting bug? (not your changes) but
lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR);
If this stat is in THPs why are we iterating it by 512? shouldnt it just be +- 1
> } else {
> lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR);
Same here.
> }
> - lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, HPAGE_PMD_NR);
> + lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, nr_pages);
>
> /*
> * Mark new_folio as uptodate before inserting it into the
> @@ -2604,14 +2607,14 @@ static enum scan_result collapse_file(struct
> mm_struct *mm, unsigned long addr,
> * unwritten page.
> */
> folio_mark_uptodate(new_folio);
> - folio_ref_add(new_folio, HPAGE_PMD_NR - 1);
> + folio_ref_add(new_folio, nr_pages - 1);
>
> if (is_shmem)
> folio_mark_dirty(new_folio);
> folio_add_lru(new_folio);
>
> /* Join all the small entries into a single multi-index entry. */
> - xas_set_order(&xas, start, HPAGE_PMD_ORDER);
> + xas_set_order(&xas, start, order);
> xas_store(&xas, new_folio);
> WARN_ON_ONCE(xas_error(&xas));
> xas_unlock_irq(&xas);
> @@ -2666,7 +2669,7 @@ static enum scan_result collapse_file(struct mm_struct
> *mm, unsigned long addr,
> folio_put(new_folio);
> out:
> VM_BUG_ON(!list_empty(&pagelist));
> - trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem,
> file, HPAGE_PMD_NR, result);
> + trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem,
> file, nr_pages, result);
Although the tracepoint has nr_pages, order may be nice too like I did with the
anon tracing.
Once I refactor, and understand file collapse a little better ill be able to
comment on if youre missing anything in this function, but nothing sticks out at
the moment.
> return result;
> }
>
> @@ -2769,7 +2772,7 @@ static enum scan_result collapse_scan_file(struct
> mm_struct *mm,
> result = SCAN_EXCEED_NONE_PTE;
> count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
> } else {
> - result = collapse_file(mm, addr, file, start, cc);
> + result = collapse_file(mm, addr, file, start, cc,
> HPAGE_PMD_ORDER);
> }
> }
>