On Mon, 11 May 2026 12:58:02 -0600 Nico Pache <[email protected]> wrote:
> From: Dev Jain <[email protected]> > > Pass order to alloc_charge_folio() and update mTHP statistics. > > Reviewed-by: Wei Yang <[email protected]> > Reviewed-by: Lance Yang <[email protected]> > Reviewed-by: Baolin Wang <[email protected]> > Reviewed-by: Lorenzo Stoakes <[email protected]> > Reviewed-by: Zi Yan <[email protected]> > Acked-by: Usama Arif <[email protected]> > Acked-by: David Hildenbrand (Arm) <[email protected]> > Signed-off-by: Dev Jain <[email protected]> > Co-developed-by: Nico Pache <[email protected]> > Signed-off-by: Nico Pache <[email protected]> > --- > Documentation/admin-guide/mm/transhuge.rst | 8 ++++++++ > include/linux/huge_mm.h | 2 ++ > mm/huge_memory.c | 4 ++++ > mm/khugepaged.c | 17 +++++++++++------ > 4 files changed, 25 insertions(+), 6 deletions(-) > > diff --git a/Documentation/admin-guide/mm/transhuge.rst > b/Documentation/admin-guide/mm/transhuge.rst > index 5fbc3d89bb07..c51932e6275d 100644 > --- a/Documentation/admin-guide/mm/transhuge.rst > +++ b/Documentation/admin-guide/mm/transhuge.rst > @@ -639,6 +639,14 @@ anon_fault_fallback_charge > instead falls back to using huge pages with lower orders or > small pages even though the allocation was successful. > > +collapse_alloc > + is incremented every time a huge page is successfully allocated for a > + khugepaged collapse. > + > +collapse_alloc_failed > + is incremented every time a huge page allocation fails during a > + khugepaged collapse. > + > zswpout > is incremented every time a huge page is swapped out to zswap in one > piece without splitting. 
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h > index 2949e5acff35..ba7ae6808544 100644 > --- a/include/linux/huge_mm.h > +++ b/include/linux/huge_mm.h > @@ -128,6 +128,8 @@ enum mthp_stat_item { > MTHP_STAT_ANON_FAULT_ALLOC, > MTHP_STAT_ANON_FAULT_FALLBACK, > MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, > + MTHP_STAT_COLLAPSE_ALLOC, > + MTHP_STAT_COLLAPSE_ALLOC_FAILED, > MTHP_STAT_ZSWPOUT, > MTHP_STAT_SWPIN, > MTHP_STAT_SWPIN_FALLBACK, > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index e9d499da0ac7..05f482a72a89 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -699,6 +699,8 @@ static struct kobj_attribute _name##_attr = > __ATTR_RO(_name) > DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC); > DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK); > DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, > MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE); > +DEFINE_MTHP_STAT_ATTR(collapse_alloc, MTHP_STAT_COLLAPSE_ALLOC); > +DEFINE_MTHP_STAT_ATTR(collapse_alloc_failed, > MTHP_STAT_COLLAPSE_ALLOC_FAILED); > DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT); > DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN); > DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK); > @@ -764,6 +766,8 @@ static struct attribute *any_stats_attrs[] = { > #endif > &split_attr.attr, > &split_failed_attr.attr, > + &collapse_alloc_attr.attr, > + &collapse_alloc_failed_attr.attr, > NULL, > }; > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 979885694351..f0e29d5c7b1f 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -1068,21 +1068,26 @@ static enum scan_result > __collapse_huge_page_swapin(struct mm_struct *mm, > } > > static enum scan_result alloc_charge_folio(struct folio **foliop, struct > mm_struct *mm, > - struct collapse_control *cc) > + struct collapse_control *cc, unsigned int order) > { > gfp_t gfp = (cc->is_khugepaged ? 
alloc_hugepage_khugepaged_gfpmask() : > GFP_TRANSHUGE); > int node = collapse_find_target_node(cc); > struct folio *folio; > > - folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask); > + folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask); > if (!folio) { > *foliop = NULL; > - count_vm_event(THP_COLLAPSE_ALLOC_FAILED); > + if (is_pmd_order(order)) > + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); > + count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC_FAILED); > return SCAN_ALLOC_HUGE_PAGE_FAIL; > } > > - count_vm_event(THP_COLLAPSE_ALLOC); > + if (is_pmd_order(order)) > + count_vm_event(THP_COLLAPSE_ALLOC); > + count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC); > + With this change, the vmstat THP_COLLAPSE_ALLOC counter is incremented for PMD order only. But after this we still have count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1); which is not guarded by is_pmd_order(). Shouldn't that be PMD order only as well, so that the vmstat and cgroup counters keep the same meaning? > if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { > folio_put(folio); > *foliop = NULL; > @@ -1118,7 +1123,7 @@ static enum scan_result collapse_huge_page(struct > mm_struct *mm, unsigned long a > */ > mmap_read_unlock(mm); > > - result = alloc_charge_folio(&folio, mm, cc); > + result = alloc_charge_folio(&folio, mm, cc, HPAGE_PMD_ORDER); > if (result != SCAN_SUCCEED) > goto out_nolock; > > @@ -1899,7 +1904,7 @@ static enum scan_result collapse_file(struct mm_struct > *mm, unsigned long addr, > VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); > VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); > > - result = alloc_charge_folio(&new_folio, mm, cc); > + result = alloc_charge_folio(&new_folio, mm, cc, HPAGE_PMD_ORDER); > if (result != SCAN_SUCCEED) > goto out; > > -- > 2.54.0 > >
