On Mon, 11 May 2026 12:58:02 -0600 Nico Pache <[email protected]> wrote:

> From: Dev Jain <[email protected]>
> 
> Pass order to alloc_charge_folio() and update mTHP statistics.
> 
> Reviewed-by: Wei Yang <[email protected]>
> Reviewed-by: Lance Yang <[email protected]>
> Reviewed-by: Baolin Wang <[email protected]>
> Reviewed-by: Lorenzo Stoakes <[email protected]>
> Reviewed-by: Zi Yan <[email protected]>
> Acked-by: Usama Arif <[email protected]>
> Acked-by: David Hildenbrand (Arm) <[email protected]>
> Signed-off-by: Dev Jain <[email protected]>
> Co-developed-by: Nico Pache <[email protected]>
> Signed-off-by: Nico Pache <[email protected]>
> ---
>  Documentation/admin-guide/mm/transhuge.rst |  8 ++++++++
>  include/linux/huge_mm.h                    |  2 ++
>  mm/huge_memory.c                           |  4 ++++
>  mm/khugepaged.c                            | 17 +++++++++++------
>  4 files changed, 25 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/admin-guide/mm/transhuge.rst 
> b/Documentation/admin-guide/mm/transhuge.rst
> index 5fbc3d89bb07..c51932e6275d 100644
> --- a/Documentation/admin-guide/mm/transhuge.rst
> +++ b/Documentation/admin-guide/mm/transhuge.rst
> @@ -639,6 +639,14 @@ anon_fault_fallback_charge
>       instead falls back to using huge pages with lower orders or
>       small pages even though the allocation was successful.
>  
> +collapse_alloc
> +     is incremented every time a huge page is successfully allocated for a
> +     khugepaged collapse.
> +
> +collapse_alloc_failed
> +     is incremented every time a huge page allocation fails during a
> +     khugepaged collapse.
> +
>  zswpout
>       is incremented every time a huge page is swapped out to zswap in one
>       piece without splitting.
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 2949e5acff35..ba7ae6808544 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -128,6 +128,8 @@ enum mthp_stat_item {
>       MTHP_STAT_ANON_FAULT_ALLOC,
>       MTHP_STAT_ANON_FAULT_FALLBACK,
>       MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> +     MTHP_STAT_COLLAPSE_ALLOC,
> +     MTHP_STAT_COLLAPSE_ALLOC_FAILED,
>       MTHP_STAT_ZSWPOUT,
>       MTHP_STAT_SWPIN,
>       MTHP_STAT_SWPIN_FALLBACK,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index e9d499da0ac7..05f482a72a89 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -699,6 +699,8 @@ static struct kobj_attribute _name##_attr = 
> __ATTR_RO(_name)
>  DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, 
> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> +DEFINE_MTHP_STAT_ATTR(collapse_alloc, MTHP_STAT_COLLAPSE_ALLOC);
> +DEFINE_MTHP_STAT_ATTR(collapse_alloc_failed, 
> MTHP_STAT_COLLAPSE_ALLOC_FAILED);
>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
>  DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> @@ -764,6 +766,8 @@ static struct attribute *any_stats_attrs[] = {
>  #endif
>       &split_attr.attr,
>       &split_failed_attr.attr,
> +     &collapse_alloc_attr.attr,
> +     &collapse_alloc_failed_attr.attr,
>       NULL,
>  };
>  
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 979885694351..f0e29d5c7b1f 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1068,21 +1068,26 @@ static enum scan_result 
> __collapse_huge_page_swapin(struct mm_struct *mm,
>  }
>  
>  static enum scan_result alloc_charge_folio(struct folio **foliop, struct 
> mm_struct *mm,
> -             struct collapse_control *cc)
> +             struct collapse_control *cc, unsigned int order)
>  {
>       gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
>                    GFP_TRANSHUGE);
>       int node = collapse_find_target_node(cc);
>       struct folio *folio;
>  
> -     folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
> +     folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask);
>       if (!folio) {
>               *foliop = NULL;
> -             count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> +             if (is_pmd_order(order))
> +                     count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> +             count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC_FAILED);
>               return SCAN_ALLOC_HUGE_PAGE_FAIL;
>       }
>  
> -     count_vm_event(THP_COLLAPSE_ALLOC);
> +     if (is_pmd_order(order))
> +             count_vm_event(THP_COLLAPSE_ALLOC);
> +     count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC);
> +

With this patch, the vmstat THP_COLLAPSE_ALLOC counter is incremented
for PMD order only. But after this we still have

        count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1);

which is not guarded by is_pmd_order().

I think we want this to be PMD order only as well, so that
the vmstat and cgroup counters keep the same meaning?


>       if (unlikely(mem_cgroup_charge(folio, mm, gfp))) {
>               folio_put(folio);
>               *foliop = NULL;
> @@ -1118,7 +1123,7 @@ static enum scan_result collapse_huge_page(struct 
> mm_struct *mm, unsigned long a
>        */
>       mmap_read_unlock(mm);
>  
> -     result = alloc_charge_folio(&folio, mm, cc);
> +     result = alloc_charge_folio(&folio, mm, cc, HPAGE_PMD_ORDER);
>       if (result != SCAN_SUCCEED)
>               goto out_nolock;
>  
> @@ -1899,7 +1904,7 @@ static enum scan_result collapse_file(struct mm_struct 
> *mm, unsigned long addr,
>       VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>       VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
>  
> -     result = alloc_charge_folio(&new_folio, mm, cc);
> +     result = alloc_charge_folio(&new_folio, mm, cc, HPAGE_PMD_ORDER);
>       if (result != SCAN_SUCCEED)
>               goto out;
>  
> -- 
> 2.54.0
> 
> 

Reply via email to