On Thu, May 15, 2025 at 12:39 PM Nico Pache <npa...@redhat.com> wrote: > > Setting /transparent_hugepages/enabled=always allows applications > to benefit from THPs without having to madvise. However, the page fault > handler takes very few considerations to decide whether or not to actually > use a THP. This can lead to a lot of wasted memory. khugepaged only > operates on memory that was either allocated with enabled=always or > MADV_HUGEPAGE. > > Introduce the ability to set enabled=defer, which will prevent THPs from > being allocated by the page fault handler unless madvise is set, > leaving it up to khugepaged to decide which allocations will collapse to a > THP. This should allow applications to benefit from THPs, while curbing > some of the memory waste. > > Acked-by: Zi Yan <z...@nvidia.com> > Co-developed-by: Rafael Aquini <raqu...@redhat.com> > Signed-off-by: Rafael Aquini <raqu...@redhat.com> > Signed-off-by: Nico Pache <npa...@redhat.com> > --- > include/linux/huge_mm.h | 15 +++++++++++++-- > mm/huge_memory.c | 31 +++++++++++++++++++++++++++---- > 2 files changed, 40 insertions(+), 6 deletions(-) > > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h > index e3d15c737008..02038e3db829 100644 > --- a/include/linux/huge_mm.h > +++ b/include/linux/huge_mm.h > @@ -48,6 +48,7 @@ enum transparent_hugepage_flag { > TRANSPARENT_HUGEPAGE_UNSUPPORTED, > TRANSPARENT_HUGEPAGE_FLAG, > TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, > + TRANSPARENT_HUGEPAGE_DEFER_PF_FLAG, > TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, > TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, > TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, > @@ -186,6 +187,7 @@ static inline bool hugepage_global_enabled(void) > { > return transparent_hugepage_flags & > ((1<<TRANSPARENT_HUGEPAGE_FLAG) | > + (1<<TRANSPARENT_HUGEPAGE_DEFER_PF_FLAG) | > (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)); > } > > @@ -195,6 +197,12 @@ static inline bool hugepage_global_always(void) > (1<<TRANSPARENT_HUGEPAGE_FLAG); > } > > +static
inline bool hugepage_global_defer(void) > +{ > + return transparent_hugepage_flags & > + (1<<TRANSPARENT_HUGEPAGE_DEFER_PF_FLAG); > +} > + > static inline int highest_order(unsigned long orders) > { > return fls_long(orders) - 1; > @@ -291,13 +299,16 @@ unsigned long thp_vma_allowable_orders(struct > vm_area_struct *vma, > unsigned long tva_flags, > unsigned long orders) > { > + if ((tva_flags & TVA_IN_PF) && hugepage_global_defer() && > + !(vm_flags & VM_HUGEPAGE)) > + return 0; > + > /* Optimization to check if required orders are enabled early. */ > if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) { > unsigned long mask = READ_ONCE(huge_anon_orders_always); > - > if (vm_flags & VM_HUGEPAGE) > mask |= READ_ONCE(huge_anon_orders_madvise); > - if (hugepage_global_always() || > + if (hugepage_global_always() || hugepage_global_defer() || > ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled())) > mask |= READ_ONCE(huge_anon_orders_inherit); > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 700988a0d5cf..ce0ee74753af 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -297,12 +297,15 @@ static ssize_t enabled_show(struct kobject *kobj, > const char *output; > > if (test_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags)) > - output = "[always] madvise never"; > + output = "[always] madvise defer never";
A small nit: alphabetical ordering of the options (always, defer, madvise, never) might improve readability here. -- Regards Yafang