HugeTLB bootmem vmemmap optimization still carries its own early setup
path, including pre-populating optimized mappings before the generic
sparse-vmemmap code runs.

Now that section metadata records the compound page order, HugeTLB only
needs to mark the bootmem huge page range with that order.  The generic
sparse-vmemmap population path can then allocate and map the shared tail
vmemmap pages without any HugeTLB-specific early population code.

Do that by setting the section order when a bootmem huge page is
allocated and dropping the dedicated pre-HVO helpers and related
special-casing.

This removes duplicate early setup logic and switches HugeTLB to the
section-based vmemmap optimization path.

Signed-off-by: Muchun Song <[email protected]>
---
 include/linux/hugetlb.h |   1 -
 include/linux/mm.h      |   3 -
 include/linux/mmzone.h  |  17 ++++++
 mm/bootmem_info.c       |   5 +-
 mm/hugetlb.c            |  26 ++-------
 mm/hugetlb_vmemmap.c    | 124 ++++++----------------------------------
 mm/hugetlb_vmemmap.h    |  13 ++---
 mm/sparse-vmemmap.c     |  29 ----------
 8 files changed, 45 insertions(+), 173 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index fd901bb3630c..dce8969961ea 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -171,7 +171,6 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
 
 extern int movable_gigantic_pages __read_mostly;
 extern int sysctl_hugetlb_shm_group __read_mostly;
-extern struct list_head huge_boot_pages[MAX_NUMNODES];
 
 void hugetlb_struct_page_init(void);
 void hugetlb_bootmem_alloc(void);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 31e27ff6a35f..f39f6fca6551 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4864,9 +4864,6 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
                               int node, struct vmem_altmap *altmap);
 int vmemmap_populate(unsigned long start, unsigned long end, int node,
                struct vmem_altmap *altmap);
-int vmemmap_populate_hvo(unsigned long start, unsigned long end,
-                        unsigned int order, struct zone *zone,
-                        unsigned long headsize);
 void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
                          unsigned long headsize);
 void vmemmap_populate_print_last(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bf4c40818b63..d6a5dd042c25 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2264,6 +2264,18 @@ static inline unsigned int section_order(const struct mem_section *section)
 }
 #endif
 
+static inline void section_set_order_range(unsigned long pfn, unsigned long 
nr_pages,
+                                          unsigned int order)
+{
+       unsigned long section_nr = pfn_to_section_nr(pfn);
+
+       if (!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION))
+               return;
+
+       for (unsigned long i = 0; i < nr_pages / PAGES_PER_SECTION; i++)
+               section_set_order(__nr_to_section(section_nr + i), order);
+}
+
 static inline unsigned int pfn_to_section_order(unsigned long pfn)
 {
        return section_order(__pfn_to_section(pfn));
@@ -2417,6 +2429,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
 #else
 #define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
 #define pfn_in_present_section pfn_valid
+static inline void section_set_order_range(unsigned long pfn, unsigned long 
nr_pages,
+                                          unsigned int order)
+{
+}
+
 static inline unsigned int pfn_to_section_order(unsigned long pfn)
 {
        return 0;
diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
index 3d7675a3ae04..24f45d86ffb3 100644
--- a/mm/bootmem_info.c
+++ b/mm/bootmem_info.c
@@ -51,9 +51,8 @@ static void __init register_page_bootmem_info_section(unsigned long start_pfn)
        section_nr = pfn_to_section_nr(start_pfn);
        ms = __nr_to_section(section_nr);
 
-       if (!preinited_vmemmap_section(ms))
-               register_page_bootmem_memmap(section_nr, pfn_to_page(start_pfn),
-                                            PAGES_PER_SECTION);
+       register_page_bootmem_memmap(section_nr, pfn_to_page(start_pfn),
+                                    PAGES_PER_SECTION);
 
        usage = ms->usage;
        page = virt_to_page(usage);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8debe5c5abce..080f130017e3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -57,7 +57,7 @@ unsigned int default_hstate_idx;
 struct hstate hstates[HUGE_MAX_HSTATE];
 
 __initdata nodemask_t hugetlb_bootmem_nodes;
-__initdata struct list_head huge_boot_pages[MAX_NUMNODES];
+static __initdata struct list_head huge_boot_pages[MAX_NUMNODES];
 
 /*
  * Due to ordering constraints across the init code for various
@@ -3111,6 +3111,7 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
        } else {
                list_add_tail(&m->list, &huge_boot_pages[nid]);
                m->flags |= HUGE_BOOTMEM_ZONES_VALID;
+               hugetlb_vmemmap_optimize_bootmem_page(m);
                /*
                 * Only initialize the head struct page in memmap_init_reserved_pages,
                 * rest of the struct pages will be initialized by the HugeTLB
@@ -3264,13 +3265,15 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
                                           
OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES);
                init_new_hugetlb_folio(folio);
 
-               if (hugetlb_bootmem_page_prehvo(m))
+               if (hugetlb_bootmem_page_prehvo(m)) {
                        /*
                         * If pre-HVO was done, just set the
                         * flag, the HVO code will then skip
                         * this folio.
                         */
                        folio_set_hugetlb_vmemmap_optimized(folio);
+                       section_set_order_range(folio_pfn(folio), 
folio_nr_pages(folio), 0);
+               }
 
                if (hugetlb_bootmem_page_earlycma(m))
                        folio_set_hugetlb_cma(folio);
@@ -3314,25 +3317,6 @@ void __init hugetlb_struct_page_init(void)
                .max_threads    = num_node_state(N_MEMORY),
                .numa_aware     = true,
        };
-#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
-       struct zone *zone;
-
-       for_each_zone(zone) {
-               for (int i = 0; i < NR_OPTIMIZABLE_FOLIO_ORDERS; i++) {
-                       struct page *tail, *p;
-                       unsigned int order;
-
-                       tail = zone->vmemmap_tails[i];
-                       if (!tail)
-                               continue;
-
-                       order = i + OPTIMIZABLE_FOLIO_MIN_ORDER;
-                       p = page_to_virt(tail);
-                       for (int j = 0; j < PAGE_SIZE / sizeof(struct page); 
j++)
-                               init_compound_tail(p + j, NULL, order, zone);
-               }
-       }
-#endif
 
        padata_do_multithreaded(&job);
 }
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 4367118f8f57..730190390ba9 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -16,6 +16,7 @@
 #include <linux/mmdebug.h>
 #include <linux/pagewalk.h>
 #include <linux/pgalloc.h>
+#include <linux/io.h>
 
 #include <asm/tlbflush.h>
 #include "hugetlb_vmemmap.h"
@@ -478,12 +479,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
        return ret;
 }
 
-/* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
-static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio 
*folio)
+static inline bool vmemmap_should_optimize(const struct hstate *h)
 {
-       if (folio_test_hugetlb_vmemmap_optimized(folio))
-               return false;
-
        if (!READ_ONCE(vmemmap_optimize_enabled))
                return false;
 
@@ -493,6 +490,15 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
        return true;
 }
 
+/* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
+static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio 
*folio)
+{
+       if (folio_test_hugetlb_vmemmap_optimized(folio))
+               return false;
+
+       return vmemmap_should_optimize(h);
+}
+
 static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
 {
        const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
@@ -638,9 +644,6 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
                        epfn = spfn + hugetlb_vmemmap_size(h);
                        vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio),
                                        OPTIMIZED_FOLIO_VMEMMAP_SIZE);
-                       
register_page_bootmem_memmap(pfn_to_section_nr(folio_pfn(folio)),
-                                       &folio->page,
-                                       
OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES);
                        continue;
                }
 
@@ -706,111 +709,18 @@ void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head
        __hugetlb_vmemmap_optimize_folios(h, folio_list, true);
 }
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
-
-/* Return true of a bootmem allocated HugeTLB page should be pre-HVO-ed */
-static bool vmemmap_should_optimize_bootmem_page(struct huge_bootmem_page *m)
-{
-       unsigned long section_size, psize, pmd_vmemmap_size;
-       phys_addr_t paddr;
-
-       if (!READ_ONCE(vmemmap_optimize_enabled))
-               return false;
-
-       if (!hugetlb_vmemmap_optimizable(m->hstate))
-               return false;
-
-       psize = huge_page_size(m->hstate);
-       paddr = virt_to_phys(m);
-
-       /*
-        * Pre-HVO only works if the bootmem huge page
-        * is aligned to the section size.
-        */
-       section_size = (1UL << PA_SECTION_SHIFT);
-       if (!IS_ALIGNED(paddr, section_size) ||
-           !IS_ALIGNED(psize, section_size))
-               return false;
-
-       /*
-        * The pre-HVO code does not deal with splitting PMDS,
-        * so the bootmem page must be aligned to the number
-        * of base pages that can be mapped with one vmemmap PMD.
-        */
-       pmd_vmemmap_size = (PMD_SIZE / (sizeof(struct page))) << PAGE_SHIFT;
-       if (!IS_ALIGNED(paddr, pmd_vmemmap_size) ||
-           !IS_ALIGNED(psize, pmd_vmemmap_size))
-               return false;
-
-       return true;
-}
-
-static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn);
-
-/*
- * Initialize memmap section for a gigantic page, HVO-style.
- */
-void __init hugetlb_vmemmap_init_early(int nid)
+void __init hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m)
 {
-       unsigned long psize, paddr, section_size;
-       unsigned long ns, i, pnum, pfn, nr_pages;
-       unsigned long start, end;
-       struct huge_bootmem_page *m = NULL;
-       void *map;
+       struct hstate *h = m->hstate;
+       unsigned long pfn = PHYS_PFN(__pa(m));
 
-       if (!READ_ONCE(vmemmap_optimize_enabled))
+       if (!vmemmap_should_optimize(h))
                return;
 
-       section_size = (1UL << PA_SECTION_SHIFT);
-
-       list_for_each_entry(m, &huge_boot_pages[nid], list) {
-               struct zone *zone;
-
-               if (!vmemmap_should_optimize_bootmem_page(m))
-                       continue;
-
-               nr_pages = pages_per_huge_page(m->hstate);
-               psize = nr_pages << PAGE_SHIFT;
-               paddr = virt_to_phys(m);
-               pfn = PHYS_PFN(paddr);
-               map = pfn_to_page(pfn);
-               start = (unsigned long)map;
-               end = start + hugetlb_vmemmap_size(m->hstate);
-               zone = pfn_to_zone(nid, pfn);
-
-               if (vmemmap_populate_hvo(start, end, huge_page_order(m->hstate),
-                                        zone, OPTIMIZED_FOLIO_VMEMMAP_SIZE))
-                       panic("Failed to allocate memmap for HugeTLB page\n");
-               memmap_boot_pages_add(OPTIMIZED_FOLIO_VMEMMAP_PAGES);
-
-               pnum = pfn_to_section_nr(pfn);
-               ns = psize / section_size;
-
-               for (i = 0; i < ns; i++) {
-                       sparse_init_early_section(nid, map, pnum,
-                                       SECTION_IS_VMEMMAP_PREINIT);
-                       map += section_map_size();
-                       pnum++;
-               }
-
+       section_set_order_range(pfn, pages_per_huge_page(h), 
huge_page_order(h));
+       if (section_vmemmap_optimizable(__pfn_to_section(pfn)))
                m->flags |= HUGE_BOOTMEM_HVO;
-       }
-}
-
-static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn)
-{
-       struct zone *zone;
-       enum zone_type zone_type;
-
-       for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
-               zone = &NODE_DATA(nid)->node_zones[zone_type];
-               if (zone_spans_pfn(zone, pfn))
-                       return zone;
-       }
-
-       return NULL;
 }
-#endif
 
 static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
        {
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 66e11893d076..0d8c88997066 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -9,8 +9,6 @@
 #ifndef _LINUX_HUGETLB_VMEMMAP_H
 #define _LINUX_HUGETLB_VMEMMAP_H
 #include <linux/hugetlb.h>
-#include <linux/io.h>
-#include <linux/memblock.h>
 
 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio);
@@ -20,10 +18,7 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
 void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio 
*folio);
 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head 
*folio_list);
 void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct 
list_head *folio_list);
-#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
-void hugetlb_vmemmap_init_early(int nid);
-#endif
-
+void hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m);
 
 static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
 {
@@ -69,13 +64,13 @@ static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h,
 {
 }
 
-static inline void hugetlb_vmemmap_init_early(int nid)
+static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct 
hstate *h)
 {
+       return 0;
 }
 
-static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct 
hstate *h)
+static inline void hugetlb_vmemmap_optimize_bootmem_page(struct 
huge_bootmem_page *m)
 {
-       return 0;
 }
 #endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 69ae40692e41..b86634903fc0 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -32,7 +32,6 @@
 #include <asm/dma.h>
 #include <asm/tlbflush.h>
 
-#include "hugetlb_vmemmap.h"
 #include "internal.h"
 
 /*
@@ -372,33 +371,6 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *
        return tail;
 }
 
-#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
-int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end,
-                                      unsigned int order, struct zone *zone,
-                                      unsigned long headsize)
-{
-       unsigned long maddr;
-       struct page *tail;
-       pte_t *pte;
-       int node = zone_to_nid(zone);
-
-       tail = vmemmap_get_tail(order, zone);
-       if (!tail)
-               return -ENOMEM;
-
-       for (maddr = addr; maddr < addr + headsize; maddr += PAGE_SIZE) {
-               pte = vmemmap_populate_address(maddr, node, NULL, -1);
-               if (!pte)
-                       return -ENOMEM;
-       }
-
-       /*
-        * Reuse the last page struct page mapped above for the rest.
-        */
-       return vmemmap_populate_range(maddr, end, node, NULL, 
page_to_pfn(tail));
-}
-#endif
-
 void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
                                      unsigned long addr, unsigned long next)
 {
@@ -600,7 +572,6 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
  */
 void __init sparse_vmemmap_init_nid_early(int nid)
 {
-       hugetlb_vmemmap_init_early(nid);
 }
 #endif
 
-- 
2.54.0


Reply via email to