The HugeTLB vmemmap optimization (HVO) for bootmem pages still carries its own early setup path, which pre-populates the optimized mappings before the generic sparse-vmemmap code runs.
Now that section metadata records the compound page order, HugeTLB only needs to mark the bootmem huge page range with that order. The generic sparse-vmemmap population path can then allocate and map the shared tail vmemmap pages without any HugeTLB-specific early population code. Do that by setting the section order when a bootmem huge page is allocated and dropping the dedicated pre-HVO helpers and related special-casing. This removes duplicate early setup logic and switches HugeTLB to the section-based vmemmap optimization path. Signed-off-by: Muchun Song <[email protected]> --- include/linux/hugetlb.h | 1 - include/linux/mm.h | 3 - include/linux/mmzone.h | 17 ++++++ mm/bootmem_info.c | 5 +- mm/hugetlb.c | 26 ++------- mm/hugetlb_vmemmap.c | 124 ++++++---------------------------------- mm/hugetlb_vmemmap.h | 13 ++--- mm/sparse-vmemmap.c | 29 ---------- 8 files changed, 45 insertions(+), 173 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fd901bb3630c..dce8969961ea 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -171,7 +171,6 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); extern int movable_gigantic_pages __read_mostly; extern int sysctl_hugetlb_shm_group __read_mostly; -extern struct list_head huge_boot_pages[MAX_NUMNODES]; void hugetlb_struct_page_init(void); void hugetlb_bootmem_alloc(void); diff --git a/include/linux/mm.h b/include/linux/mm.h index 31e27ff6a35f..f39f6fca6551 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4864,9 +4864,6 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); -int vmemmap_populate_hvo(unsigned long start, unsigned long end, - unsigned int order, struct zone *zone, - unsigned long headsize); void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node, 
unsigned long headsize); void vmemmap_populate_print_last(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bf4c40818b63..d6a5dd042c25 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2264,6 +2264,18 @@ static inline unsigned int section_order(const struct mem_section *section) } #endif +static inline void section_set_order_range(unsigned long pfn, unsigned long nr_pages, + unsigned int order) +{ + unsigned long section_nr = pfn_to_section_nr(pfn); + + if (!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION)) + return; + + for (unsigned long i = 0; i < nr_pages / PAGES_PER_SECTION; i++) + section_set_order(__nr_to_section(section_nr + i), order); +} + static inline unsigned int pfn_to_section_order(unsigned long pfn) { return section_order(__pfn_to_section(pfn)); @@ -2417,6 +2429,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr) #else #define sparse_vmemmap_init_nid_early(_nid) do {} while (0) #define pfn_in_present_section pfn_valid +static inline void section_set_order_range(unsigned long pfn, unsigned long nr_pages, + unsigned int order) +{ +} + static inline unsigned int pfn_to_section_order(unsigned long pfn) { return 0; diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c index 3d7675a3ae04..24f45d86ffb3 100644 --- a/mm/bootmem_info.c +++ b/mm/bootmem_info.c @@ -51,9 +51,8 @@ static void __init register_page_bootmem_info_section(unsigned long start_pfn) section_nr = pfn_to_section_nr(start_pfn); ms = __nr_to_section(section_nr); - if (!preinited_vmemmap_section(ms)) - register_page_bootmem_memmap(section_nr, pfn_to_page(start_pfn), - PAGES_PER_SECTION); + register_page_bootmem_memmap(section_nr, pfn_to_page(start_pfn), + PAGES_PER_SECTION); usage = ms->usage; page = virt_to_page(usage); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8debe5c5abce..080f130017e3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -57,7 +57,7 @@ unsigned int default_hstate_idx; struct hstate 
hstates[HUGE_MAX_HSTATE]; __initdata nodemask_t hugetlb_bootmem_nodes; -__initdata struct list_head huge_boot_pages[MAX_NUMNODES]; +static __initdata struct list_head huge_boot_pages[MAX_NUMNODES]; /* * Due to ordering constraints across the init code for various @@ -3111,6 +3111,7 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid) } else { list_add_tail(&m->list, &huge_boot_pages[nid]); m->flags |= HUGE_BOOTMEM_ZONES_VALID; + hugetlb_vmemmap_optimize_bootmem_page(m); /* * Only initialize the head struct page in memmap_init_reserved_pages, * rest of the struct pages will be initialized by the HugeTLB @@ -3264,13 +3265,15 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid) OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES); init_new_hugetlb_folio(folio); - if (hugetlb_bootmem_page_prehvo(m)) + if (hugetlb_bootmem_page_prehvo(m)) { /* * If pre-HVO was done, just set the * flag, the HVO code will then skip * this folio. */ folio_set_hugetlb_vmemmap_optimized(folio); + section_set_order_range(folio_pfn(folio), folio_nr_pages(folio), 0); + } if (hugetlb_bootmem_page_earlycma(m)) folio_set_hugetlb_cma(folio); @@ -3314,25 +3317,6 @@ void __init hugetlb_struct_page_init(void) .max_threads = num_node_state(N_MEMORY), .numa_aware = true, }; -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP - struct zone *zone; - - for_each_zone(zone) { - for (int i = 0; i < NR_OPTIMIZABLE_FOLIO_ORDERS; i++) { - struct page *tail, *p; - unsigned int order; - - tail = zone->vmemmap_tails[i]; - if (!tail) - continue; - - order = i + OPTIMIZABLE_FOLIO_MIN_ORDER; - p = page_to_virt(tail); - for (int j = 0; j < PAGE_SIZE / sizeof(struct page); j++) - init_compound_tail(p + j, NULL, order, zone); - } - } -#endif padata_do_multithreaded(&job); } diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 4367118f8f57..730190390ba9 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -16,6 +16,7 @@ #include <linux/mmdebug.h> #include <linux/pagewalk.h> #include 
<linux/pgalloc.h> +#include <linux/io.h> #include <asm/tlbflush.h> #include "hugetlb_vmemmap.h" @@ -478,12 +479,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, return ret; } -/* Return true iff a HugeTLB whose vmemmap should and can be optimized. */ -static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *folio) +static inline bool vmemmap_should_optimize(const struct hstate *h) { - if (folio_test_hugetlb_vmemmap_optimized(folio)) - return false; - if (!READ_ONCE(vmemmap_optimize_enabled)) return false; @@ -493,6 +490,15 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio * return true; } +/* Return true iff a HugeTLB whose vmemmap should and can be optimized. */ +static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *folio) +{ + if (folio_test_hugetlb_vmemmap_optimized(folio)) + return false; + + return vmemmap_should_optimize(h); +} + static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone) { const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER; @@ -638,9 +644,6 @@ static void __hugetlb_vmemmap_optimize_folios(struct hstate *h, epfn = spfn + hugetlb_vmemmap_size(h); vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio), OPTIMIZED_FOLIO_VMEMMAP_SIZE); - register_page_bootmem_memmap(pfn_to_section_nr(folio_pfn(folio)), - &folio->page, - OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES); continue; } @@ -706,111 +709,18 @@ void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head __hugetlb_vmemmap_optimize_folios(h, folio_list, true); } -#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT - -/* Return true of a bootmem allocated HugeTLB page should be pre-HVO-ed */ -static bool vmemmap_should_optimize_bootmem_page(struct huge_bootmem_page *m) -{ - unsigned long section_size, psize, pmd_vmemmap_size; - phys_addr_t paddr; - - if (!READ_ONCE(vmemmap_optimize_enabled)) - return false; - - if (!hugetlb_vmemmap_optimizable(m->hstate)) - return false; - - 
psize = huge_page_size(m->hstate); - paddr = virt_to_phys(m); - - /* - * Pre-HVO only works if the bootmem huge page - * is aligned to the section size. - */ - section_size = (1UL << PA_SECTION_SHIFT); - if (!IS_ALIGNED(paddr, section_size) || - !IS_ALIGNED(psize, section_size)) - return false; - - /* - * The pre-HVO code does not deal with splitting PMDS, - * so the bootmem page must be aligned to the number - * of base pages that can be mapped with one vmemmap PMD. - */ - pmd_vmemmap_size = (PMD_SIZE / (sizeof(struct page))) << PAGE_SHIFT; - if (!IS_ALIGNED(paddr, pmd_vmemmap_size) || - !IS_ALIGNED(psize, pmd_vmemmap_size)) - return false; - - return true; -} - -static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn); - -/* - * Initialize memmap section for a gigantic page, HVO-style. - */ -void __init hugetlb_vmemmap_init_early(int nid) +void __init hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m) { - unsigned long psize, paddr, section_size; - unsigned long ns, i, pnum, pfn, nr_pages; - unsigned long start, end; - struct huge_bootmem_page *m = NULL; - void *map; + struct hstate *h = m->hstate; + unsigned long pfn = PHYS_PFN(__pa(m)); - if (!READ_ONCE(vmemmap_optimize_enabled)) + if (!vmemmap_should_optimize(h)) return; - section_size = (1UL << PA_SECTION_SHIFT); - - list_for_each_entry(m, &huge_boot_pages[nid], list) { - struct zone *zone; - - if (!vmemmap_should_optimize_bootmem_page(m)) - continue; - - nr_pages = pages_per_huge_page(m->hstate); - psize = nr_pages << PAGE_SHIFT; - paddr = virt_to_phys(m); - pfn = PHYS_PFN(paddr); - map = pfn_to_page(pfn); - start = (unsigned long)map; - end = start + hugetlb_vmemmap_size(m->hstate); - zone = pfn_to_zone(nid, pfn); - - if (vmemmap_populate_hvo(start, end, huge_page_order(m->hstate), - zone, OPTIMIZED_FOLIO_VMEMMAP_SIZE)) - panic("Failed to allocate memmap for HugeTLB page\n"); - memmap_boot_pages_add(OPTIMIZED_FOLIO_VMEMMAP_PAGES); - - pnum = pfn_to_section_nr(pfn); - ns = psize / 
section_size; - - for (i = 0; i < ns; i++) { - sparse_init_early_section(nid, map, pnum, - SECTION_IS_VMEMMAP_PREINIT); - map += section_map_size(); - pnum++; - } - + section_set_order_range(pfn, pages_per_huge_page(h), huge_page_order(h)); + if (section_vmemmap_optimizable(__pfn_to_section(pfn))) m->flags |= HUGE_BOOTMEM_HVO; - } -} - -static struct zone *pfn_to_zone(unsigned nid, unsigned long pfn) -{ - struct zone *zone; - enum zone_type zone_type; - - for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { - zone = &NODE_DATA(nid)->node_zones[zone_type]; - if (zone_spans_pfn(zone, pfn)) - return zone; - } - - return NULL; } -#endif static const struct ctl_table hugetlb_vmemmap_sysctls[] = { { diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h index 66e11893d076..0d8c88997066 100644 --- a/mm/hugetlb_vmemmap.h +++ b/mm/hugetlb_vmemmap.h @@ -9,8 +9,6 @@ #ifndef _LINUX_HUGETLB_VMEMMAP_H #define _LINUX_HUGETLB_VMEMMAP_H #include <linux/hugetlb.h> -#include <linux/io.h> -#include <linux/memblock.h> #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio); @@ -20,10 +18,7 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h, void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio); void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list); void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list); -#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT -void hugetlb_vmemmap_init_early(int nid); -#endif - +void hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m); static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h) { @@ -69,13 +64,13 @@ static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, { } -static inline void hugetlb_vmemmap_init_early(int nid) +static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h) { + return 0; } -static 
inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h) +static inline void hugetlb_vmemmap_optimize_bootmem_page(struct huge_bootmem_page *m) { - return 0; } #endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 69ae40692e41..b86634903fc0 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -32,7 +32,6 @@ #include <asm/dma.h> #include <asm/tlbflush.h> -#include "hugetlb_vmemmap.h" #include "internal.h" /* @@ -372,33 +371,6 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone * return tail; } -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP -int __meminit vmemmap_populate_hvo(unsigned long addr, unsigned long end, - unsigned int order, struct zone *zone, - unsigned long headsize) -{ - unsigned long maddr; - struct page *tail; - pte_t *pte; - int node = zone_to_nid(zone); - - tail = vmemmap_get_tail(order, zone); - if (!tail) - return -ENOMEM; - - for (maddr = addr; maddr < addr + headsize; maddr += PAGE_SIZE) { - pte = vmemmap_populate_address(maddr, node, NULL, -1); - if (!pte) - return -ENOMEM; - } - - /* - * Reuse the last page struct page mapped above for the rest. - */ - return vmemmap_populate_range(maddr, end, node, NULL, page_to_pfn(tail)); -} -#endif - void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, unsigned long addr, unsigned long next) { @@ -600,7 +572,6 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, */ void __init sparse_vmemmap_init_nid_early(int nid) { - hugetlb_vmemmap_init_early(nid); } #endif -- 2.54.0
