The struct pages of gigantic bootmem HugeTLB pages are currently
initialized by gather_bootmem_prealloc(), which is called from
hugetlb_init(), but page_alloc_init_late() runs earlier and walks
pageblocks to determine zone contiguity.
If a bootmem HugeTLB region is marked noinit, set_zone_contiguous() can
observe still-uninitialized struct pages through __pageblock_pfn_to_page().
This may not trigger an immediate failure, but it can make
set_zone_contiguous() compute the wrong zone contiguity state. If extra
poisoned-page checks are added in this path, such as PF_POISONED_CHECK()
in page_zone_id(), it can also trigger an early boot panic.
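Initialize gigantic bootmem HugeTLB struct pages from page_alloc_init_late(),
before zone contiguity is evaluated, so that later page allocator setup only
sees valid struct page state. To do so, rename gather_bootmem_prealloc() to
hugetlb_struct_page_init(), make it non-static, and add an empty stub for
!CONFIG_HUGETLB_PAGE builds. This also makes the initialization order more
natural, as struct pages should be initialized before any later code
inspects them.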
Fixes: fde1c4ecf916 ("mm: hugetlb: skip initialization of gigantic tail struct pages if freed by HVO")
Signed-off-by: Muchun Song <[email protected]>
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 3 +--
mm/mm_init.c | 1 +
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 93418625d3c5..52a2c30f866c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -173,6 +173,7 @@ extern int movable_gigantic_pages __read_mostly;
extern int sysctl_hugetlb_shm_group __read_mostly;
extern struct list_head huge_boot_pages[MAX_NUMNODES];
+void hugetlb_struct_page_init(void);
void hugetlb_bootmem_alloc(void);
extern nodemask_t hugetlb_bootmem_nodes;
void hugetlb_bootmem_set_nodes(void);
@@ -1307,6 +1308,10 @@ static inline bool hugetlbfs_pagecache_present(
static inline void hugetlb_bootmem_alloc(void)
{
}
+
+static inline void hugetlb_struct_page_init(void)
+{
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d22683ab30a1..b4999653a156 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3370,7 +3370,7 @@ static void __init
gather_bootmem_prealloc_parallel(unsigned long start,
gather_bootmem_prealloc_node(nid);
}
-static void __init gather_bootmem_prealloc(void)
+void __init hugetlb_struct_page_init(void)
{
struct padata_mt_job job = {
.thread_fn = gather_bootmem_prealloc_parallel,
@@ -4163,7 +4163,6 @@ static int __init hugetlb_init(void)
}
hugetlb_init_hstates();
- gather_bootmem_prealloc();
report_hugepages();
hugetlb_sysfs_init();
diff --git a/mm/mm_init.c b/mm/mm_init.c
index fde49f7bba6c..5a910cc5534c 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2335,6 +2335,7 @@ void __init page_alloc_init_late(void)
/* Reinit limits that are based on free pages after the kernel is up */
files_maxfiles_init();
#endif
+ hugetlb_struct_page_init();
/* Accounting of total+free memory is stable at this point. */
mem_init_print_info();
--
2.54.0