memmap_init_range() initializes every struct page in the target range.
For compound pages with vmemmap optimization, the tail struct pages are
backed by a shared vmemmap page.

Initializing those tail struct pages would overwrite the shared
vmemmap page contents, so users such as HugeTLB have to open-code
follow-up handling to restore the metadata afterwards.

Use the section's compound page order to detect struct pages that fall
into the shared tail vmemmap range and skip their initialization in
memmap_init_range().  Still initialize the pageblock migratetypes for
the skipped range so the surrounding setup remains intact.

This is a preparatory change for consolidating handling across users of
vmemmap optimization, and it also avoids redundant initialization of
shared tail vmemmap pages during early boot.

Signed-off-by: Muchun Song <[email protected]>
---
 include/linux/mmzone.h |  9 +++++++++
 mm/internal.h          | 16 ++++++++++++++++
 mm/mm_init.c           | 19 +++++++++++++------
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6f112e6f42bb..5fc968bac1f7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2264,6 +2264,11 @@ static inline unsigned int section_order(const struct mem_section *section)
 }
 #endif
 
+static inline unsigned int pfn_to_section_order(unsigned long pfn)
+{
+       return section_order(__pfn_to_section(pfn));
+}
+
 void sparse_init_early_section(int nid, struct page *map, unsigned long pnum,
                               unsigned long flags);
 
@@ -2404,6 +2409,10 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
 #else
 #define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
 #define pfn_in_present_section pfn_valid
+static inline unsigned int pfn_to_section_order(unsigned long pfn)
+{
+       return 0;
+}
 #endif /* CONFIG_SPARSEMEM */
 
 /*
diff --git a/mm/internal.h b/mm/internal.h
index 4a5053368078..1f1c07eb70e2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1004,10 +1004,26 @@ static inline void sparse_init(void) {}
  */
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 void sparse_init_subsection_map(void);
+
+static inline bool vmemmap_page_optimizable(const struct page *page)
+{
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long nr_pages = 1UL << pfn_to_section_order(pfn);
+
+       if (!is_power_of_2(sizeof(struct page)))
+               return false;
+
+       return (pfn & (nr_pages - 1)) >= OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES;
+}
 #else
 static inline void sparse_init_subsection_map(void)
 {
 }
+
+static inline bool vmemmap_page_optimizable(const struct page *page)
+{
+       return false;
+}
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c64e5d63c4ae..3aaee1cf7bf0 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -674,19 +674,17 @@ static inline void fixup_hashdist(void)
 static inline void fixup_hashdist(void) {}
 #endif /* CONFIG_NUMA */
 
-#if defined(CONFIG_ZONE_DEVICE) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 static __meminit void pageblock_migratetype_init_range(unsigned long pfn,
-               unsigned long nr_pages, int migratetype, bool atomic)
+               unsigned long nr_pages, int migratetype, bool isolate, bool atomic)
 {
        const unsigned long end = pfn + nr_pages;
 
        for (pfn = pageblock_align(pfn); pfn < end; pfn += pageblock_nr_pages) {
-               init_pageblock_migratetype(pfn_to_page(pfn), migratetype, false);
+               init_pageblock_migratetype(pfn_to_page(pfn), migratetype, isolate);
                if (!atomic && IS_ALIGNED(pfn, PAGES_PER_SECTION))
                        cond_resched();
        }
 }
-#endif
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 /*
@@ -916,6 +914,15 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
                }
 
                page = pfn_to_page(pfn);
+               if (vmemmap_page_optimizable(page)) {
+                       unsigned long start = pfn;
+
+                       pfn = min(ALIGN(start, 1UL << pfn_to_section_order(pfn)), end_pfn);
+                       pageblock_migratetype_init_range(start, pfn - start, migratetype,
+                                                        isolate_pageblock, false);
+                       continue;
+               }
+
                __init_single_page(page, pfn, zone, nid);
                if (context == MEMINIT_HOTPLUG) {
 #ifdef CONFIG_ZONE_DEVICE
@@ -1142,7 +1149,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
                                     compound_nr_pages(pfn, altmap, pgmap));
        }
 
-       pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false);
+       pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false);
 
        pr_debug("%s initialised %lu pages in %ums\n", __func__,
                nr_pages, jiffies_to_msecs(jiffies - start));
@@ -1982,7 +1989,7 @@ static void __init deferred_free_pages(unsigned long pfn,
        if (!nr_pages)
                return;
 
-       pageblock_migratetype_init_range(pfn, nr_pages, mt, true);
+       pageblock_migratetype_init_range(pfn, nr_pages, mt, false, true);
 
        page = pfn_to_page(pfn);
 
-- 
2.54.0


Reply via email to