DAX vmemmap optimization still uses pgmap-specific state to decide
whether a section should use the optimized layout.

Switch DAX to the compound page order recorded in struct mem_section, so
it follows the same section-based optimization state as the rest of
sparse-vmemmap.

This lets the DAX population, initialization, and teardown paths make
their optimization decisions from the section metadata instead of
carrying separate pgmap-specific state.

This makes DAX vmemmap optimization section-granular. Only
section-aligned ranges record a compound page order, so subsection
mappings remain unoptimized. The resulting loss of vmemmap savings
is negligible.

Signed-off-by: Muchun Song <[email protected]>
---
 arch/powerpc/mm/book3s64/radix_pgtable.c |  5 +++--
 mm/memory_hotplug.c                      |  6 +-----
 mm/mm_init.c                             | 13 ++++---------
 mm/sparse-vmemmap.c                      | 24 ++++++++++++++++++------
 mm/sparse.c                              |  2 +-
 5 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index fb8738016b30..f0043c57694e 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1235,8 +1235,9 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
        pmd_t *pmd;
        pte_t *pte;
        struct page *tail_page;
+       const struct mem_section *ms = __pfn_to_section(start_pfn);
 
-       tail_page = vmemmap_shared_tail_page(pgmap->vmemmap_shift, device_zone(node));
+       tail_page = vmemmap_shared_tail_page(section_order(ms), device_zone(node));
        if (!tail_page)
                return -ENOMEM;
 
@@ -1268,7 +1269,7 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
                        next = addr + PAGE_SIZE;
                        continue;
                } else {
-                       unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+                       unsigned long nr_pages = 1UL << section_order(ms);
                        unsigned long addr_pfn = page_to_pfn((struct page *)addr);
                        unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9ff830703785..c9c69f827efa 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -551,11 +551,7 @@ void remove_pfn_range_from_zone(struct zone *zone,
                /* Select all remaining pages up to the next section boundary */
                cur_nr_pages =
                        min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
-               /*
-                * This is a temporary workaround to prevent the shared vmemmap
-                * page from being overwritten; it will be removed later.
-                */
-               if (!zone_is_zone_device(zone))
+               if (!section_vmemmap_optimizable(__pfn_to_section(pfn)))
                        page_init_poison(pfn_to_page(pfn),
                                         sizeof(struct page) * cur_nr_pages);
        }
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 35c99e5c215c..2b94115e6dd5 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1071,16 +1071,11 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
  * of an altmap. See vmemmap_populate_compound_pages().
  */
 static inline unsigned long compound_nr_pages(unsigned long pfn,
-                                             struct vmem_altmap *altmap,
                                              struct dev_pagemap *pgmap)
 {
-       /*
-        * If DAX memory is hot-plugged into an unoccupied subsection
-        * of an early section, the unoptimized boot memmap is reused.
-        * See section_activate().
-        */
-       if (early_section(__pfn_to_section(pfn)) ||
-           !vmemmap_can_optimize(altmap, pgmap))
+       const struct mem_section *ms = __pfn_to_section(pfn);
+
+       if (!section_vmemmap_optimizable(ms))
                return pgmap_vmemmap_nr(pgmap);
 
        return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page));
@@ -1150,7 +1145,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
                        continue;
 
                memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
-                                    compound_nr_pages(pfn, altmap, pgmap));
+                                    compound_nr_pages(pfn, pgmap));
        }
 
        pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index b5c109b8af6f..ad3e5b54abf7 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -455,8 +455,9 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
        pte_t *pte;
        int rc;
        struct page *page;
+       const struct mem_section *ms = __pfn_to_section(start_pfn);
 
-       page = vmemmap_shared_tail_page(pgmap->vmemmap_shift, device_zone(node));
+       page = vmemmap_shared_tail_page(section_order(ms), device_zone(node));
        if (!page)
                return -ENOMEM;
 
@@ -464,7 +465,7 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
                return vmemmap_populate_range(start, end, node, NULL,
                                              page_to_pfn(page));
 
-       size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
+       size = min(end - start, (1UL << section_order(ms)) * sizeof(struct page));
        for (addr = start; addr < end; addr += size) {
                unsigned long next, last = addr + size;
 
@@ -501,7 +502,9 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
                !IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
                return NULL;
 
-       if (vmemmap_can_optimize(altmap, pgmap))
+       /* Sub-section ranges record no section order, so they stay unoptimized. */
+       if (vmemmap_can_optimize(altmap, pgmap) &&
+           section_vmemmap_optimizable(__pfn_to_section(pfn)))
                r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
        else
                r = vmemmap_populate(start, end, nid, altmap);
@@ -718,8 +721,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
        else if (memmap)
                free_map_bootmem(memmap);
 
-       if (empty)
+       if (empty) {
                ms->section_mem_map = (unsigned long)NULL;
+               section_set_order(ms, 0);
+       }
 }
 
 static struct page * __meminit section_activate(int nid, unsigned long pfn,
@@ -729,8 +734,14 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
        struct mem_section *ms = __pfn_to_section(pfn);
        struct mem_section_usage *usage = NULL;
        struct page *memmap;
+       unsigned int order;
        int rc;
 
+       order = vmemmap_can_optimize(altmap, pgmap) ? pgmap->vmemmap_shift : 0;
+       /* All sub-sections within a section must share the same order. */
+       if (nr_pages < PAGES_PER_SECTION && section_order(ms) && section_order(ms) != order)
+               return ERR_PTR(-ENOTSUPP);
+
        if (!ms->usage) {
                usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
                if (!usage)
@@ -756,6 +767,7 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
        if (nr_pages < PAGES_PER_SECTION && early_section(ms))
                return pfn_to_page(pfn);
 
+       section_set_order_range(pfn, nr_pages, order);
        memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
        if (!memmap) {
                section_deactivate(pfn, nr_pages, altmap, pgmap);
@@ -801,14 +813,14 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
        if (IS_ERR(memmap))
                return PTR_ERR(memmap);
 
+       ms = __nr_to_section(section_nr);
        /*
         * Poison uninitialized struct pages in order to catch invalid flags
         * combinations.
         */
-       if (!vmemmap_can_optimize(altmap, pgmap))
+       if (!section_vmemmap_optimizable(ms))
                page_init_poison(memmap, sizeof(struct page) * nr_pages);
 
-       ms = __nr_to_section(section_nr);
        __section_mark_present(ms, section_nr);
 
        /* Align memmap to section boundary in the subsection case */
diff --git a/mm/sparse.c b/mm/sparse.c
index 54c38ea08190..6878f8941b4c 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -251,7 +251,7 @@ int __meminit section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages
        if (vmemmap_can_optimize(altmap, pgmap))
                vmemmap_pages = VMEMMAP_RESERVE_NR;
 
-       if (!vmemmap_can_optimize(altmap, pgmap) && !section_vmemmap_optimizable(ms))
+       if (!section_vmemmap_optimizable(ms))
                return DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE);
 
        if (order < PFN_SECTION_SHIFT) {
-- 
2.54.0


Reply via email to