DAX compound vmemmap population still uses its own mechanism to find a reusable tail page: it looks up the PTE that maps the end of the previous section's vmemmap, which only works if sections are populated sequentially.
Switch it to the common vmemmap_shared_tail_page() helper instead, so DAX uses the same per-zone shared tail page as the other vmemmap optimization users. This removes the PTE walk and lets both the section reuse path and the populate path use the same shared page directly. When the target zone is ZONE_DEVICE, mark the shared tail page entries PG_reserved as well, so they match the initialization requirements for device pages. Signed-off-by: Muchun Song <[email protected]> --- include/linux/mmzone.h | 10 +++++++++ mm/memory_hotplug.c | 9 ++++++-- mm/sparse-vmemmap.c | 48 ++++++++++++++---------------------------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5285d53b0c53..7484e7be7b6d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1693,11 +1693,21 @@ static inline bool zone_is_zone_device(const struct zone *zone) { return zone_idx(zone) == ZONE_DEVICE; } + +static inline struct zone *device_zone(int nid) +{ + return &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; +} #else static inline bool zone_is_zone_device(const struct zone *zone) { return false; } + +static inline struct zone *device_zone(int nid) +{ + return NULL; +} #endif /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 462d8dcd636d..9ff830703785 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -551,8 +551,13 @@ void remove_pfn_range_from_zone(struct zone *zone, /* Select all remaining pages up to the next section boundary */ cur_nr_pages = min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn); - page_init_poison(pfn_to_page(pfn), - sizeof(struct page) * cur_nr_pages); + /* + * This is a temporary workaround to prevent the shared vmemmap + * page from being overwritten; it will be removed later. 
+ */ + if (!zone_is_zone_device(zone)) + page_init_poison(pfn_to_page(pfn), + sizeof(struct page) * cur_nr_pages); } /* diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 53a341fcde74..0c0b54e94c07 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -329,8 +329,12 @@ struct page __ref *vmemmap_shared_tail_page(unsigned int order, struct zone *zon if (!addr) return NULL; - for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++) - init_compound_tail((struct page *)addr + i, NULL, order, zone); + for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++) { + page = (struct page *)addr + i; + if (zone_is_zone_device(zone)) + __SetPageReserved(page); + init_compound_tail(page, NULL, order, zone); + } page = virt_to_page(addr); if (cmpxchg(&zone->vmemmap_tails[idx], NULL, page) != NULL) { @@ -442,23 +446,6 @@ static bool __meminit reuse_compound_section(unsigned long start_pfn, return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION; } -static pte_t * __meminit compound_section_tail_page(unsigned long addr) -{ - pte_t *pte; - - addr -= PAGE_SIZE; - - /* - * Assuming sections are populated sequentially, the previous section's - * page data can be reused. - */ - pte = pte_offset_kernel(pmd_off_k(addr), addr); - if (!pte) - return NULL; - - return pte; -} - static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, unsigned long start, unsigned long end, int node, @@ -467,19 +454,15 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, unsigned long size, addr; pte_t *pte; int rc; + struct page *page; - if (reuse_compound_section(start_pfn, pgmap)) { - pte = compound_section_tail_page(start); - if (!pte) - return -ENOMEM; + page = vmemmap_shared_tail_page(pgmap->vmemmap_shift, device_zone(node)); + if (!page) + return -ENOMEM; - /* - * Reuse the page that was populated in the prior iteration - * with just tail struct pages. 
- */ + if (reuse_compound_section(start_pfn, pgmap)) return vmemmap_populate_range(start, end, node, NULL, - pte_pfn(ptep_get(pte))); - } + page_to_pfn(page)); size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page)); for (addr = start; addr < end; addr += size) { @@ -497,12 +480,12 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn, return -ENOMEM; /* - * Reuse the previous page for the rest of tail pages + * Reuse the shared page for the rest of tail pages * See layout diagram in Documentation/mm/vmemmap_dedup.rst */ next += PAGE_SIZE; rc = vmemmap_populate_range(next, last, node, NULL, - pte_pfn(ptep_get(pte))); + page_to_pfn(page)); if (rc) return -ENOMEM; } @@ -828,7 +811,8 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn, * Poison uninitialized struct pages in order to catch invalid flags * combinations. */ - page_init_poison(memmap, sizeof(struct page) * nr_pages); + if (!vmemmap_can_optimize(altmap, pgmap)) + page_init_poison(memmap, sizeof(struct page) * nr_pages); ms = __nr_to_section(section_nr); __section_mark_present(ms, section_nr); -- 2.54.0
