The compound struct page initialization needed by boot-time gigantic hugetlb folios is currently open-coded in mm/hugetlb.c (hugetlb_folio_init_vmemmap()), while ZONE_DEVICE has its own separate initialization path (memmap_init_compound()) in mm/mm_init.c.
Factor the common compound memmap setup into memmap_init_compound_page_frozen() so both paths can share the same frozen page initialization logic. This removes duplicated open-coded compound page setup and keeps the initialization rules in one place. Signed-off-by: Muchun Song <[email protected]> --- mm/hugetlb.c | 25 +----------- mm/internal.h | 2 + mm/mm_init.c | 111 +++++++++++++++++++------------------------------- 3 files changed, 45 insertions(+), 93 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 10f04fa95d43..7e9f49882395 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3118,28 +3118,6 @@ static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid) return true; } -static void __init hugetlb_folio_init_vmemmap(struct page *head, unsigned long pfn, - enum zone_type zone, int nid, unsigned int order, unsigned int nr_pages) -{ - /* - * This is an open-coded prep_compound_page() whereby we avoid - * walking pages twice by initializing/preparing+freezing them in the - * same go. - */ - __init_single_page(head, pfn, zone, nid); - set_page_count(head, 0); - - __SetPageHead(head); - for (int i = 1; i < nr_pages; i++) { - struct page *page = head + i; - - __init_single_page(page, pfn + i, zone, nid); - prep_compound_tail(page, head, order); - set_page_count(page, 0); - } - prep_compound_head(head, order); -} - /* * memblock-allocated pageblocks might not have the migrate type set * if marked with the 'noinit' flag. 
Set it to the default (MIGRATE_MOVABLE) @@ -3210,8 +3188,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid) VM_BUG_ON(!hstate_is_gigantic(h)); - hugetlb_folio_init_vmemmap(page, pfn, zone, nid, huge_page_order(h), - vmemmap_nr_struct_pages(pfn, nr_pages)); + memmap_init_compound_page_frozen(page, pfn, zone, nid, huge_page_order(h)); init_new_hugetlb_folio(folio); if (order_vmemmap_optimizable(pfn_to_section_order(pfn))) { diff --git a/mm/internal.h b/mm/internal.h index 416afdf7b2ec..2c67ae25124b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1793,6 +1793,8 @@ static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte void __meminit __init_single_page(struct page *page, unsigned long pfn, unsigned long zone, int nid); +void __meminit memmap_init_compound_page_frozen(struct page *head, unsigned long pfn, + enum zone_type zone, int nid, unsigned int order); /* shrinker related functions */ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, diff --git a/mm/mm_init.c b/mm/mm_init.c index 95422e92ede8..9b23c31db8c6 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1018,79 +1018,46 @@ static void __init memmap_init(void) init_unavailable_range(hole_pfn, end_pfn, zone_id, nid); } -#ifdef CONFIG_ZONE_DEVICE -static void __ref __init_zone_device_page(struct page *page, unsigned long pfn, - unsigned long zone_idx, int nid, - struct dev_pagemap *pgmap) +static void __meminit init_single_page_frozen(struct page *page, unsigned long pfn, + enum zone_type zone, int nid) { + __init_single_page(page, pfn, zone, nid); + if (zone_is_zone_device(&NODE_DATA(nid)->node_zones[zone])) { + /* + * ZONE_DEVICE pages are not managed by the page allocator, mark + * them reserved to prevent them from being touched elsewhere. + * + * We can use the non-atomic __set_bit operation for setting + * the flag as we are still initializing the pages. 
+ */ + __SetPageReserved(page); - __init_single_page(page, pfn, zone_idx, nid); - - /* - * Mark page reserved as it will need to wait for onlining - * phase for it to be fully associated with a zone. - * - * We can use the non-atomic __set_bit operation for setting - * the flag as we are still initializing the pages. - */ - __SetPageReserved(page); - - /* - * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer - * and zone_device_data. It is a bug if a ZONE_DEVICE page is - * ever freed or placed on a driver-private list. - */ - page_folio(page)->pgmap = pgmap; - page->zone_device_data = NULL; - - /* - * ZONE_DEVICE pages other than MEMORY_TYPE_GENERIC are released - * directly to the driver page allocator which will set the page count - * to 1 when allocating the page. - * - * MEMORY_TYPE_GENERIC and MEMORY_TYPE_FS_DAX pages automatically have - * their refcount reset to one whenever they are freed (ie. after - * their refcount drops to 0). - */ - switch (pgmap->type) { - case MEMORY_DEVICE_FS_DAX: - case MEMORY_DEVICE_PRIVATE: - case MEMORY_DEVICE_COHERENT: - case MEMORY_DEVICE_PCI_P2PDMA: - set_page_count(page, 0); - break; - - case MEMORY_DEVICE_GENERIC: - break; + /* + * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer + * and zone_device_data. It is a bug if a ZONE_DEVICE page is + * ever freed or placed on a driver-private list. + */ + page->zone_device_data = NULL; } + set_page_count(page, 0); } -static void __ref memmap_init_compound(struct page *head, - unsigned long head_pfn, - unsigned long zone_idx, int nid, - struct dev_pagemap *pgmap, - unsigned long nr_pages) +void __meminit memmap_init_compound_page_frozen(struct page *head, unsigned long pfn, + enum zone_type zone, int nid, unsigned int order) { - unsigned long pfn, end_pfn = head_pfn + nr_pages; - unsigned int order = pgmap->vmemmap_shift; + int nr_pages = vmemmap_nr_struct_pages(pfn, 1UL << order); - /* - * We have to initialize the pages, including setting up page links. 
- * prep_compound_page() does not take care of that, so instead we - * open-code prep_compound_page() so we can take care of initializing - * the pages in the same go. - */ - __SetPageHead(head); - for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) { - struct page *page = pfn_to_page(pfn); + init_single_page_frozen(head, pfn, zone, nid); - __init_zone_device_page(page, pfn, zone_idx, nid, pgmap); - prep_compound_tail(page, head, order); - set_page_count(page, 0); + __SetPageHead(head); + for (int i = 1; i < nr_pages; i++) { + init_single_page_frozen(head + i, pfn + i, zone, nid); + prep_compound_tail(head + i, head, order); } prep_compound_head(head, order); } +#ifdef CONFIG_ZONE_DEVICE void __ref memmap_init_zone_device(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, @@ -1118,18 +1085,24 @@ void __ref memmap_init_zone_device(struct zone *zone, } for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) { - struct page *page = pfn_to_page(pfn); - - __init_zone_device_page(page, pfn, zone_idx, nid, pgmap); + struct page *head = pfn_to_page(pfn); if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) cond_resched(); - if (pfns_per_compound == 1) - continue; - - memmap_init_compound(page, pfn, zone_idx, nid, pgmap, - vmemmap_nr_struct_pages(pfn, pfns_per_compound)); + if (pgmap->vmemmap_shift) + memmap_init_compound_page_frozen(head, pfn, zone_idx, nid, + pgmap->vmemmap_shift); + else + init_single_page_frozen(head, pfn, zone_idx, nid); + /* + * ZONE_DEVICE pages other than MEMORY_TYPE_GENERIC are released + * directly to the driver page allocator which will set the page + * count to 1 when allocating the page. + */ + if (pgmap->type == MEMORY_DEVICE_GENERIC) + init_page_count(head); + ((struct folio *)head)->pgmap = pgmap; } pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false); -- 2.54.0
