The template fast path from the previous patch only accelerates head pages. Compound tails in memmap_init_compound() still go through the slow path one by one.
Build separate head and tail templates and reuse one prepared tail template across the tail pages in a compound range. Head pages keep the zone_device_page_init_refcount() policy, while compound tails always start with a refcount of 0 after prep_compound_tail(). This extends the template-copy fast path to pfns_per_compound > 1 without changing the existing slow path.

Tested in a VM with a 100 GB devdax namespace (align=2097152) on an Intel Ice Lake server. This test exercises the dax_pmem rebind path and measures memmap initialization latency.

Test procedure: unbind and rebind the dax_pmem driver 30 times, and collect the memmap initialization time from the pr_debug() output of memmap_init_zone_device().

Base (v7.1-rc3):
  First binding: 1515 ms
  Average of subsequent rebinds: 313.45 ms

With patches 1-4 applied:
  First binding: 1422 ms
  Average of subsequent rebinds: 256.56 ms

This reduces the average rebind time from 313.45 ms to 256.56 ms, or about 18.1%.

Signed-off-by: Li Zhe <[email protected]>
--- mm/mm_init.c | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 2992711351a0..17a84d4cda01 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1084,17 +1084,25 @@ static inline bool zone_device_page_init_optimization_enabled(void) IS_ALIGNED(sizeof(struct page), sizeof(u64)); } -static inline void zone_device_template_page_init(struct page *template, - unsigned long pfn, - unsigned long zone_idx, - int nid, - struct dev_pagemap *pgmap) +static inline void zone_device_template_head_page_init(struct page *template, + unsigned long pfn, unsigned long zone_idx, int nid, + struct dev_pagemap *pgmap) { __zone_device_page_init(template, pfn, zone_idx, nid, pgmap); if (!zone_device_page_init_refcount(pgmap)) set_page_count(template, 0); } +static inline void zone_device_template_tail_page_init(struct page *template, + unsigned long pfn, unsigned long zone_idx, int nid, + struct dev_pagemap
*pgmap, const struct page *head, + unsigned int order) +{ + __zone_device_page_init(template, pfn, zone_idx, nid, pgmap); + prep_compound_tail(template, head, order); + set_page_count(template, 0); +} + /* * The copied template already provides the PFN-invariant portion of a * ZONE_DEVICE struct page. Fix up the fields that still depend on @pfn @@ -1144,10 +1152,12 @@ static void __ref memmap_init_compound(struct page *head, unsigned long head_pfn, unsigned long zone_idx, int nid, struct dev_pagemap *pgmap, - unsigned long nr_pages) + unsigned long nr_pages, + bool use_template) { unsigned long pfn, end_pfn = head_pfn + nr_pages; unsigned int order = pgmap->vmemmap_shift; + struct page template; /* * We have to initialize the pages, including setting up page links. @@ -1156,12 +1166,28 @@ static void __ref memmap_init_compound(struct page *head, * the pages in the same go. */ __SetPageHead(head); + + /* + * A tail template can be reused for all tail pages in the same compound page + * because shared state for compound tails is pre-set by prep_compound_tail(). + * The per-page page->virtual and section in flags are fixed up after copying. 
+ */ + if (use_template) + zone_device_template_tail_page_init(&template, head_pfn + 1, + zone_idx, nid, pgmap, + head, order); + for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) { struct page *page = pfn_to_page(pfn); - zone_device_page_init_slow(page, pfn, zone_idx, nid, pgmap); - prep_compound_tail(page, head, order); - set_page_count(page, 0); + if (use_template) { + zone_device_page_init_from_template(page, pfn, + &template); + } else { + zone_device_page_init_slow(page, pfn, zone_idx, nid, pgmap); + prep_compound_tail(page, head, order); + set_page_count(page, 0); + } } prep_compound_head(head, order); } @@ -1195,8 +1221,8 @@ void __ref memmap_init_zone_device(struct zone *zone, } if (use_template) - zone_device_template_page_init(&template, start_pfn, zone_idx, - nid, pgmap); + zone_device_template_head_page_init(&template, start_pfn, + zone_idx, nid, pgmap); for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) { struct page *page = pfn_to_page(pfn); @@ -1212,7 +1238,8 @@ void __ref memmap_init_zone_device(struct zone *zone, continue; memmap_init_compound(page, pfn, zone_idx, nid, pgmap, - compound_nr_pages(altmap, pgmap)); + compound_nr_pages(altmap, pgmap), + use_template); } pr_debug("%s initialised %lu pages in %ums\n", __func__, -- 2.20.1

