memmap_init_zone_device() repeats nearly identical head-page
initialization for each PFN. Prepare one reusable ZONE_DEVICE head-page
template through the existing slow path, copy it into each destination
page, and then fix up the PFN-dependent fields after the copy.

The optimized path assigns _refcount through the copied template rather
than through set_page_count(), so the page_ref_set tracepoint would not
fire for the copied pages. Keep the fast path disabled when that
tracepoint is enabled. Also fall back to the slow path if struct page is
not an integral number of u64 words.

This patch accelerates the pfns_per_compound == 1 case. Compound tails
are handled in the next patch.

Tested in a VM with a 100 GB fsdax namespace device configured with
map=dev on an Intel Ice Lake server. This test exercises the nd_pmem
rebind path (pfns_per_compound == 1).

Test procedure:
Rebind the nd_pmem driver 30 times and collect the memmap initialization
time from the pr_debug() output of memmap_init_zone_device().

Base (v7.1-rc3):
  First binding: 1486 ms
  Average of subsequent rebinds: 273.52 ms

With patches 1-3 applied:
  First binding: 1422 ms
  Average of subsequent rebinds: 245.73 ms

This reduces the average rebind time from 273.52 ms to 245.73 ms, or
about 10%.

Signed-off-by: Li Zhe <[email protected]>
---
 mm/mm_init.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 4ba506df93bc..2992711351a0 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1066,6 +1066,63 @@ static void __ref zone_device_page_init_slow(struct page *page,
        zone_device_page_init_pageblock(page, pfn);
 }
 
+/*
+ * ZONE_DEVICE depends on MEMORY_HOTPLUG, and MEMORY_HOTPLUG is 64-bit
+ * only. That means CONFIG_ZONE_DEVICE cannot be enabled on 32-bit
+ * builds, so this fast-path code does not need a separate 32-bit
+ * fallback implementation.
+ */
+static inline bool zone_device_page_init_optimization_enabled(void)
+{
+       /*
+        * The template fast path copies a preinitialized struct page as an
+        * array of u64 words. Skip it when the page_ref_set tracepoint is
+        * enabled, and fall back to the slow path if struct page is not an
+        * integral number of u64 words.
+        */
+       return !page_ref_tracepoint_active(page_ref_set) &&
+               IS_ALIGNED(sizeof(struct page), sizeof(u64));
+}
+
+static inline void zone_device_template_page_init(struct page *template,
+                                                 unsigned long pfn,
+                                                 unsigned long zone_idx,
+                                                 int nid,
+                                                 struct dev_pagemap *pgmap)
+{
+       __zone_device_page_init(template, pfn, zone_idx, nid, pgmap);
+       if (!zone_device_page_init_refcount(pgmap))
+               set_page_count(template, 0);
+}
+
+/*
+ * The copied template already provides the PFN-invariant portion of a
+ * ZONE_DEVICE struct page. Fix up the fields that still depend on @pfn
+ * after the copy, namely the section bits and page->virtual when present.
+ */
+static inline void zone_device_page_init_finish(struct page *page,
+                                                       unsigned long pfn)
+{
+       set_page_section_from_pfn(page, pfn);
+#ifdef WANT_PAGE_VIRTUAL
+       if (!is_highmem_idx(ZONE_DEVICE))
+               set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+}
+
+static void zone_device_page_init_from_template(struct page *page,
+               unsigned long pfn, const struct page *template)
+{
+       const u64 *src = (const u64 *)template;
+       u64 *dst = (u64 *)page;
+       unsigned int i;
+
+       for (i = 0; i < sizeof(struct page) / sizeof(u64); i++)
+               dst[i] = src[i];
+       zone_device_page_init_finish(page, pfn);
+       zone_device_page_init_pageblock(page, pfn);
+}
+
 /*
  * With compound page geometry and when struct pages are stored in ram most
  * tail pages are reused. Consequently, the amount of unique struct pages to
@@ -1114,6 +1171,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
                                   unsigned long nr_pages,
                                   struct dev_pagemap *pgmap)
 {
+       bool use_template = zone_device_page_init_optimization_enabled();
        unsigned long pfn, end_pfn = start_pfn + nr_pages;
        struct pglist_data *pgdat = zone->zone_pgdat;
        struct vmem_altmap *altmap = pgmap_altmap(pgmap);
@@ -1121,6 +1179,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
        unsigned long zone_idx = zone_idx(zone);
        unsigned long start = jiffies;
        int nid = pgdat->node_id;
+       struct page template;
 
        if (WARN_ON_ONCE(!pgmap || zone_idx != ZONE_DEVICE))
                return;
@@ -1135,10 +1194,19 @@ void __ref memmap_init_zone_device(struct zone *zone,
                nr_pages = end_pfn - start_pfn;
        }
 
+       if (use_template)
+               zone_device_template_page_init(&template, start_pfn, zone_idx,
+                                              nid, pgmap);
+
        for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
                struct page *page = pfn_to_page(pfn);
 
-               zone_device_page_init_slow(page, pfn, zone_idx, nid, pgmap);
+               if (use_template)
+                       zone_device_page_init_from_template(page, pfn,
+                                                           &template);
+               else
+                       zone_device_page_init_slow(page, pfn, zone_idx,
+                                                  nid, pgmap);
 
                if (pfns_per_compound == 1)
                        continue;
-- 
2.20.1

Reply via email to